/*
 * Copyright (c) 2013 Hugh Bailey <obs.jim@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#pragma once

#include "lexer.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Produces a string from `literal`, of which `count` characters are given.
 *
 * NOTE(review): defined outside this header.  Presumably this strips the
 * surrounding quotes and resolves escape sequences of a C-style literal, and
 * the returned string is heap-allocated and owned by the caller -- confirm
 * against the implementation before relying on either.
 */
EXPORT char *cf_literal_to_str(const char *literal, size_t count);

/* ------------------------------------------------------------------------- */
/*
 * A C-family lexer token is defined as:
 *   1.) A generic 'name' token.  (abc123_def456)
 *   2.) A numeric sequence (usually starting with a number)
 *   3.) A sequence of generic whitespace defined as spaces and tabs
 *   4.) A newline
 *   5.) A string or character sequence (surrounded by single or double quotes)
 *   6.) A single character of a type not specified above
 */
enum cf_token_type {
	/* zero value; the type a token has after cf_token_clear() */
	CFTOKEN_NONE,
	/* generic 'name' token, e.g. abc123_def456 */
	CFTOKEN_NAME,
	/* numeric sequence (usually starting with a number) */
	CFTOKEN_NUM,
	/* run of generic whitespace: spaces and tabs */
	CFTOKEN_SPACETAB,
	/* a newline */
	CFTOKEN_NEWLINE,
	/* string or character sequence in single or double quotes */
	CFTOKEN_STRING,
	/* a single character not covered by any of the types above */
	CFTOKEN_OTHER
};

/*
 * One token produced by the c-family lexer.  Tokens hold string references
 * (struct strref) rather than owned copies of their text.
 */
struct cf_token {
	/* lexer associated with this token (presumably its producer -- confirm) */
	const struct cf_lexer *lex;
	/* token text */
	struct strref str;
	/*
	 * NOTE(review): presumably the token text as it appeared before
	 * spliced lines were merged -- confirm against the lexer source
	 */
	struct strref unmerged_str;
	/* token classification; CFTOKEN_NONE (0) after cf_token_clear() */
	enum cf_token_type type;
};

/* Resets every byte of *t to zero, which leaves its type as CFTOKEN_NONE. */
static inline void cf_token_clear(struct cf_token *t)
{
	memset(t, 0, sizeof(*t));
}

/*
 * Byte-wise copy of *src into *dst.  Only the token structure is copied; the
 * string references inside it are duplicated as references, not as text.
 */
static inline void cf_token_copy(struct cf_token *dst,
		const struct cf_token *src)
{
	memcpy(dst, src, sizeof(*dst));
}

/*
 * Applies strref_add() to both string references of dst, using the matching
 * reference of `add` as the second operand.  dst's lex and type members are
 * left untouched.
 */
static inline void cf_token_add(struct cf_token *dst,
		const struct cf_token *add)
{
	struct strref *merged   = &dst->str;
	struct strref *unmerged = &dst->unmerged_str;

	strref_add(merged, &add->str);
	strref_add(unmerged, &add->unmerged_str);
}

/* ------------------------------------------------------------------------- */
/*
 *   The c-family lexer is a base lexer for generating a list of string
 * reference tokens to be used with c-style languages.
 *
 *   This base lexer is meant to be used as a stepping stone for an actual
 * language lexer/parser.
 *
 *   It reformats the text in the two following ways:
 *     1.) Spliced lines (escaped newlines) are merged
 *     2.) All comments are converted to a single space
 */

/*
 * State of one c-family lexer instance.  Set up with cf_lexer_init(),
 * filled by cf_lexer_lex(), and released with cf_lexer_free().
 */
struct cf_lexer {
	/* NOTE(review): presumably a copy of the `file` name given to
	 * cf_lexer_lex() -- confirm ownership in the implementation */
	char *file;
	/* underlying generic lexer (declared in lexer.h) */
	struct lexer base_lexer;
	/* NOTE(review): presumably the buffer holding the reformatted text and
	 * the current write position inside it -- confirm */
	char *reformatted, *write_offset;
	/* token list; first element is returned by cf_lexer_get_tokens() */
	DARRAY(struct cf_token) tokens;
	bool unexpected_eof; /* unexpected multi-line comment eof */
};

/*
 * Lifetime management for a struct cf_lexer (defined outside this header).
 * NOTE(review): presumably every cf_lexer_init() must be paired with one
 * cf_lexer_free() -- confirm against the implementation.
 */
EXPORT void cf_lexer_init(struct cf_lexer *lex);
EXPORT void cf_lexer_free(struct cf_lexer *lex);

/*
 * Gives direct access to the lexer's token storage: returns the array
 * pointer of lex->tokens without copying anything.
 */
static inline struct cf_token *cf_lexer_get_tokens(struct cf_lexer *lex)
{
	struct cf_token *first = lex->tokens.array;

	return first;
}

/*
 * Runs the lexer over the text in `str` (defined outside this header).
 * NOTE(review): `file` is presumably the name of the file the text came from,
 * and the return value presumably reports success -- confirm both against
 * the implementation.
 */
EXPORT bool cf_lexer_lex(struct cf_lexer *lex, const char *str,
		const char *file);

/* ------------------------------------------------------------------------- */
/* c-family preprocessor definition */

/*
 * One preprocessor definition.  Initialize with cf_def_init() and release
 * with cf_def_free().
 */
struct cf_def {
	/* token naming the definition */
	struct cf_token name;
	/* parameter tokens; appended to by cf_def_addparam() */
	DARRAY(struct cf_token) params;
	/* tokens appended by cf_def_addtoken() (presumably the replacement
	 * list of the definition -- confirm) */
	DARRAY(struct cf_token) tokens;
	/* false after cf_def_init(); NOTE(review): presumably true for
	 * function-like macros -- confirm against the preprocessor source */
	bool macro;
};

/*
 * Puts *cfd into its empty starting state: macro flag off, both token
 * arrays initialized, and the name token cleared.
 */
static inline void cf_def_init(struct cf_def *cfd)
{
	cfd->macro = false;
	da_init(cfd->tokens);
	da_init(cfd->params);
	cf_token_clear(&cfd->name);
}

/*
 * Appends the token pointed to by `param` to the definition's parameter
 * array.  The element is handed to da_push_back() by address.
 */
static inline void cf_def_addparam(struct cf_def *cfd, struct cf_token *param)
{
	const struct cf_token *entry = param;

	da_push_back(cfd->params, entry);
}

/*
 * Appends the token pointed to by `token` to the definition's token array.
 * The element is handed to da_push_back() by address.
 */
static inline void cf_def_addtoken(struct cf_def *cfd, struct cf_token *token)
{
	const struct cf_token *entry = token;

	da_push_back(cfd->tokens, entry);
}

/*
 * Returns the address of parameter number `idx` inside the definition's
 * parameter array.  No bounds check is performed on `idx`.
 */
static inline struct cf_token *cf_def_getparam(const struct cf_def *cfd,
		size_t idx)
{
	return &cfd->params.array[idx];
}

/*
 * Releases both token arrays of *cfd and clears its name token.  The
 * struct cf_def itself is not freed here.
 */
static inline void cf_def_free(struct cf_def *cfd)
{
	da_free(cfd->tokens);
	da_free(cfd->params);
	cf_token_clear(&cfd->name);
}

/* ------------------------------------------------------------------------- */
/*
 * C-family preprocessor
 *
 *   This preprocessor allows for standard c-style preprocessor directives
 * to be applied to source text, such as:
 *
 *   + #include
 *   + #define/#undef
 *   + #ifdef/#ifndef/#if/#elif/#else/#endif
 *
 *   Still left to implement (TODO):
 *   + #if/#elif
 *   + "defined" preprocessor keyword
 *   + system includes 
 *   + variadic macros
 *   + custom callbacks (for things like pragma)
 *   + option to exclude features such as #import, variadic macros, and other
 *     features for certain language implementations
 *   + macro parameter string operator #
 *   + macro parameter token concatenation operator ##
 *   + predefined macros
 *   + restricted macros
 */

/*
 * State of one preprocessor run.  Set up with cf_preprocessor_init() and
 * released with cf_preprocessor_free().
 */
struct cf_preprocessor {
	/* NOTE(review): presumably the lexer passed to cf_preprocess() --
	 * confirm against the implementation */
	struct cf_lexer *lex;
	/* NOTE(review): presumably the error sink passed to cf_preprocess() --
	 * confirm against the implementation */
	struct error_data *ed;
	/* known definitions */
	DARRAY(struct cf_def)   defines;
	/* directory strings added by cf_preprocessor_add_sys_include_dir() */
	DARRAY(char*)           sys_include_dirs;
	/* NOTE(review): presumably lexers of included files -- confirm */
	DARRAY(struct cf_lexer) dependencies;
	/* token list; first element is returned by
	 * cf_preprocessor_get_tokens() */
	DARRAY(struct cf_token) tokens;
	/* NOTE(review): presumably set while inside a conditional branch that
	 * is being skipped -- confirm */
	bool ignore_state;
};

/*
 * Lifetime management for a struct cf_preprocessor (defined outside this
 * header).
 * NOTE(review): presumably every cf_preprocessor_init() must be paired with
 * one cf_preprocessor_free() -- confirm against the implementation.
 */
EXPORT void cf_preprocessor_init(struct cf_preprocessor *pp);
EXPORT void cf_preprocessor_free(struct cf_preprocessor *pp);

/*
 * Runs the preprocessor `pp` using the lexer `lex` and the error data `ed`
 * (defined outside this header).
 * NOTE(review): presumably `lex` must already have been lexed with
 * cf_lexer_lex(), and the return value presumably reports success -- confirm
 * both against the implementation.
 */
EXPORT bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
		struct error_data *ed);

/*
 * Appends a duplicate of `include_dir` to the preprocessor's list of system
 * include directories.  A NULL `include_dir` is ignored.
 *
 * The duplicate is made with bstrdup() and stored in pp->sys_include_dirs.
 * NOTE(review): the stored strings are presumably released by
 * cf_preprocessor_free() -- confirm against the implementation.
 */
static inline void cf_preprocessor_add_sys_include_dir(
		struct cf_preprocessor *pp, const char *include_dir)
{
	char *dir_copy;

	if (!include_dir)
		return;

	dir_copy = bstrdup(include_dir);

	/*
	 * da_push_back() copies the element from the address it is given, so
	 * it must receive a pointer to the char* slot, not the string itself.
	 * Passing the string directly would store its first bytes as the
	 * element and leak the duplicate.
	 */
	da_push_back(pp->sys_include_dirs, &dir_copy);
}

/*
 * Add or remove a definition on the preprocessor `pp` (defined outside this
 * header).
 * NOTE(review): whether add_def copies *def or takes over its arrays, and how
 * an already existing name is handled, is not visible here -- confirm against
 * the implementation.  remove_def presumably looks the definition up by
 * `def_name`.
 */
EXPORT void cf_preprocessor_add_def(struct cf_preprocessor *pp,
		struct cf_def *def);
EXPORT void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
		const char *def_name);

/*
 * Gives direct access to the preprocessor's token storage: returns the
 * array pointer of pp->tokens without copying anything.
 */
static inline struct cf_token *cf_preprocessor_get_tokens(
		struct cf_preprocessor *pp)
{
	struct cf_token *first = pp->tokens.array;

	return first;
}

#ifdef __cplusplus
}
#endif