206 lines
5.6 KiB
C
206 lines
5.6 KiB
C
/*
|
|
* Copyright (c) 2013 Hugh Bailey <obs.jim@gmail.com>
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "lexer.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
 * NOTE(review): judging by the name and signature only (the implementation
 * is not in this header), this turns a literal of `count` characters into a
 * string. The non-const char * return suggests the caller owns the result --
 * confirm how it must be released.
 */
EXPORT char *cf_literal_to_str(const char *literal, size_t count);
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
/*
|
|
* A C-family lexer token is defined as:
|
|
* 1.) A generic 'name' token. (abc123_def456)
|
|
* 2.) A numeric sequence (usually starting with a number)
|
|
* 3.) A sequence of generic whitespace defined as spaces and tabs
|
|
* 4.) A newline
|
|
* 5.) A string or character sequence (surrounded by single or double quotes)
|
|
* 6.) A single character of a type not specified above
|
|
*/
|
|
/*
 * Token categories produced by the c-family lexer.
 *
 * The values are the implicit ones (0..6) written out explicitly, so that
 * CFTOKEN_NONE is visibly the value a zero-filled token ends up with.
 */
enum cf_token_type {
	CFTOKEN_NONE     = 0, /* no category assigned */
	CFTOKEN_NAME     = 1, /* generic 'name' token (abc123_def456) */
	CFTOKEN_NUM      = 2, /* numeric sequence */
	CFTOKEN_SPACETAB = 3, /* run of spaces and tabs */
	CFTOKEN_NEWLINE  = 4, /* a newline */
	CFTOKEN_STRING   = 5, /* string or character sequence in quotes */
	CFTOKEN_OTHER    = 6  /* single character of any other type */
};
|
|
|
|
struct cf_token {
|
|
const struct cf_lexer *lex;
|
|
struct strref str;
|
|
struct strref unmerged_str;
|
|
enum cf_token_type type;
|
|
};
|
|
|
|
static inline void cf_token_clear(struct cf_token *t)
|
|
{
|
|
memset(t, 0, sizeof(struct cf_token));
|
|
}
|
|
|
|
static inline void cf_token_copy(struct cf_token *dst,
|
|
const struct cf_token *src)
|
|
{
|
|
memcpy(dst, src, sizeof(struct cf_token));
|
|
}
|
|
|
|
static inline void cf_token_add(struct cf_token *dst,
|
|
const struct cf_token *add)
|
|
{
|
|
strref_add(&dst->str, &add->str);
|
|
strref_add(&dst->unmerged_str, &add->unmerged_str);
|
|
}
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
/*
|
|
* The c-family lexer is a base lexer for generating a list of string
|
|
* reference tokens to be used with c-style languages.
|
|
*
|
|
* This base lexer is meant to be used as a stepping stone for an actual
|
|
* language lexer/parser.
|
|
*
|
|
* It reformats the text in the two following ways:
|
|
* 1.) Spliced lines (escaped newlines) are merged
|
|
* 2.) All comments are converted to a single space
|
|
*/
|
|
|
|
struct cf_lexer {
|
|
char *file;
|
|
struct lexer base_lexer;
|
|
char *reformatted, *write_offset;
|
|
DARRAY(struct cf_token) tokens;
|
|
bool unexpected_eof; /* unexpected multi-line comment eof */
|
|
};
|
|
|
|
/*
 * Initializes a c-family lexer.
 * NOTE(review): implementation not visible here; presumably must be called
 * before any other cf_lexer_* function -- confirm.
 */
EXPORT void cf_lexer_init(struct cf_lexer *lex);
|
|
/*
 * Releases a c-family lexer.
 * NOTE(review): implementation not visible here; presumably frees the
 * resources held by `lex` but not `lex` itself -- confirm.
 */
EXPORT void cf_lexer_free(struct cf_lexer *lex);
|
|
|
|
static inline struct cf_token *cf_lexer_get_tokens(struct cf_lexer *lex)
|
|
{
|
|
return lex->tokens.array;
|
|
}
|
|
|
|
/*
 * Runs the lexer over the text in `str`.
 *
 * NOTE(review): implementation not visible here. Presumably `file` is the
 * name associated with the text and the return value is true on success --
 * confirm, including whether `file` may be NULL.
 */
EXPORT bool cf_lexer_lex(struct cf_lexer *lex,
			 const char *str,
			 const char *file);
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
/* c-family preprocessor definition */
|
|
|
|
struct cf_def {
|
|
struct cf_token name;
|
|
DARRAY(struct cf_token) params;
|
|
DARRAY(struct cf_token) tokens;
|
|
bool macro;
|
|
};
|
|
|
|
static inline void cf_def_init(struct cf_def *cfd)
|
|
{
|
|
cf_token_clear(&cfd->name);
|
|
da_init(cfd->params);
|
|
da_init(cfd->tokens);
|
|
cfd->macro = false;
|
|
}
|
|
|
|
static inline void cf_def_addparam(struct cf_def *cfd, struct cf_token *param)
|
|
{
|
|
da_push_back(cfd->params, param);
|
|
}
|
|
|
|
static inline void cf_def_addtoken(struct cf_def *cfd, struct cf_token *token)
|
|
{
|
|
da_push_back(cfd->tokens, token);
|
|
}
|
|
|
|
static inline struct cf_token *cf_def_getparam(const struct cf_def *cfd,
|
|
size_t idx)
|
|
{
|
|
return cfd->params.array+idx;
|
|
}
|
|
|
|
static inline void cf_def_free(struct cf_def *cfd)
|
|
{
|
|
cf_token_clear(&cfd->name);
|
|
da_free(cfd->params);
|
|
da_free(cfd->tokens);
|
|
}
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
/*
|
|
* C-family preprocessor
|
|
*
|
|
* This preprocessor allows for standard c-style preprocessor directives
|
|
* to be applied to source text, such as:
|
|
*
|
|
* + #include
|
|
* + #define/#undef
|
|
* + #ifdef/#ifndef/#if/#elif/#else/#endif
* (NOTE: #if/#elif also appear in the TODO list below and may not be
* implemented yet)
|
|
*
|
|
* Still left to implement (TODO):
|
|
* + #if/#elif
|
|
* + "defined" preprocessor keyword
|
|
* + system includes
|
|
* + variadic macros
|
|
* + custom callbacks (for things like pragma)
|
|
* + option to exclude features such as #import, variadic macros, and other
|
|
* features for certain language implementations
|
|
* + macro parameter string operator #
|
|
* + macro parameter token concatenation operator ##
|
|
* + predefined macros
|
|
* + restricted macros
|
|
*/
|
|
|
|
struct cf_preprocessor {
|
|
struct cf_lexer *lex;
|
|
struct error_data *ed;
|
|
DARRAY(struct cf_def) defines;
|
|
DARRAY(char*) sys_include_dirs;
|
|
DARRAY(struct cf_lexer) dependencies;
|
|
DARRAY(struct cf_token) tokens;
|
|
bool ignore_state;
|
|
};
|
|
|
|
/*
 * Initializes a preprocessor.
 * NOTE(review): implementation not visible here; presumably must be called
 * before any other cf_preprocessor_* function -- confirm.
 */
EXPORT void cf_preprocessor_init(struct cf_preprocessor *pp);
|
|
/*
 * Releases a preprocessor.
 * NOTE(review): implementation not visible here; presumably frees the
 * resources held by `pp` but not `pp` itself -- confirm.
 */
EXPORT void cf_preprocessor_free(struct cf_preprocessor *pp);
|
|
|
|
/*
 * Runs the preprocessor.
 *
 * NOTE(review): implementation not visible here. Presumably processes the
 * tokens of `lex`, reports problems through `ed`, and returns true on
 * success -- confirm.
 */
EXPORT bool cf_preprocess(struct cf_preprocessor *pp,
			  struct cf_lexer *lex,
			  struct error_data *ed);
|
|
|
|
static inline void cf_preprocessor_add_sys_include_dir(
|
|
struct cf_preprocessor *pp, const char *include_dir)
|
|
{
|
|
if (include_dir)
|
|
da_push_back(pp->sys_include_dirs, bstrdup(include_dir));
|
|
}
|
|
|
|
/*
 * Registers a definition with the preprocessor.
 *
 * NOTE(review): implementation not visible here. Confirm whether the
 * contents of `def` are copied or taken over, and what happens when a
 * definition with the same name already exists.
 */
EXPORT void cf_preprocessor_add_def(struct cf_preprocessor *pp,
				    struct cf_def *def);
|
|
/*
 * Removes the definition named `def_name` from the preprocessor.
 *
 * NOTE(review): implementation not visible here. Confirm the behavior when
 * no definition with that name exists.
 */
EXPORT void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
				       const char *def_name);
|
|
|
|
static inline struct cf_token *cf_preprocessor_get_tokens(
|
|
struct cf_preprocessor *pp)
|
|
{
|
|
return pp->tokens.array;
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|