nut/common/parseconf.c

650 lines
14 KiB
C
Raw Normal View History

2010-03-25 23:20:59 +00:00
/* parseconf.c - state machine-driven dynamic configuration file parser
Copyright (C) 2002 Russell Kroll <rkroll@exploits.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* parseconf, version 4.
*
* This one abandons the "callback" system introduced last time. It
* didn't turn out as well as I had hoped - you got stuck "behind"
* parseconf too often.
*
* There is now a context buffer, and you call pconf_init to set it up.
* All subsequent calls must have it as the first argument. There are
* two entry points for parsing lines. You can have it read a file
2022-06-29 10:37:36 +00:00
* (pconf_file_begin and pconf_file_next), take lines directly from
2010-03-25 23:20:59 +00:00
* the caller (pconf_line), or go along a character at a time (pconf_char).
* The parsing is identical no matter how you feed it.
*
* Since there are no more callbacks, you take the successful return
* from the function and access ctx->arglist and ctx->numargs yourself.
* You must check for errors with pconf_parse_error before using them,
* since it might not be complete. This lets the caller handle all
* error reporting that's nonfatal.
*
* Fatal errors are those that involve memory allocation. If the user
* defines an error handler when calling pconf_init, that function will
2022-06-29 10:37:36 +00:00
* be called with the error message before parseconf exits. By default
2010-03-25 23:20:59 +00:00
* it will just write the message to stderr before exiting.
*
* Input vs. Output:
*
* What it reads --> What ends up in each argument
*
* this is a line --> "this" "is" "a" "line"
* this "is also" a line --> "this" "is also" "a" "line"
* embedded\ space --> "embedded space"
* embedded\\backslash --> "embedded\backslash"
*
* Arguments are split by whitespace (isspace()) unless that whitespace
* occurs inside a "quoted pair like this".
*
* You can also escape the double quote (") character. The backslash
* also allows you to join lines, allowing you to have logical lines
* that span physical lines, just like you can do in some shells.
*
2022-06-29 10:37:36 +00:00
* Lines normally end with a newline, but reaching EOF will also force
2010-03-25 23:20:59 +00:00
* parsing on what's been scanned so far.
2022-06-29 10:37:36 +00:00
*
2010-03-25 23:20:59 +00:00
* Design:
*
2022-06-29 10:37:36 +00:00
* Characters are read one at a time to drive the state machine.
2010-03-25 23:20:59 +00:00
* As words are completed (by hitting whitespace or ending a "" item),
* they are committed to the next buffer in the arglist. realloc is
* used, so the buffer can grow to handle bigger words.
*
* The arglist also grows as necessary with a similar approach. As a
* result, you can parse extremely long words and lines with an insane
* number of elements.
*
* Finally, there is argsize, which remembers how long each of the
* arglist elements are. This is how we know when to expand them.
*
*/
2022-06-29 10:37:36 +00:00
#include "common.h"
2010-03-25 23:20:59 +00:00
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
2022-06-29 10:37:36 +00:00
#include <string.h>
2010-03-25 23:20:59 +00:00
#include <unistd.h>
2022-06-29 10:37:36 +00:00
#include <fcntl.h>
2010-03-25 23:20:59 +00:00
#include "parseconf.h"
2022-06-29 10:37:36 +00:00
#include "attribute.h"
#include "nut_stdint.h"
2010-03-25 23:20:59 +00:00
/* possible states */
#define STATE_FINDWORDSTART 1
#define STATE_FINDEOL 2
#define STATE_QUOTECOLLECT 3
#define STATE_QC_LITERAL 4
#define STATE_COLLECT 5
#define STATE_COLLECTLITERAL 6
#define STATE_ENDOFLINE 7
#define STATE_PARSEERR 8
2022-06-29 10:37:36 +00:00
static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
__attribute__((noreturn));
2010-03-25 23:20:59 +00:00
static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
{
if (ctx->errhandler)
ctx->errhandler(errtxt);
else
fprintf(stderr, "parseconf: fatal error: %s\n", errtxt);
exit(EXIT_FAILURE);
}
static void add_arg_word(PCONF_CTX_t *ctx)
{
2022-06-29 10:37:36 +00:00
size_t argpos;
2010-03-25 23:20:59 +00:00
size_t wbuflen;
/* this is where the new value goes */
argpos = ctx->numargs;
ctx->numargs++;
/* when facing more args than ever before, expand the list */
if (ctx->numargs > ctx->maxargs) {
ctx->maxargs = ctx->numargs;
/* resize the lists */
2022-06-29 10:37:36 +00:00
ctx->arglist = realloc(ctx->arglist,
2010-03-25 23:20:59 +00:00
sizeof(char *) * ctx->numargs);
if (!ctx->arglist)
pconf_fatal(ctx, "realloc arglist failed");
2022-06-29 10:37:36 +00:00
ctx->argsize = realloc(ctx->argsize,
2011-06-01 20:31:49 +00:00
sizeof(size_t) * ctx->numargs);
2010-03-25 23:20:59 +00:00
if (!ctx->argsize)
pconf_fatal(ctx, "realloc argsize failed");
/* ensure sane starting values */
ctx->arglist[argpos] = NULL;
ctx->argsize[argpos] = 0;
}
wbuflen = strlen(ctx->wordbuf);
/* now see if the string itself grew compared to last time */
if (wbuflen >= ctx->argsize[argpos]) {
size_t newlen;
/* allow for the trailing NULL */
newlen = wbuflen + 1;
/* expand the string storage */
ctx->arglist[argpos] = realloc(ctx->arglist[argpos], newlen);
if (!ctx->arglist[argpos])
pconf_fatal(ctx, "realloc arglist member failed");
/* remember the new size */
ctx->argsize[argpos] = newlen;
}
/* strncpy doesn't give us a trailing NULL, so prep the space */
memset(ctx->arglist[argpos], '\0', ctx->argsize[argpos]);
/* finally copy the new value into the provided space */
strncpy(ctx->arglist[argpos], ctx->wordbuf, wbuflen);
}
static void addchar(PCONF_CTX_t *ctx)
{
size_t wbuflen;
wbuflen = strlen(ctx->wordbuf);
2022-06-29 10:37:36 +00:00
/* CVE-2012-2944: only allow the subset of ASCII charset from Space to ~ */
2012-06-01 13:55:19 +00:00
if ((ctx->ch < 0x20) || (ctx->ch > 0x7f)) {
fprintf(stderr, "addchar: discarding invalid character (0x%02x)!\n",
ctx->ch);
return;
}
2010-03-25 23:20:59 +00:00
if (ctx->wordlen_limit != 0) {
if (wbuflen >= ctx->wordlen_limit) {
/* limit reached: don't append any more */
return;
}
}
/* allow for the null */
if (wbuflen >= (ctx->wordbufsize - 1)) {
ctx->wordbufsize += 8;
ctx->wordbuf = realloc(ctx->wordbuf, ctx->wordbufsize);
if (!ctx->wordbuf)
pconf_fatal(ctx, "realloc wordbuf failed");
/* repoint as wordbuf may have moved */
ctx->wordptr = &ctx->wordbuf[wbuflen];
}
2022-06-29 10:37:36 +00:00
*ctx->wordptr++ = (char)ctx->ch;
2010-03-25 23:20:59 +00:00
*ctx->wordptr = '\0';
}
static void endofword(PCONF_CTX_t *ctx)
{
if (ctx->arg_limit != 0) {
if (ctx->numargs >= ctx->arg_limit) {
/* don't accept this word - just drop it */
ctx->wordptr = ctx->wordbuf;
*ctx->wordptr = '\0';
return;
}
}
add_arg_word(ctx);
ctx->wordptr = ctx->wordbuf;
*ctx->wordptr = '\0';
}
/* look for the beginning of a word */
static int findwordstart(PCONF_CTX_t *ctx)
{
/* newline = the physical line is over, so the logical one is too */
if (ctx->ch == 10)
return STATE_ENDOFLINE;
/* the rest of the line is a comment */
if (ctx->ch == '#')
return STATE_FINDEOL;
/* space = not in a word yet, so loop back */
2022-06-29 10:37:36 +00:00
if (isspace((size_t)ctx->ch))
return STATE_FINDWORDSTART;
2010-03-25 23:20:59 +00:00
/* \ = literal = accept the next char blindly */
if (ctx->ch == '\\')
return STATE_COLLECTLITERAL;
/* " = begin word bounded by quotes */
if (ctx->ch == '"')
return STATE_QUOTECOLLECT;
/* at this point the word just started */
addchar(ctx);
2012-06-01 13:55:19 +00:00
/* if the first character is a '=' this is considered a whole word */
if (ctx->ch == '=') {
endofword(ctx);
return STATE_FINDWORDSTART;
}
2010-03-25 23:20:59 +00:00
return STATE_COLLECT;
2022-06-29 10:37:36 +00:00
}
2010-03-25 23:20:59 +00:00
/* eat characters until the end of the line is found */
static int findeol(PCONF_CTX_t *ctx)
{
/* newline = found it, so start a new line */
if (ctx->ch == 10)
return STATE_ENDOFLINE;
/* come back here */
return STATE_FINDEOL;
}
/* set up the error reporting details */
static void pconf_seterr(PCONF_CTX_t *ctx, const char *errmsg)
{
snprintf(ctx->errmsg, PCONF_ERR_LEN, "%s", errmsg);
ctx->error = 1;
2022-06-29 10:37:36 +00:00
}
2010-03-25 23:20:59 +00:00
/* quote characters inside a word bounded by "quotes" */
static int quotecollect(PCONF_CTX_t *ctx)
{
/* user is trying to break us */
if (ctx->ch == '#') {
pconf_seterr(ctx, "Unbalanced word due to unescaped # in quotes");
endofword(ctx);
/* this makes us drop all the way out of the caller */
return STATE_PARSEERR;
}
/* another " means we're done with this word */
if (ctx->ch == '"') {
endofword(ctx);
2022-06-29 10:37:36 +00:00
2010-03-25 23:20:59 +00:00
return STATE_FINDWORDSTART;
}
/* literal - special case since it needs to return here */
if (ctx->ch == '\\')
return STATE_QC_LITERAL;
/* otherwise save it and loop back */
addchar(ctx);
return STATE_QUOTECOLLECT;
}
/* take almost anything literally, but return to quotecollect */
static int qc_literal(PCONF_CTX_t *ctx)
{
/* continue onto the next line of the file */
if (ctx->ch == 10)
return STATE_QUOTECOLLECT;
addchar(ctx);
return STATE_QUOTECOLLECT;
}
/* collect characters inside a word */
static int collect(PCONF_CTX_t *ctx)
{
/* comment means the word is done, and skip to the end of the line */
if (ctx->ch == '#') {
endofword(ctx);
return STATE_FINDEOL;
}
/* newline means the word is done, and the line is done */
if (ctx->ch == 10) {
endofword(ctx);
return STATE_ENDOFLINE;
}
/* space means the word is done */
2022-06-29 10:37:36 +00:00
if (isspace((size_t)ctx->ch)) {
2010-03-25 23:20:59 +00:00
endofword(ctx);
return STATE_FINDWORDSTART;
}
2012-06-01 13:55:19 +00:00
/* '=' means the word is done and the = is a single char word*/
if (ctx->ch == '=') {
endofword(ctx);
findwordstart(ctx);
return STATE_FINDWORDSTART;
}
2010-03-25 23:20:59 +00:00
/* \ = literal = accept the next char blindly */
if (ctx->ch == '\\')
return STATE_COLLECTLITERAL;
/* otherwise store it and come back for more */
addchar(ctx);
return STATE_COLLECT;
}
/* take almost anything literally */
static int collectliteral(PCONF_CTX_t *ctx)
{
/* continue to the next line */
if (ctx->ch == 10)
return STATE_COLLECT;
addchar(ctx);
return STATE_COLLECT;
}
/* clean up memory before going back to the user */
static void free_storage(PCONF_CTX_t *ctx)
{
unsigned int i;
free(ctx->wordbuf);
/* clear out the individual words first */
for (i = 0; i < ctx->maxargs; i++)
free(ctx->arglist[i]);
free(ctx->arglist);
free(ctx->argsize);
/* put things back to the initial state */
ctx->arglist = NULL;
ctx->argsize = NULL;
ctx->numargs = 0;
ctx->maxargs = 0;
}
int pconf_init(PCONF_CTX_t *ctx, void errhandler(const char *))
{
/* set up the ctx elements */
ctx->f = NULL;
ctx->state = STATE_FINDWORDSTART;
ctx->numargs = 0;
ctx->maxargs = 0;
ctx->arg_limit = PCONF_DEFAULT_ARG_LIMIT;
ctx->wordlen_limit = PCONF_DEFAULT_WORDLEN_LIMIT;
ctx->linenum = 0;
ctx->error = 0;
ctx->arglist = NULL;
ctx->argsize = NULL;
ctx->wordbufsize = 16;
ctx->wordbuf = calloc(1, ctx->wordbufsize);
if (!ctx->wordbuf)
pconf_fatal(ctx, "malloc wordbuf failed");
ctx->wordptr = ctx->wordbuf;
ctx->errhandler = errhandler;
ctx->magic = PCONF_CTX_t_MAGIC;
return 1;
}
static int check_magic(PCONF_CTX_t *ctx)
{
if (!ctx)
return 0;
if (ctx->magic != PCONF_CTX_t_MAGIC) {
snprintf(ctx->errmsg, PCONF_ERR_LEN, "Invalid ctx buffer");
return 0;
}
return 1;
}
int pconf_file_begin(PCONF_CTX_t *ctx, const char *fn)
{
if (!check_magic(ctx))
return 0;
ctx->f = fopen(fn, "r");
if (!ctx->f) {
snprintf(ctx->errmsg, PCONF_ERR_LEN, "Can't open %s: %s",
fn, strerror(errno));
return 0;
}
2022-06-29 10:37:36 +00:00
/* prevent fd leaking to child processes */
fcntl(fileno(ctx->f), F_SETFD, FD_CLOEXEC);
2010-03-25 23:20:59 +00:00
return 1; /* OK */
}
static void parse_char(PCONF_CTX_t *ctx)
{
switch(ctx->state) {
case STATE_FINDWORDSTART:
ctx->state = findwordstart(ctx);
break;
case STATE_FINDEOL:
ctx->state = findeol(ctx);
break;
case STATE_QUOTECOLLECT:
ctx->state = quotecollect(ctx);
break;
case STATE_QC_LITERAL:
ctx->state = qc_literal(ctx);
break;
case STATE_COLLECT:
ctx->state = collect(ctx);
break;
case STATE_COLLECTLITERAL:
ctx->state = collectliteral(ctx);
break;
} /* switch */
}
/* return 1 if an error occurred, but only do it once */
int pconf_parse_error(PCONF_CTX_t *ctx)
{
if (!check_magic(ctx))
return 0;
if (ctx->error == 1) {
ctx->error = 0;
return 1;
}
return 0;
}
/* clean up the ctx space */
void pconf_finish(PCONF_CTX_t *ctx)
{
if (!check_magic(ctx))
return;
if (ctx->f)
fclose(ctx->f);
free_storage(ctx);
ctx->magic = 0;
}
/* read from a file until a whole line is ready for use */
int pconf_file_next(PCONF_CTX_t *ctx)
{
if (!check_magic(ctx))
return 0;
ctx->linenum++;
/* start over for the new line */
ctx->numargs = 0;
ctx->state = STATE_FINDWORDSTART;
while ((ctx->ch = fgetc(ctx->f)) != EOF) {
parse_char(ctx);
if (ctx->state == STATE_PARSEERR)
return 1;
if (ctx->state == STATE_ENDOFLINE)
return 1;
}
/* deal with files that don't end in a newline */
if (ctx->numargs != 0) {
/* still building a word? */
if (ctx->wordptr != ctx->wordbuf)
endofword(ctx);
return 1;
}
/* finished with nothing left over */
return 0;
}
/* parse a provided line */
int pconf_line(PCONF_CTX_t *ctx, const char *line)
{
size_t i, linelen;
if (!check_magic(ctx))
return 0;
ctx->linenum++;
/* start over for the new line */
ctx->numargs = 0;
ctx->state = STATE_FINDWORDSTART;
linelen = strlen(line);
for (i = 0; i < linelen; i++) {
ctx->ch = line[i];
parse_char(ctx);
if (ctx->state == STATE_PARSEERR)
return 1;
if (ctx->state == STATE_ENDOFLINE)
return 1;
}
/* deal with any lingering characters */
/* still building a word? */
2022-06-29 10:37:36 +00:00
if (ctx->wordptr != ctx->wordbuf)
2010-03-25 23:20:59 +00:00
endofword(ctx); /* tie it off */
return 1;
}
#define PCONF_ESCAPE "#\\\""
char *pconf_encode(const char *src, char *dest, size_t destsize)
{
size_t i, srclen, destlen, maxlen;
if (destsize < 1)
return dest;
memset(dest, '\0', destsize);
/* always leave room for a final NULL */
maxlen = destsize - 1;
srclen = strlen(src);
destlen = 0;
for (i = 0; i < srclen; i++) {
if (strchr(PCONF_ESCAPE, src[i])) {
/* if they both won't fit, we're done */
if (destlen >= maxlen - 1)
return dest;
dest[destlen++] = '\\';
}
/* bail out when dest is full */
if (destlen >= maxlen)
return dest;
dest[destlen++] = src[i];
}
return dest;
}
/* parse input a character at a time */
int pconf_char(PCONF_CTX_t *ctx, char ch)
{
if (!check_magic(ctx))
return -1;
/* if the last call finished a line, clean stuff up for another */
if ((ctx->state == STATE_ENDOFLINE) || (ctx->state == STATE_PARSEERR)) {
ctx->numargs = 0;
ctx->state = STATE_FINDWORDSTART;
}
ctx->ch = ch;
parse_char(ctx);
if (ctx->state == STATE_ENDOFLINE)
return 1;
if (ctx->state == STATE_PARSEERR)
return -1;
return 0;
}