Rewrote tokenization code

This commit is contained in:
2022-07-25 04:08:43 -05:00
parent 6b2e112f3b
commit 36d2fefa3b
4 changed files with 125 additions and 138 deletions

View File

@@ -2,7 +2,7 @@ SRC = $(wildcard src/*.c)
OBJ = $(subst .c,.o,$(subst src,work,$(SRC))) OBJ = $(subst .c,.o,$(subst src,work,$(SRC)))
LIBS = gnutls LIBS = gnutls
LDFLAGS = -pie -lrt -ldl $(shell pkg-config --libs $(LIBS)) LDFLAGS = -pie -lrt -ldl $(shell pkg-config --libs $(LIBS))
CFLAGS := -O2 -pipe -Wall -Wpedantic -Wshadow -ansi -D_XOPEN_SOURCE=500 CFLAGS := -O2 -pipe -Wall -Wpedantic -Wshadow -ansi -D_XOPEN_SOURCE=500 -ggdb
CFLAGS += -Isrc/ -fpie -D_POSIX_C_SOURCE=200809L $(shell pkg-config --cflags $(LIBS)) CFLAGS += -Isrc/ -fpie -D_POSIX_C_SOURCE=200809L $(shell pkg-config --cflags $(LIBS))
INSTALLDIR := /usr/sbin INSTALLDIR := /usr/sbin
HEADERDIR := /usr/include/ HEADERDIR := /usr/include/

View File

@@ -9,6 +9,15 @@ sitefiles consist of commands, which are of the form
sitefiles also allow comments with # sitefiles also allow comments with #
Actions and arguments are made of tokens. A token is a run of text delimited by
whitespace. A backslash inside a token makes the character that follows it
literal, so an escaped space or backslash becomes part of the token.
For example:
`token` -> 'token'
`token\ with\ escapes` -> 'token with escapes'
`token\\with\\backslashes` -> 'token\with\backslashes'
# Part 2: Commands # Part 2: Commands
* ```set [variable] [value]``` - sets some local variable for the following * ```set [variable] [value]``` - sets some local variable for the following

View File

@@ -128,6 +128,8 @@ int sendErrorResponse(Stream *stream, const char *error) {
int ret; int ret;
int len = snprintf(NULL, 0, template, error); int len = snprintf(NULL, 0, template, error);
char *response = malloc(len + 1); char *response = malloc(len + 1);
if (response == NULL)
return 1;
sprintf(response, template, error); sprintf(response, template, error);
ret = sendStringResponse(stream, error, response, ret = sendStringResponse(stream, error, response,
"Content-Type: text/html\r\n", NULL); "Content-Type: text/html\r\n", NULL);
@@ -185,6 +187,8 @@ int sendPipe(Stream *stream, const char *status, int fd, ...) {
size_t responseLen = 0; size_t responseLen = 0;
char *response = malloc(allocResponse); char *response = malloc(allocResponse);
va_list ap; va_list ap;
if (response == NULL)
goto error;
for (;;) { for (;;) {
ssize_t len; ssize_t len;
if (responseLen >= allocResponse) { if (responseLen >= allocResponse) {

View File

@@ -33,145 +33,109 @@
* good. * good.
* */ * */
typedef enum { typedef enum {
SUCCESS, ARG,
LINE_END, LINE_END,
FILE_END, FILE_END,
ERROR TOKEN_ERROR
} ReturnCode; } TokenType;
/* this isn't ideal, but it's necessary to avoid namespace collisions. */
static void freeTokens(int argc, char **argv) { typedef struct {
TokenType type;
char *data;
} Token;
typedef enum {
NORMAL,
PAST_END,
COMMAND_ERROR
} CommandType;
static void freecommand(int argc, char **argv) {
int i; int i;
for (i = 0; i < argc; i++) for (i = 0; i < argc; i++)
free(argv[i]); free(argv[i]);
free(argv); free(argv);
} }
static ReturnCode getToken(FILE *file, char **ret) { static void gettoken(FILE *file, Token *ret) {
typedef enum { int c;
QUOTED, char *data;
NONQUOTED
} TokenType;
TokenType type;
size_t allocatedLen = 50;
size_t len; size_t len;
size_t alloc;
for (;;) { for (;;) {
int c = fgetc(file); c = fgetc(file);
if (c == '\n') switch (c) {
return LINE_END; case '\n':
if (c == EOF) ret->type = LINE_END;
return FILE_END; return;
if (c == '#') { case EOF:
while (c != '\n') ret->type = FILE_END;
c = fgetc(file); return;
return LINE_END; case ' ': case '\t':
} continue;
if (!isspace(c)) {
if (c == '"')
type = QUOTED;
else {
type = NONQUOTED;
ungetc(c, file);
}
break;
} }
ret->type = ARG;
ungetc(c, file);
break;
} }
*ret = malloc(allocatedLen); alloc = 20;
data = xmalloc(alloc);
for (len = 0;; len++) { for (len = 0;; ++len) {
int c; if (len >= alloc) {
if (len >= allocatedLen) { alloc *= 2;
char *newret; data = xrealloc(data, alloc);
allocatedLen *= 2;
newret = realloc(*ret, allocatedLen);
if (newret == NULL)
goto error;
*ret = newret;
} }
c = fgetc(file); c = fgetc(file);
switch (type) { if (isspace(c) || c == EOF) {
case QUOTED: data[len] = '\0';
if (c == '"') ret->type = ARG;
goto gotToken; ret->data = data;
break; return;
case NONQUOTED:
if (isspace(c)) {
ungetc(c, file);
goto gotToken;
}
break;
} }
switch (c) { switch (c) {
case '\\': case '\\':
c = fgetc(file); c = fgetc(file);
if (c == EOF) if (c == EOF) {
goto error; ret->type = TOKEN_ERROR;
break; return;
case EOF: }
if (type == NONQUOTED) default:
goto gotToken; data[len] = c;
goto error;
} }
(*ret)[len] = c;
} }
gotToken:
(*ret)[len] = '\0';
return SUCCESS;
error:
free(*ret);
return ERROR;
} }
static ReturnCode getCommand(FILE *file, int *argcret, char ***argvret) { static CommandType getcommand(FILE *file, int *argcret, char ***argvret) {
/* THIS FUNCTION WILL NOT RETURN LINE_END */ int argc, argalloc;
int argc;
char **argv; char **argv;
int allocatedTokens; argalloc = 5;
if (feof(file)) argv = xmalloc(argalloc * sizeof *argv);
return FILE_END;
argc = 0;
allocatedTokens = 5;
argv = malloc(allocatedTokens * sizeof(*argv));
for (;;) {
ReturnCode code;
if (argc >= allocatedTokens) {
char **newargv;
allocatedTokens *= 2;
newargv = realloc(argv,
allocatedTokens * sizeof(char *));
if (newargv == NULL)
goto error;
argv = newargv;
}
code = getToken(file, argv + argc);
switch (code) { for (argc = 0;; ++argc) {
case ERROR: Token token;
goto error; if (argc >= argalloc) {
case LINE_END: argalloc *= 2;
if (argc == 0) argv = xrealloc(argv, argalloc * sizeof *argv);
continue; }
/* We allow empty lines */ gettoken(file, &token);
/* fallthrough */ switch (token.type) {
case FILE_END: case FILE_END:
if (argc == 0) { if (argc == 0)
free(argv); return PAST_END;
return FILE_END; case LINE_END:
} *argcret = argc;
*argcret = argc; *argvret = argv;
*argvret = argv; return NORMAL;
return SUCCESS; case ARG:
case SUCCESS: argv[argc] = token.data;
argc++; break;
break; case TOKEN_ERROR:
return COMMAND_ERROR;
} }
} }
error:
freeTokens(argc, argv);
return ERROR;
} }
static char *getport(char *data, unsigned short *ret) { static char *getport(char *data, unsigned short *ret) {
@@ -247,27 +211,30 @@ Sitefile *parseSitefile(char *path) {
contenttype = xstrdup("text/html"); contenttype = xstrdup("text/html");
for (;;) { for (;;) {
ReturnCode status = getCommand(file, &argc, &argv); int i;
switch (status) { CommandType commandtype;
int i; commandtype = getcommand(file, &argc, &argv);
case FILE_END: switch (commandtype) {
free(ports); case PAST_END:
for (i = 0; i < ret->portcount; ++i) { free(ports);
Port *port = ret->ports + i; for (i = 0; i < ret->portcount; ++i) {
if (port->type == TLS && Port *port = ret->ports + i;
(port->key == NULL || if (port->type == TLS &&
port->cert == NULL)) { (port->key == NULL ||
fprintf(stderr, port->cert == NULL)) {
"Port %hu declared as TLS without proper TLS files\n", port->num); fprintf(stderr,
goto nterror; "Port %hu declarS without proper TLS files\n", port->num);
} goto nterror;
} }
fclose(file); }
return ret; free(contenttype);
case ERROR: case LINE_END: free(host);
goto nterror; fclose(file);
case SUCCESS: return ret;
break; case COMMAND_ERROR:
goto nterror;
case NORMAL:
break;
} }
if (strcmp(argv[0], "set") == 0) { if (strcmp(argv[0], "set") == 0) {
if (argc < 3) if (argc < 3)
@@ -315,7 +282,6 @@ Sitefile *parseSitefile(char *path) {
} }
else if (strcmp(argv[0], "declare") == 0) { else if (strcmp(argv[0], "declare") == 0) {
Port newport; Port newport;
int i;
if (argc < 3) { if (argc < 3) {
fputs( fputs(
"Usage: declare [transport] [port]\n", stderr); "Usage: declare [transport] [port]\n", stderr);
@@ -354,7 +320,6 @@ Sitefile *parseSitefile(char *path) {
} }
#define PORT_ATTRIBUTE(name, func) \ #define PORT_ATTRIBUTE(name, func) \
else if (strcmp(argv[0], #name) == 0) { \ else if (strcmp(argv[0], #name) == 0) { \
int i; \
unsigned short port; \ unsigned short port; \
if (argc < 3) { \ if (argc < 3) { \
fputs("Usage: " #name " [" #name "] [port]\n", \ fputs("Usage: " #name " [" #name "] [port]\n", \
@@ -414,7 +379,7 @@ Sitefile *parseSitefile(char *path) {
fprintf(stderr, "Unknown sitefile command %s", argv[0]); fprintf(stderr, "Unknown sitefile command %s", argv[0]);
goto error; goto error;
} }
freeTokens(argc, argv); freecommand(argc, argv);
ret->content[ret->size].respondto = respondto; ret->content[ret->size].respondto = respondto;
if (host == NULL) if (host == NULL)
regcomp(&ret->content[ret->size].host, ".*", cflags); regcomp(&ret->content[ret->size].host, ".*", cflags);
@@ -431,7 +396,7 @@ Sitefile *parseSitefile(char *path) {
ret->size++; ret->size++;
} }
error: error:
freeTokens(argc, argv); freecommand(argc, argv);
nterror: nterror:
freeSitefile(ret); freeSitefile(ret);
return NULL; return NULL;
@@ -439,11 +404,20 @@ nterror:
void freeSitefile(Sitefile *site) { void freeSitefile(Sitefile *site) {
long i; long i;
for (i = 0; i < site->size; i++) { for (i = 0; i < site->size; ++i) {
regfree(&site->content[i].path); regfree(&site->content[i].path);
regfree(&site->content[i].host); regfree(&site->content[i].host);
/* This doesn't break because free(NULL) is harmless. */
free(site->content[i].arg); free(site->content[i].arg);
free(site->content[i].ports);
free(site->content[i].contenttype);
} }
free(site->content); free(site->content);
for (i = 0; i < site->portcount; ++i) {
free(site->ports[i].key);
free(site->ports[i].cert);
}
free(site->ports);
free(site); free(site);
} }