Proper raw html handling

This commit is contained in:
Nate Choe
2022-05-11 12:17:37 -05:00
parent cda87f2ac0
commit 9d0d998b71
3 changed files with 33 additions and 69 deletions

View File

@@ -47,7 +47,7 @@ struct linedata {
enum linetype type; enum linetype type;
union { union {
int intensity; int intensity;
int isfirst; int islast;
} data; } data;
}; };

View File

@@ -40,8 +40,6 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out);
static int endpara(struct parsestate *state, FILE *out); static int endpara(struct parsestate *state, FILE *out);
static void handlehtmlcase(struct linedata *data, struct parsestate *state, static void handlehtmlcase(struct linedata *data, struct parsestate *state,
char *line, FILE *out); char *line, FILE *out);
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
char *line, FILE *out);
int parsemarkdown(FILE *infile, FILE *outfile) { int parsemarkdown(FILE *infile, FILE *outfile) {
struct linefile *realin; struct linefile *realin;
@@ -88,25 +86,25 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
currstate->isfirst = 0; currstate->isfirst = 0;
return 0; return 0;
case HTMLCONCRETE: case HTMLCONCRETE:
handlehtmlmiddle(&type, currstate, line, out); handlehtmlcase(&type, currstate, line, out);
return 0; return 0;
case COMMENTLONG: case COMMENTLONG:
handlehtmlmiddle(&type, currstate, line, out); handlehtmlcase(&type, currstate, line, out);
return 0; return 0;
case PHP: case PHP:
handlehtmlmiddle(&type, currstate, line, out); handlehtmlcase(&type, currstate, line, out);
return 0; return 0;
case COMMENTSHORT: case COMMENTSHORT:
handlehtmlmiddle(&type, currstate, line, out); handlehtmlcase(&type, currstate, line, out);
return 0; return 0;
case CDATA: case CDATA:
handlehtmlmiddle(&type, currstate, line, out); handlehtmlcase(&type, currstate, line, out);
return 0; return 0;
case SKELETON: case SKELETON:
handlehtmlmiddle(&type, currstate, line, out); handlehtmlcase(&type, currstate, line, out);
return 0; return 0;
case GENERICTAG: case GENERICTAG:
handlehtmlmiddle(&type, currstate, line, out); handlehtmlcase(&type, currstate, line, out);
return 0; return 0;
case EMPTY: case PLAIN: case SPACECODE: case HR: case EMPTY: case PLAIN: case SPACECODE: case HR:
case SETEXT1: case SETEXT2: case HEADER: case SETEXT1: case SETEXT2: case HEADER:
@@ -157,7 +155,7 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
* Should NOT compile to this: * Should NOT compile to this:
<p>Chapter 1</p><hr> <p>Chapter 1</p><hr />
* but rather to this * but rather to this
@@ -249,14 +247,8 @@ static void handlehtmlcase(struct linedata *data, struct parsestate *state,
fputs(line, out); fputs(line, out);
fputc('\n', out); fputc('\n', out);
state->prev.type = data->type; state->prev.type = data->type;
} if (state->prev.type == data->type && data->data.islast) {
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
char *line, FILE *out) {
if (state->prev.type == data->type && !data->data.isfirst) {
state->prev.type = EMPTY; state->prev.type = EMPTY;
return; return;
} }
fputs(line, out);
fputc('\n', out);
} }

View File

@@ -25,7 +25,7 @@
static char *truncate(char *str); static char *truncate(char *str);
static char *after(char *begin, char *str); static char *after(char *begin, char *str);
static void identifyend(char *line, enum linetype prev, struct linedata *ret); static int isend(char *line, enum linetype prev);
static char *concretetags[] = { "pre", "script", "style", "textarea" }; static char *concretetags[] = { "pre", "script", "style", "textarea" };
static char *skeletontags[] = { static char *skeletontags[] = {
@@ -42,7 +42,8 @@ static char *skeletontags[] = {
void identifyline(char *line, struct linedata *prev, struct linedata *ret) { void identifyline(char *line, struct linedata *prev, struct linedata *ret) {
int i; int i;
if (HTMLSTART <= prev->type && prev->type <= HTMLEND) { if (HTMLSTART <= prev->type && prev->type <= HTMLEND) {
identifyend(truncate(line), prev->type, ret); ret->type = prev->type;
ret->data.islast = isend(truncate(line), prev->type);
return; return;
} }
if (prev->type != PLAIN) { if (prev->type != PLAIN) {
@@ -117,7 +118,7 @@ notheader:
#define HTMLSTARTCASE(start, rettype) \ #define HTMLSTARTCASE(start, rettype) \
if (after(start, line) != NULL) { \ if (after(start, line) != NULL) { \
ret->type = rettype; \ ret->type = rettype; \
ret->data.isfirst = 1; \ ret->data.islast = isend(line, rettype); \
return; \ return; \
} }
HTMLSTARTCASE("<!--", COMMENTLONG); HTMLSTARTCASE("<!--", COMMENTLONG);
@@ -128,6 +129,8 @@ notheader:
if (line[0] == '<') { if (line[0] == '<') {
char *testline; char *testline;
testline = line + 1; testline = line + 1;
if (testline[0] == '/')
++testline;
for (i = 0; i < LEN(concretetags); ++i) { for (i = 0; i < LEN(concretetags); ++i) {
char *aftertag; char *aftertag;
aftertag = after(concretetags[i], testline); aftertag = after(concretetags[i], testline);
@@ -135,35 +138,33 @@ notheader:
continue; continue;
if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) { if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) {
ret->type = HTMLCONCRETE; ret->type = HTMLCONCRETE;
ret->data.isfirst = 1; ret->data.islast = 0;
return; return;
} }
} }
if (testline[0] == '/')
++testline;
for (i = 0; i < LEN(skeletontags); ++i) { for (i = 0; i < LEN(skeletontags); ++i) {
char *aftertag; char *aftertag;
aftertag = after(skeletontags[i], testline); aftertag = after(skeletontags[i], testline);
if (aftertag == NULL) if (aftertag == NULL)
continue; continue;
if (aftertag[0] == '\0' || if (aftertag[0] == '\0' ||
strchr(" >", aftertag[0]) || strchr(" \t>", aftertag[0]) ||
after("/>", aftertag) != NULL) { after("/>", aftertag) != NULL) {
ret->type = SKELETON; ret->type = SKELETON;
ret->data.isfirst = 1; ret->data.islast = 0;
return; return;
} }
} }
if (isgenerictag(line)) { if (isgenerictag(line)) {
ret->type = GENERICTAG; ret->type = GENERICTAG;
ret->data.isfirst = 1; ret->data.islast = 0;
return; return;
} }
} }
ret->type = PLAIN; ret->type = PLAIN;
ret->data.isfirst = 1; ret->data.islast = 0;
return; return;
} }
@@ -275,61 +276,32 @@ static char *after(char *begin, char *str) {
return str; return str;
} }
static void identifyend(char *line, enum linetype prev, struct linedata *ret) { static int isend(char *line, enum linetype prev) {
int i; int i;
ret->type = EMPTY;
switch (prev) { switch (prev) {
case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR: case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR:
case SETEXT1: case SETEXT2: case HEADER: case SETEXT1: case SETEXT2: case HEADER:
return; return 1;
/* In this case, something has gone terribly wrong. */ /* In this case, something has gone terribly wrong. */
case HTMLCONCRETE: case HTMLCONCRETE:
for (i = 0; i < LEN(concretetags); ++i) { for (i = 0; i < LEN(concretetags); ++i) {
char endtag[30]; char endtag[30];
sprintf(endtag, "</%s>", concretetags[i]); sprintf(endtag, "</%s>", concretetags[i]);
if (strstr(line, endtag) != NULL) { return strstr(line, endtag) != NULL;
ret->type = HTMLCONCRETE;
ret->data.isfirst = 0;
return;
}
} }
return; return 0;
case COMMENTLONG: case COMMENTLONG:
if (strstr(line, "-->") != NULL) { return strstr(line, "-->") != NULL;
ret->type = COMMENTLONG;
ret->data.isfirst = 0;
}
return;
case PHP: case PHP:
if (strstr(line, "?>") != NULL) { return strstr(line, "?>") != NULL;
ret->type = PHP;
ret->data.isfirst = 0;
}
return;
case COMMENTSHORT: case COMMENTSHORT:
if (strchr(line, '>') != NULL) { return strchr(line, '>') != NULL;
ret->type = COMMENTSHORT;
ret->data.isfirst = 0;
}
return;
case CDATA: case CDATA:
if (strstr(line, "]]>") != NULL) { return strstr(line, "]]>") != NULL;
ret->type = CDATA; case SKELETON: case GENERICTAG:
ret->data.isfirst = 0; return line[0] == '\0';
}
return;
case SKELETON:
if (line[0] == '\0') {
ret->type = SKELETON;
ret->data.isfirst = 0;
}
return;
case GENERICTAG:
if (line[0] == '\0') {
ret->type = GENERICTAG;
ret->data.isfirst = 0;
}
return;
} }
return 1;
} }