From 6319c4be5d56e8091601976e9d00171176dbe76a Mon Sep 17 00:00:00 2001 From: Nate Choe Date: Sun, 24 Apr 2022 21:34:44 -0500 Subject: [PATCH] Added HTML blocks --- src/include/mdutil.h | 29 ++++--- src/include/util.h | 2 + src/mdutil.c | 178 +++++++++++++++++++++++++++++++++++++------ src/template.c | 142 ++++++++++++++++++++++++++-------- test.md | 5 ++ 5 files changed, 292 insertions(+), 64 deletions(-) create mode 100644 test.md diff --git a/src/include/mdutil.h b/src/include/mdutil.h index 96efd95..af1a4c6 100644 --- a/src/include/mdutil.h +++ b/src/include/mdutil.h @@ -29,24 +29,29 @@ enum linetype { /* === */ SETEXT2, /* --- */ - HEADER + HEADER, + + HTMLCONCRETE, + COMMENTLONG, + PHP, + COMMENTSHORT, + CDATA, + SKELETON, + GENERICTAG }; +#define HTMLSTART HTMLCONCRETE +#define HTMLEND GENERICTAG + struct linedata { enum linetype type; - int intensity; + union { + int intensity; + int isfirst; + } data; }; -enum nodetype { - PARAGRAPH, - CODE, - /* Used for code that starts with spaces */ - CODEBLOCK, - /* Used for triple backtick code */ - NONE -}; - -void identifyline(char *line, enum nodetype prev, struct linedata *ret); +void identifyline(char *line, struct linedata *prev, struct linedata *ret); /* prev is almost never used, but sometimes it is. */ char *realcontent(char *line, struct linedata *data); diff --git a/src/include/util.h b/src/include/util.h index d58ef5f..20d2fd0 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -20,6 +20,8 @@ #include +#define LEN(arr) (sizeof(arr) / sizeof *(arr)) + struct string { size_t len; size_t alloc; diff --git a/src/mdutil.c b/src/mdutil.c index 98a7731..3be66c5 100644 --- a/src/mdutil.c +++ b/src/mdutil.c @@ -17,16 +17,35 @@ */ #include +#include #include -#include +#include #include static char *truncate(char *str); +static char *after(char *begin, char *str); +static void identifyend(char *line, enum linetype prev, struct linedata *ret); -void identifyline(char *line, enum nodetype prev, struct linedata *ret) { +static char *concretetags[] = { "pre", "script", "style", "textarea" }; +static char *skeletontags[] = { + "address", "article", "aside", "base", "basefont", "blockquote", "body", + "caption", "center", "col", "colgroup", "dd", "details", "dialog", + "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", + "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", + "header", "hr", "html", "iframe", "legend", "li", "link", "main", + "menu", "menuitem", "nav", "noframes", "ol", "optgroup", "option", "p", + "param", "section", "source", "summary", "table", "tbody", "td", + "tfoot", "th", "thead", "title", "tr", "track", "ul", +}; + +void identifyline(char *line, struct linedata *prev, struct linedata *ret) { int i; - if (prev != PARAGRAPH) { + if (HTMLSTART <= prev->type && prev->type <= HTMLEND) { + identifyend(truncate(line), prev->type, ret); + return; + } + if (prev->type != PLAIN) { for (i = 0; i < 4; ++i) { if (!isspace(line[i])) goto notspacecode; @@ -71,13 +90,15 @@ notspacecode: /* There has to be at least 3 delimiter characters */ } nothr: - for (i = 0; i < 3; ++i) { - if (line[i] != '`') - goto notfencedcode; + for (i = 0; line[i] == '`'; ++i) ; + if (i >= 3) { + ret->type = FENCECODE; + ret->data.intensity = i; + /* The last line of a fenced code block must have at least the + * same number of backticks as the first. */ + return; } - ret->type = FENCECODE; - return; -notfencedcode: +/* notfencedcode: */ if (line[0] == '#') { int pcount; @@ -85,15 +106,73 @@ notfencedcode: if (line[pcount] != ' ' && line[pcount] != '\0') goto notheader; ret->type = HEADER; - ret->intensity = pcount; + ret->data.intensity = pcount; return; } notheader: + +#define HTMLSTARTCASE(start, rettype) \ + if (after(start, line) != NULL) { \ + ret->type = rettype; \ + ret->data.isfirst = 1; \ + return; \ + } + HTMLSTARTCASE("") != NULL) { + ret->type = COMMENTLONG; + ret->data.isfirst = 0; + } + return; + case PHP: + if (strstr(line, "?>") != NULL) { + ret->type = PHP; + ret->data.isfirst = 0; + } + return; + case COMMENTSHORT: + if (strchr(line, '>') != NULL) { + ret->type = COMMENTSHORT; + ret->data.isfirst = 0; + } + return; + case CDATA: + if (strstr(line, "]]>") != NULL) { + ret->type = CDATA; + ret->data.isfirst = 0; + } + return; + case SKELETON: + if (line[0] == '\0') { + ret->type = SKELETON; + ret->data.isfirst = 0; + } + return; + case GENERICTAG: + if (line[0] == '\0') { + ret->type = GENERICTAG; + ret->data.isfirst = 0; + } + return; } - return NULL; } diff --git a/src/template.c b/src/template.c index 5ca2717..324a556 100644 --- a/src/template.c +++ b/src/template.c @@ -26,21 +26,28 @@ #include struct parsestate { - enum nodetype type; + struct linedata prev; struct string *para; + int isfirst; - /* Used to insert
tags. Currently onlu used for FENCECODE. */ + int intensity; + /* Similar to the intensity field in the linedata struct. Currently + * stores the number of backticks used in FENCECODE.*/ }; static int parseline(char *line, struct parsestate *currstate, FILE *out); static int endpara(struct parsestate *state, FILE *out); +static void handlehtmlcase(struct linedata *data, struct parsestate *state, + char *line, FILE *out); +static void handlehtmlmiddle(struct linedata *data, struct parsestate *state, + char *line, FILE *out); int parsetemplate(FILE *infile, FILE *outfile) { struct linefile *realin; struct parsestate currstate; int code; - currstate.type = NONE; + currstate.prev.type = EMPTY; currstate.para = newstring(); realin = newlinefile(infile); @@ -65,11 +72,14 @@ int parsetemplate(FILE *infile, FILE *outfile) { static int parseline(char *line, struct parsestate *currstate, FILE *out) { struct linedata type; - identifyline(line, currstate->type, &type); + identifyline(line, &currstate->prev, &type); - if (currstate->type == CODEBLOCK) { - if (type.type == FENCECODE) { - currstate->type = NONE; + switch (currstate->prev.type) { + case FENCECODE: + if (type.type == FENCECODE && + type.data.intensity >= + currstate->intensity) { + currstate->prev.type = EMPTY; fputs("", out); return 0; } @@ -78,45 +88,66 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) { fputs(line, out); currstate->isfirst = 0; return 0; + case HTMLCONCRETE: + handlehtmlmiddle(&type, currstate, line, out); + return 0; + case COMMENTLONG: + handlehtmlmiddle(&type, currstate, line, out); + return 0; + case PHP: + handlehtmlmiddle(&type, currstate, line, out); + return 0; + case COMMENTSHORT: + handlehtmlmiddle(&type, currstate, line, out); + return 0; + case CDATA: + handlehtmlmiddle(&type, currstate, line, out); + return 0; + case SKELETON: + handlehtmlmiddle(&type, currstate, line, out); + return 0; + case EMPTY: case PLAIN: case SPACECODE: case HR: + case SETEXT1: case SETEXT2: case HEADER: case GENERICTAG: + break; } switch (type.type) { case EMPTY: endpara(currstate, out); - currstate->type = NONE; - return 0; + currstate->prev.type = EMPTY; + break; case SETEXT1: - if (currstate->type != PARAGRAPH) + if (currstate->prev.type != PLAIN) return 1; - currstate->type = NONE; + currstate->prev.type = EMPTY; fputs("

", out); fwrite(currstate->para->data, 1, currstate->para->len, out); fputs("

", out); resetstring(currstate->para); - return 0; + break; case SETEXT2: - if (currstate->type != PARAGRAPH) + if (currstate->prev.type != PLAIN) goto hr; - currstate->type = NONE; + currstate->prev.type = EMPTY; fputs("

", out); fwrite(currstate->para->data, 1, currstate->para->len, out); fputs("

", out); resetstring(currstate->para); - return 0; + break; case HR: hr: endpara(currstate, out); - currstate->type = NONE; + currstate->prev.type = EMPTY; fputs("
", out); - return 0; + break; case PLAIN: - if (currstate->type != PARAGRAPH) { + if (currstate->prev.type != PLAIN) { endpara(currstate, out); - currstate->type = PARAGRAPH; + currstate->prev.type = PLAIN; } else appendcharstring(currstate->para, ' '); appendstrstring(currstate->para, realcontent(line, &type)); - return 0; + break; /* According to the commonmark spec, this markdown: Chapter 1 @@ -136,13 +167,14 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) { * */ case FENCECODE: fputs("", out); - currstate->type = CODEBLOCK; + currstate->prev.type = FENCECODE; currstate->isfirst = 1; + currstate->intensity = type.data.intensity; break; case SPACECODE: - if (currstate->type != CODE) { + if (currstate->prev.type != SPACECODE) { endpara(currstate, out); - currstate->type = CODE; + currstate->prev.type = SPACECODE; fputs("", out); } else @@ -152,28 +184,78 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) { case HEADER: endpara(currstate, out); fprintf(out, "%s", - type.intensity, + type.data.intensity, realcontent(line, &type), - type.intensity); - currstate->type = NONE; + type.data.intensity); + currstate->prev.type = EMPTY; + break; + case HTMLCONCRETE: + handlehtmlcase(&type, currstate, line, out); + break; + case COMMENTLONG: + handlehtmlcase(&type, currstate, line, out); + break; + case PHP: + handlehtmlcase(&type, currstate, line, out); + break; + case COMMENTSHORT: + handlehtmlcase(&type, currstate, line, out); + break; + case CDATA: + handlehtmlcase(&type, currstate, line, out); + break; + case SKELETON: + handlehtmlcase(&type, currstate, line, out); + break; + case GENERICTAG: + handlehtmlcase(&type, currstate, line, out); break; } return 0; } static int endpara(struct parsestate *state, FILE *out) { - switch (state->type) { - case PARAGRAPH: + switch (state->prev.type) { + case EMPTY: case HR: + case HTMLCONCRETE: case COMMENTLONG: case PHP: case COMMENTSHORT: + case CDATA: case SKELETON: case GENERICTAG: + return 0; + case PLAIN: fputs("

", out); fwrite(state->para->data, 1, state->para->len, out); fputs("

", out); resetstring(state->para); return 0; - case CODE: case CODEBLOCK: + case SPACECODE: case FENCECODE: fputs("
", out); return 0; - case NONE: + case HEADER: + fprintf(out, "", state->prev.data.intensity); return 0; + case SETEXT1: + fputs("", out); + break; + case SETEXT2: + fputs("", out); + break; } return 1; } + +static void handlehtmlcase(struct linedata *data, struct parsestate *state, + char *line, FILE *out) { + endpara(state, out); + fputs(line, out); + fputc('\n', out); + state->prev.type = data->type; +} + +static void handlehtmlmiddle(struct linedata *data, struct parsestate *state, + char *line, FILE *out) { + if (state->prev.type == data->type && !data->data.isfirst) { + state->prev.type = EMPTY; + return; + } + fputs(line, out); + fputc('\n', out); +} diff --git a/test.md b/test.md new file mode 100644 index 0000000..7470180 --- /dev/null +++ b/test.md @@ -0,0 +1,5 @@ +

+ This should just be raw HTML +

+ +This should not be