Added HTML blocks

This commit is contained in:
Nate Choe
2022-04-24 21:34:44 -05:00
parent ad26f3e7be
commit 6319c4be5d
5 changed files with 292 additions and 64 deletions

View File

@@ -29,24 +29,29 @@ enum linetype {
/* === */ /* === */
SETEXT2, SETEXT2,
/* --- */ /* --- */
HEADER HEADER,
HTMLCONCRETE,
COMMENTLONG,
PHP,
COMMENTSHORT,
CDATA,
SKELETON,
GENERICTAG
}; };
#define HTMLSTART HTMLCONCRETE
#define HTMLEND GENERICTAG
struct linedata { struct linedata {
enum linetype type; enum linetype type;
union {
int intensity; int intensity;
int isfirst;
} data;
}; };
enum nodetype { void identifyline(char *line, struct linedata *prev, struct linedata *ret);
PARAGRAPH,
CODE,
/* Used for code that starts with spaces */
CODEBLOCK,
/* Used for triple backtick code */
NONE
};
void identifyline(char *line, enum nodetype prev, struct linedata *ret);
/* prev is almost never used, but sometimes it is. */ /* prev is almost never used, but sometimes it is. */
char *realcontent(char *line, struct linedata *data); char *realcontent(char *line, struct linedata *data);

View File

@@ -20,6 +20,8 @@
#include <stddef.h> #include <stddef.h>
#define LEN(arr) (sizeof(arr) / sizeof *(arr))
struct string { struct string {
size_t len; size_t len;
size_t alloc; size_t alloc;

View File

@@ -17,16 +17,35 @@
*/ */
#include <ctype.h> #include <ctype.h>
#include <stdio.h>
#include <string.h> #include <string.h>
#include <stddef.h>
#include <util.h>
#include <mdutil.h> #include <mdutil.h>
static char *truncate(char *str); static char *truncate(char *str);
static char *after(char *begin, char *str);
static void identifyend(char *line, enum linetype prev, struct linedata *ret);
void identifyline(char *line, enum nodetype prev, struct linedata *ret) { static char *concretetags[] = { "pre", "script", "style", "textarea" };
/*
 * Tag names that identifyline() accepts, directly after "<" or "</", as the
 * start of a SKELETON HTML block. The list looks like the block-level tag
 * names of CommonMark's HTML block start condition 6 -- confirm against the
 * spec before adding or removing entries.
 */
static char *skeletontags[] = {
	"address", "article", "aside",
	"base", "basefont", "blockquote", "body",
	"caption", "center", "col", "colgroup",
	"dd", "details", "dialog", "dir", "div", "dl", "dt",
	"fieldset", "figcaption", "figure", "footer", "form", "frame",
	"frameset",
	"h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
	"iframe",
	"legend", "li", "link",
	"main", "menu", "menuitem",
	"nav", "noframes",
	"ol", "optgroup", "option",
	"p", "param",
	"section", "source", "summary",
	"table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
	"ul",
};
void identifyline(char *line, struct linedata *prev, struct linedata *ret) {
int i; int i;
if (prev != PARAGRAPH) { if (HTMLSTART <= prev->type && prev->type <= HTMLEND) {
identifyend(truncate(line), prev->type, ret);
return;
}
if (prev->type != PLAIN) {
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
if (!isspace(line[i])) if (!isspace(line[i]))
goto notspacecode; goto notspacecode;
@@ -71,13 +90,15 @@ notspacecode:
/* There has to be at least 3 delimiter characters */ /* There has to be at least 3 delimiter characters */
} }
nothr: nothr:
for (i = 0; i < 3; ++i) { for (i = 0; line[i] == '`'; ++i) ;
if (line[i] != '`') if (i >= 3) {
goto notfencedcode;
}
ret->type = FENCECODE; ret->type = FENCECODE;
ret->data.intensity = i;
/* The last line of a fenced code block must have at least the
* same number of backticks as the first. */
return; return;
notfencedcode: }
/* notfencedcode: */
if (line[0] == '#') { if (line[0] == '#') {
int pcount; int pcount;
@@ -85,15 +106,73 @@ notfencedcode:
if (line[pcount] != ' ' && line[pcount] != '\0') if (line[pcount] != ' ' && line[pcount] != '\0')
goto notheader; goto notheader;
ret->type = HEADER; ret->type = HEADER;
ret->intensity = pcount; ret->data.intensity = pcount;
return; return;
} }
notheader: notheader:
#define HTMLSTARTCASE(start, rettype) \
if (after(start, line) != NULL) { \
ret->type = rettype; \
ret->data.isfirst = 1; \
return; \
}
HTMLSTARTCASE("<!--", COMMENTLONG);
HTMLSTARTCASE("<![CDATA[", CDATA);
HTMLSTARTCASE("<?", PHP);
HTMLSTARTCASE("<!", COMMENTSHORT);
if (line[0] == '<') {
char *testline;
testline = line + 1;
for (i = 0; i < LEN(concretetags); ++i) {
char *aftertag;
aftertag = after(concretetags[i], testline);
if (aftertag == NULL)
continue;
if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) {
ret->type = HTMLCONCRETE;
ret->data.isfirst = 1;
return;
}
}
if (testline[0] == '/')
++testline;
for (i = 0; i < LEN(skeletontags); ++i) {
char *aftertag;
aftertag = after(skeletontags[i], testline);
if (aftertag == NULL)
continue;
if (aftertag[0] == '\0' ||
strchr(" >", aftertag[0]) ||
after("/>", aftertag) != NULL) {
ret->type = SKELETON;
ret->data.isfirst = 1;
return;
}
}
}
ret->type = PLAIN; ret->type = PLAIN;
return; return;
} }
/* TODO: Finish this */
/*
 * Returns the part of `line` that holds the text content of a line that was
 * classified as `data`, or NULL when that line type carries no content.
 *
 * PLAIN     - the whole line.
 * SPACECODE - the line with its first four characters skipped.
 * HEADER    - the text after the first data->data.intensity characters,
 *             passed through truncate().
 * Every other line type yields NULL.
 */
char *realcontent(char *line, struct linedata *data) {
	char *content = NULL;

	if (data->type == PLAIN)
		content = line;
	else if (data->type == SPACECODE)
		content = line + 4;
	else if (data->type == HEADER)
		content = truncate(line + data->data.intensity);
	return content;
}
static char *truncate(char *str) { static char *truncate(char *str) {
while (isspace(str[0])) while (isspace(str[0]))
@@ -101,16 +180,71 @@ static char *truncate(char *str) {
return str; return str;
} }
char *realcontent(char *line, struct linedata *data) { static char *after(char *begin, char *str) {
switch (data->type) { int i;
case EMPTY: case HR: case SETEXT1: case SETEXT2: case FENCECODE: for (i = 0; begin[i]; ++i) {
if (begin[i] != str[0])
return NULL; return NULL;
case PLAIN: ++str;
return line; }
case SPACECODE: return str;
return line + 4; }
case HEADER:
return truncate(line + data->intensity); static void identifyend(char *line, enum linetype prev, struct linedata *ret) {
int i;
ret->type = EMPTY;
switch (prev) {
case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR:
case SETEXT1: case SETEXT2: case HEADER:
return;
/* In this case, something has gone terribly wrong. */
case HTMLCONCRETE:
for (i = 0; i < LEN(concretetags); ++i) {
char endtag[30];
sprintf(endtag, "</%s>", concretetags[i]);
if (strstr(line, endtag) != NULL) {
ret->type = HTMLCONCRETE;
ret->data.isfirst = 0;
return;
}
}
return;
case COMMENTLONG:
if (strstr(line, "-->") != NULL) {
ret->type = COMMENTLONG;
ret->data.isfirst = 0;
}
return;
case PHP:
if (strstr(line, "?>") != NULL) {
ret->type = PHP;
ret->data.isfirst = 0;
}
return;
case COMMENTSHORT:
if (strchr(line, '>') != NULL) {
ret->type = COMMENTSHORT;
ret->data.isfirst = 0;
}
return;
case CDATA:
if (strstr(line, "]]>") != NULL) {
ret->type = CDATA;
ret->data.isfirst = 0;
}
return;
case SKELETON:
if (line[0] == '\0') {
ret->type = SKELETON;
ret->data.isfirst = 0;
}
return;
case GENERICTAG:
if (line[0] == '\0') {
ret->type = GENERICTAG;
ret->data.isfirst = 0;
}
return;
} }
return NULL;
} }

View File

@@ -26,21 +26,28 @@
#include <template.h> #include <template.h>
struct parsestate { struct parsestate {
enum nodetype type; struct linedata prev;
struct string *para; struct string *para;
int isfirst; int isfirst;
/* Used to insert <br> tags. Currently onlu used for FENCECODE. */ int intensity;
/* Similar to the intensity field in the linedata struct. Currently
* stores the number of backticks used in FENCECODE.*/
}; };
static int parseline(char *line, struct parsestate *currstate, FILE *out); static int parseline(char *line, struct parsestate *currstate, FILE *out);
static int endpara(struct parsestate *state, FILE *out); static int endpara(struct parsestate *state, FILE *out);
static void handlehtmlcase(struct linedata *data, struct parsestate *state,
char *line, FILE *out);
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
char *line, FILE *out);
int parsetemplate(FILE *infile, FILE *outfile) { int parsetemplate(FILE *infile, FILE *outfile) {
struct linefile *realin; struct linefile *realin;
struct parsestate currstate; struct parsestate currstate;
int code; int code;
currstate.type = NONE; currstate.prev.type = EMPTY;
currstate.para = newstring(); currstate.para = newstring();
realin = newlinefile(infile); realin = newlinefile(infile);
@@ -65,11 +72,14 @@ int parsetemplate(FILE *infile, FILE *outfile) {
static int parseline(char *line, struct parsestate *currstate, FILE *out) { static int parseline(char *line, struct parsestate *currstate, FILE *out) {
struct linedata type; struct linedata type;
identifyline(line, currstate->type, &type); identifyline(line, &currstate->prev, &type);
if (currstate->type == CODEBLOCK) { switch (currstate->prev.type) {
if (type.type == FENCECODE) { case FENCECODE:
currstate->type = NONE; if (type.type == FENCECODE &&
type.data.intensity >=
currstate->intensity) {
currstate->prev.type = EMPTY;
fputs("</code>", out); fputs("</code>", out);
return 0; return 0;
} }
@@ -78,45 +88,66 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
fputs(line, out); fputs(line, out);
currstate->isfirst = 0; currstate->isfirst = 0;
return 0; return 0;
case HTMLCONCRETE:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case COMMENTLONG:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case PHP:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case COMMENTSHORT:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case CDATA:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case SKELETON:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case EMPTY: case PLAIN: case SPACECODE: case HR:
case SETEXT1: case SETEXT2: case HEADER: case GENERICTAG:
break;
} }
switch (type.type) { switch (type.type) {
case EMPTY: case EMPTY:
endpara(currstate, out); endpara(currstate, out);
currstate->type = NONE; currstate->prev.type = EMPTY;
return 0; break;
case SETEXT1: case SETEXT1:
if (currstate->type != PARAGRAPH) if (currstate->prev.type != PLAIN)
return 1; return 1;
currstate->type = NONE; currstate->prev.type = EMPTY;
fputs("<h1>", out); fputs("<h1>", out);
fwrite(currstate->para->data, 1, currstate->para->len, out); fwrite(currstate->para->data, 1, currstate->para->len, out);
fputs("</h1>", out); fputs("</h1>", out);
resetstring(currstate->para); resetstring(currstate->para);
return 0; break;
case SETEXT2: case SETEXT2:
if (currstate->type != PARAGRAPH) if (currstate->prev.type != PLAIN)
goto hr; goto hr;
currstate->type = NONE; currstate->prev.type = EMPTY;
fputs("<h2>", out); fputs("<h2>", out);
fwrite(currstate->para->data, 1, currstate->para->len, out); fwrite(currstate->para->data, 1, currstate->para->len, out);
fputs("</h2>", out); fputs("</h2>", out);
resetstring(currstate->para); resetstring(currstate->para);
return 0; break;
case HR: hr: case HR: hr:
endpara(currstate, out); endpara(currstate, out);
currstate->type = NONE; currstate->prev.type = EMPTY;
fputs("<hr>", out); fputs("<hr>", out);
return 0; break;
case PLAIN: case PLAIN:
if (currstate->type != PARAGRAPH) { if (currstate->prev.type != PLAIN) {
endpara(currstate, out); endpara(currstate, out);
currstate->type = PARAGRAPH; currstate->prev.type = PLAIN;
} }
else else
appendcharstring(currstate->para, ' '); appendcharstring(currstate->para, ' ');
appendstrstring(currstate->para, realcontent(line, &type)); appendstrstring(currstate->para, realcontent(line, &type));
return 0; break;
/* According to the commonmark spec, this markdown: /* According to the commonmark spec, this markdown:
Chapter 1 Chapter 1
@@ -136,13 +167,14 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
* */ * */
case FENCECODE: case FENCECODE:
fputs("<code class='block'>", out); fputs("<code class='block'>", out);
currstate->type = CODEBLOCK; currstate->prev.type = FENCECODE;
currstate->isfirst = 1; currstate->isfirst = 1;
currstate->intensity = type.data.intensity;
break; break;
case SPACECODE: case SPACECODE:
if (currstate->type != CODE) { if (currstate->prev.type != SPACECODE) {
endpara(currstate, out); endpara(currstate, out);
currstate->type = CODE; currstate->prev.type = SPACECODE;
fputs("<code class='block'>", out); fputs("<code class='block'>", out);
} }
else else
@@ -152,28 +184,78 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
case HEADER: case HEADER:
endpara(currstate, out); endpara(currstate, out);
fprintf(out, "<h%d>%s</h%d>", fprintf(out, "<h%d>%s</h%d>",
type.intensity, type.data.intensity,
realcontent(line, &type), realcontent(line, &type),
type.intensity); type.data.intensity);
currstate->type = NONE; currstate->prev.type = EMPTY;
break;
case HTMLCONCRETE:
handlehtmlcase(&type, currstate, line, out);
break;
case COMMENTLONG:
handlehtmlcase(&type, currstate, line, out);
break;
case PHP:
handlehtmlcase(&type, currstate, line, out);
break;
case COMMENTSHORT:
handlehtmlcase(&type, currstate, line, out);
break;
case CDATA:
handlehtmlcase(&type, currstate, line, out);
break;
case SKELETON:
handlehtmlcase(&type, currstate, line, out);
break;
case GENERICTAG:
handlehtmlcase(&type, currstate, line, out);
break; break;
} }
return 0; return 0;
} }
static int endpara(struct parsestate *state, FILE *out) { static int endpara(struct parsestate *state, FILE *out) {
switch (state->type) { switch (state->prev.type) {
case PARAGRAPH: case EMPTY: case HR:
case HTMLCONCRETE: case COMMENTLONG: case PHP: case COMMENTSHORT:
case CDATA: case SKELETON: case GENERICTAG:
return 0;
case PLAIN:
fputs("<p>", out); fputs("<p>", out);
fwrite(state->para->data, 1, state->para->len, out); fwrite(state->para->data, 1, state->para->len, out);
fputs("</p>", out); fputs("</p>", out);
resetstring(state->para); resetstring(state->para);
return 0; return 0;
case CODE: case CODEBLOCK: case SPACECODE: case FENCECODE:
fputs("</code>", out); fputs("</code>", out);
return 0; return 0;
case NONE: case HEADER:
fprintf(out, "</h%d>", state->prev.data.intensity);
return 0; return 0;
case SETEXT1:
fputs("</h1>", out);
break;
case SETEXT2:
fputs("</h2>", out);
break;
} }
return 1; return 1;
} }
/*
 * Handles a line that was classified as one of the HTML line types while no
 * HTML block is open: finishes the current state through endpara(), copies
 * `line` verbatim to `out` followed by a newline, and records data->type as
 * the type of the previous line.
 *
 * endpara() has to run before prev.type is overwritten because it reads
 * state->prev.type to decide what to close.
 */
static void handlehtmlcase(struct linedata *data, struct parsestate *state,
		char *line, FILE *out) {
	endpara(state, out);
	fprintf(out, "%s\n", line);
	state->prev.type = data->type;
}
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
char *line, FILE *out) {
if (state->prev.type == data->type && !data->data.isfirst) {
state->prev.type = EMPTY;
return;
}
fputs(line, out);
fputc('\n', out);
}

5
test.md Normal file
View File

@@ -0,0 +1,5 @@
<h1>
This should just be raw HTML
</h1>
This should not be