Added HTML blocks

This commit is contained in:
Nate Choe
2022-04-24 21:34:44 -05:00
parent ad26f3e7be
commit 6319c4be5d
5 changed files with 292 additions and 64 deletions

View File

@@ -29,24 +29,29 @@ enum linetype {
/* === */
SETEXT2,
/* --- */
HEADER
HEADER,
HTMLCONCRETE,
COMMENTLONG,
PHP,
COMMENTSHORT,
CDATA,
SKELETON,
GENERICTAG
};
/* First and last enumerators of the HTML line types in enum linetype.
 * identifyline treats any prev->type inside [HTMLSTART, HTMLEND] as an HTML
 * block that is still open, so HTML line types have to stay contiguous
 * between these two enumerators. */
#define HTMLSTART HTMLCONCRETE
#define HTMLEND GENERICTAG
/* Classification of one input line, filled in by identifyline. */
struct linedata {
enum linetype type;
int intensity;
/* NOTE(review): has the same name as data.intensity below and looks like
 * the field the union was meant to replace; confirm which one callers
 * should use. */
union {
int intensity;
/* HEADER: heading level, printed as <hN>.
 * FENCECODE: number of backticks in the fence. */
int isfirst;
/* HTML line types: 1 on the line that opens the block, 0 on the line
 * that meets the block's end condition. */
} data;
};
/* Kind of block the parser currently has open; kept in struct parsestate. */
enum nodetype {
PARAGRAPH,
CODE,
/* Used for code that starts with spaces */
CODEBLOCK,
/* Used for triple backtick code */
NONE
};
void identifyline(char *line, enum nodetype prev, struct linedata *ret);
void identifyline(char *line, struct linedata *prev, struct linedata *ret);
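/* prev describes the previously handled line. It is only consulted to
 * continue an HTML block that is still open and to decide whether a line
 * indented by four spaces may start a code block. */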
char *realcontent(char *line, struct linedata *data);

View File

@@ -20,6 +20,8 @@
#include <stddef.h>
#define LEN(arr) (sizeof(arr) / sizeof *(arr))
struct string {
size_t len;
size_t alloc;

View File

@@ -17,16 +17,35 @@
*/
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <util.h>
#include <mdutil.h>
static char *truncate(char *str);
static char *after(char *begin, char *str);
static void identifyend(char *line, enum linetype prev, struct linedata *ret);
void identifyline(char *line, enum nodetype prev, struct linedata *ret) {
/* Tag names that open an HTMLCONCRETE block. identifyend ends such a block on
 * a line that contains the closing tag ("</name>") of any entry. */
static char *concretetags[] = { "pre", "script", "style", "textarea" };
/* Tag names that open a SKELETON block, either as an opening or as a closing
 * tag. identifyline requires the name to be followed by the end of the line,
 * a space, '>' or "/>". */
static char *skeletontags[] = {
"address", "article", "aside", "base", "basefont", "blockquote", "body",
"caption", "center", "col", "colgroup", "dd", "details", "dialog",
"dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer",
"form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head",
"header", "hr", "html", "iframe", "legend", "li", "link", "main",
"menu", "menuitem", "nav", "noframes", "ol", "optgroup", "option", "p",
"param", "section", "source", "summary", "table", "tbody", "td",
"tfoot", "th", "thead", "title", "tr", "track", "ul",
};
void identifyline(char *line, struct linedata *prev, struct linedata *ret) {
int i;
if (prev != PARAGRAPH) {
if (HTMLSTART <= prev->type && prev->type <= HTMLEND) {
identifyend(truncate(line), prev->type, ret);
return;
}
if (prev->type != PLAIN) {
for (i = 0; i < 4; ++i) {
if (!isspace(line[i]))
goto notspacecode;
@@ -71,13 +90,15 @@ notspacecode:
/* There has to be at least 3 delimiter characters */
}
nothr:
for (i = 0; i < 3; ++i) {
if (line[i] != '`')
goto notfencedcode;
for (i = 0; line[i] == '`'; ++i) ;
if (i >= 3) {
ret->type = FENCECODE;
ret->data.intensity = i;
/* The last line of a fenced code block must have at least the
* same number of backticks as the first. */
return;
}
ret->type = FENCECODE;
return;
notfencedcode:
/* notfencedcode: */
if (line[0] == '#') {
int pcount;
@@ -85,15 +106,73 @@ notfencedcode:
if (line[pcount] != ' ' && line[pcount] != '\0')
goto notheader;
ret->type = HEADER;
ret->intensity = pcount;
ret->data.intensity = pcount;
return;
}
notheader:
#define HTMLSTARTCASE(start, rettype) \
if (after(start, line) != NULL) { \
ret->type = rettype; \
ret->data.isfirst = 1; \
return; \
}
HTMLSTARTCASE("<!--", COMMENTLONG);
HTMLSTARTCASE("<![CDATA[", CDATA);
HTMLSTARTCASE("<?", PHP);
HTMLSTARTCASE("<!", COMMENTSHORT);
if (line[0] == '<') {
char *testline;
testline = line + 1;
for (i = 0; i < LEN(concretetags); ++i) {
char *aftertag;
aftertag = after(concretetags[i], testline);
if (aftertag == NULL)
continue;
if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) {
ret->type = HTMLCONCRETE;
ret->data.isfirst = 1;
return;
}
}
if (testline[0] == '/')
++testline;
for (i = 0; i < LEN(skeletontags); ++i) {
char *aftertag;
aftertag = after(skeletontags[i], testline);
if (aftertag == NULL)
continue;
if (aftertag[0] == '\0' ||
strchr(" >", aftertag[0]) ||
after("/>", aftertag) != NULL) {
ret->type = SKELETON;
ret->data.isfirst = 1;
return;
}
}
}
ret->type = PLAIN;
return;
}
/* TODO: Finish this */
/* Returns the part of line that holds its text once the leading markup is
 * skipped, or NULL for line types that carry no text of their own. */
char *realcontent(char *line, struct linedata *data) {
	char *content = NULL;

	switch (data->type) {
	case PLAIN:
		content = line;
		break;
	case SPACECODE:
		/* Skip the four characters of indentation. */
		content = line + 4;
		break;
	case HEADER:
		/* Skip the header marker, then the whitespace that follows it. */
		content = truncate(line + data->data.intensity);
		break;
	case EMPTY: case HR: case SETEXT1: case SETEXT2: case FENCECODE:
	case HTMLCONCRETE: case COMMENTLONG: case PHP: case CDATA:
	case SKELETON: case COMMENTSHORT: case GENERICTAG:
		break;
	}
	return content;
}
static char *truncate(char *str) {
while (isspace(str[0]))
@@ -101,16 +180,71 @@ static char *truncate(char *str) {
return str;
}
char *realcontent(char *line, struct linedata *data) {
switch (data->type) {
case EMPTY: case HR: case SETEXT1: case SETEXT2: case FENCECODE:
return NULL;
case PLAIN:
return line;
case SPACECODE:
return line + 4;
case HEADER:
return truncate(line + data->intensity);
/* Checks whether str starts with begin. Returns a pointer to the first
 * character of str past that prefix, or NULL when str does not start with
 * begin. An empty begin always matches and yields str itself. */
static char *after(char *begin, char *str) {
	const char *want;

	for (want = begin; *want != '\0'; ++want, ++str) {
		/* A shorter str fails here on its terminating '\0'. */
		if (*str != *want)
			return NULL;
	}
	return str;
}
/* Classifies line while an HTML block of type prev is open. identifyline
 * passes the line with its leading whitespace already stripped.
 *
 * ret->type is set to prev, with ret->data.isfirst = 0, when line meets the
 * end condition of that block type; otherwise ret->type is EMPTY and
 * ret->data is left untouched. End conditions:
 *   HTMLCONCRETE          line contains the closing tag of a concretetags entry
 *   COMMENTLONG           line contains "-->"
 *   PHP                   line contains "?>"
 *   COMMENTSHORT          line contains '>'
 *   CDATA                 line contains "]]>"
 *   SKELETON, GENERICTAG  line is empty
 */
static void identifyend(char *line, enum linetype prev, struct linedata *ret) {
	size_t i;

	ret->type = EMPTY;
	switch (prev) {
	case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR:
	case SETEXT1: case SETEXT2: case HEADER:
		/* In this case, something has gone terribly wrong: prev is not an
		 * HTML line type. */
		return;
	case HTMLCONCRETE:
		for (i = 0; i < LEN(concretetags); ++i) {
			char endtag[30];

			/* Bounded so a longer tag name added to the table later
			 * cannot overflow endtag. */
			snprintf(endtag, sizeof endtag, "</%s>", concretetags[i]);
			if (strstr(line, endtag) != NULL) {
				ret->type = HTMLCONCRETE;
				ret->data.isfirst = 0;
				return;
			}
		}
		return;
	case COMMENTLONG:
		if (strstr(line, "-->") != NULL) {
			ret->type = COMMENTLONG;
			ret->data.isfirst = 0;
		}
		return;
	case PHP:
		if (strstr(line, "?>") != NULL) {
			ret->type = PHP;
			ret->data.isfirst = 0;
		}
		return;
	case COMMENTSHORT:
		if (strchr(line, '>') != NULL) {
			ret->type = COMMENTSHORT;
			ret->data.isfirst = 0;
		}
		return;
	case CDATA:
		if (strstr(line, "]]>") != NULL) {
			ret->type = CDATA;
			ret->data.isfirst = 0;
		}
		return;
	case SKELETON:
		if (line[0] == '\0') {
			ret->type = SKELETON;
			ret->data.isfirst = 0;
		}
		return;
	case GENERICTAG:
		if (line[0] == '\0') {
			ret->type = GENERICTAG;
			ret->data.isfirst = 0;
		}
		return;
	}
}

View File

@@ -26,21 +26,28 @@
#include <template.h>
/* Parser state that parseline carries from one input line to the next. */
struct parsestate {
enum nodetype type;
struct linedata prev;
/* Handed to identifyline as the data of the previous line; parseline
 * updates prev.type as blocks are opened and closed. */
struct string *para;
/* Collects the text of the current paragraph until it is written out. */
int isfirst;
/* Used to insert <br> tags. Currently only used for FENCECODE. */
int intensity;
/* Similar to the intensity field in the linedata struct. Currently
 * stores the number of backticks used in FENCECODE.*/
};
static int parseline(char *line, struct parsestate *currstate, FILE *out);
static int endpara(struct parsestate *state, FILE *out);
static void handlehtmlcase(struct linedata *data, struct parsestate *state,
char *line, FILE *out);
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
char *line, FILE *out);
int parsetemplate(FILE *infile, FILE *outfile) {
struct linefile *realin;
struct parsestate currstate;
int code;
currstate.type = NONE;
currstate.prev.type = EMPTY;
currstate.para = newstring();
realin = newlinefile(infile);
@@ -65,11 +72,14 @@ int parsetemplate(FILE *infile, FILE *outfile) {
static int parseline(char *line, struct parsestate *currstate, FILE *out) {
struct linedata type;
identifyline(line, currstate->type, &type);
identifyline(line, &currstate->prev, &type);
if (currstate->type == CODEBLOCK) {
if (type.type == FENCECODE) {
currstate->type = NONE;
switch (currstate->prev.type) {
case FENCECODE:
if (type.type == FENCECODE &&
type.data.intensity >=
currstate->intensity) {
currstate->prev.type = EMPTY;
fputs("</code>", out);
return 0;
}
@@ -78,45 +88,66 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
fputs(line, out);
currstate->isfirst = 0;
return 0;
case HTMLCONCRETE:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case COMMENTLONG:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case PHP:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case COMMENTSHORT:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case CDATA:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case SKELETON:
handlehtmlmiddle(&type, currstate, line, out);
return 0;
case EMPTY: case PLAIN: case SPACECODE: case HR:
case SETEXT1: case SETEXT2: case HEADER: case GENERICTAG:
break;
}
switch (type.type) {
case EMPTY:
endpara(currstate, out);
currstate->type = NONE;
return 0;
currstate->prev.type = EMPTY;
break;
case SETEXT1:
if (currstate->type != PARAGRAPH)
if (currstate->prev.type != PLAIN)
return 1;
currstate->type = NONE;
currstate->prev.type = EMPTY;
fputs("<h1>", out);
fwrite(currstate->para->data, 1, currstate->para->len, out);
fputs("</h1>", out);
resetstring(currstate->para);
return 0;
break;
case SETEXT2:
if (currstate->type != PARAGRAPH)
if (currstate->prev.type != PLAIN)
goto hr;
currstate->type = NONE;
currstate->prev.type = EMPTY;
fputs("<h2>", out);
fwrite(currstate->para->data, 1, currstate->para->len, out);
fputs("</h2>", out);
resetstring(currstate->para);
return 0;
break;
case HR: hr:
endpara(currstate, out);
currstate->type = NONE;
currstate->prev.type = EMPTY;
fputs("<hr>", out);
return 0;
break;
case PLAIN:
if (currstate->type != PARAGRAPH) {
if (currstate->prev.type != PLAIN) {
endpara(currstate, out);
currstate->type = PARAGRAPH;
currstate->prev.type = PLAIN;
}
else
appendcharstring(currstate->para, ' ');
appendstrstring(currstate->para, realcontent(line, &type));
return 0;
break;
/* According to the commonmark spec, this markdown:
Chapter 1
@@ -136,13 +167,14 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
* */
case FENCECODE:
fputs("<code class='block'>", out);
currstate->type = CODEBLOCK;
currstate->prev.type = FENCECODE;
currstate->isfirst = 1;
currstate->intensity = type.data.intensity;
break;
case SPACECODE:
if (currstate->type != CODE) {
if (currstate->prev.type != SPACECODE) {
endpara(currstate, out);
currstate->type = CODE;
currstate->prev.type = SPACECODE;
fputs("<code class='block'>", out);
}
else
@@ -152,28 +184,78 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
case HEADER:
endpara(currstate, out);
fprintf(out, "<h%d>%s</h%d>",
type.intensity,
type.data.intensity,
realcontent(line, &type),
type.intensity);
currstate->type = NONE;
type.data.intensity);
currstate->prev.type = EMPTY;
break;
case HTMLCONCRETE:
handlehtmlcase(&type, currstate, line, out);
break;
case COMMENTLONG:
handlehtmlcase(&type, currstate, line, out);
break;
case PHP:
handlehtmlcase(&type, currstate, line, out);
break;
case COMMENTSHORT:
handlehtmlcase(&type, currstate, line, out);
break;
case CDATA:
handlehtmlcase(&type, currstate, line, out);
break;
case SKELETON:
handlehtmlcase(&type, currstate, line, out);
break;
case GENERICTAG:
handlehtmlcase(&type, currstate, line, out);
break;
}
return 0;
}
static int endpara(struct parsestate *state, FILE *out) {
switch (state->type) {
case PARAGRAPH:
switch (state->prev.type) {
case EMPTY: case HR:
case HTMLCONCRETE: case COMMENTLONG: case PHP: case COMMENTSHORT:
case CDATA: case SKELETON: case GENERICTAG:
return 0;
case PLAIN:
fputs("<p>", out);
fwrite(state->para->data, 1, state->para->len, out);
fputs("</p>", out);
resetstring(state->para);
return 0;
case CODE: case CODEBLOCK:
case SPACECODE: case FENCECODE:
fputs("</code>", out);
return 0;
case NONE:
case HEADER:
fprintf(out, "</h%d>", state->prev.data.intensity);
return 0;
case SETEXT1:
fputs("</h1>", out);
break;
case SETEXT2:
fputs("</h2>", out);
break;
}
return 1;
}
/* Opens an HTML block: closes whatever block was open before, writes line to
 * out verbatim followed by a newline, and records the block's type in
 * state->prev so the following lines are handled as part of the block. */
static void handlehtmlcase(struct linedata *data, struct parsestate *state,
		char *line, FILE *out) {
	/* endpara reads state->prev.type, so it has to run before the update. */
	endpara(state, out);
	fprintf(out, "%s\n", line);
	state->prev.type = data->type;
}
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
char *line, FILE *out) {
if (state->prev.type == data->type && !data->data.isfirst) {
state->prev.type = EMPTY;
return;
}
fputs(line, out);
fputc('\n', out);
}

5
test.md Normal file
View File

@@ -0,0 +1,5 @@
<h1>
This should just be raw HTML
</h1>
This should not be