commit a64c5d5c261b1b99ac4e1ead9802f727bf93df60
parent 6f0276816f9ee6296c23bd2f03e06f37a6723f57
Author: noone <vazkats@gmail.com>
Date: Tue, 6 Jan 2026 13:45:48 +0200
Single pass parser. A lot simpler than I imagined. No error checking. Trivially implemented
Diffstat:
5 files changed, 53 insertions(+), 76 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+a.out
+test.txt
diff --git a/a.out b/a.out
Binary files differ.
diff --git a/main.c b/main.c
@@ -2,6 +2,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
typedef uint8_t u8;
typedef uint32_t u32;
@@ -14,9 +15,17 @@ typedef int32_t i32;
} while (0);
typedef enum {
- CMD_BOLD, CMD_ITALIC, CMD_H1, CMD_H2, CMD_CODE, CMD_QUOTE, CMD_STYLE
+ CMD_BOLD, CMD_ITALIC, CMD_H1, CMD_H2, CMD_HTML
} CMD_TYPE;
+const char opening_tags[20][20] = {
+ {"<b>"}, {"<i>"}, {"<h1>"}, {"<h2>"}, {"<html>"}
+};
+
+const char closing_tags[20][20] = {
+ {"</b>"}, {"</i>"}, {"</h1>"}, {"</h2>"}, {"</html>"}
+};
+
typedef struct {
i8 top;
CMD_TYPE arr[100];
@@ -55,62 +64,18 @@ file_open(const char* flname)
i32
cmd_get(const char* cmd)
{
- if (strcmp(cmd, "b") == 0) return CMD_BOLD;
- if (strcmp(cmd, "h1") == 0) return CMD_H1;
- if (strcmp(cmd, "h2") == 0) return CMD_H2;
- if (strcmp(cmd, "i") == 0) return CMD_ITALIC;
+ if (strcmp(cmd, "~b") == 0) return CMD_BOLD;
+ if (strcmp(cmd, "~h1") == 0) return CMD_H1;
+ if (strcmp(cmd, "~h2") == 0) return CMD_H2;
+ if (strcmp(cmd, "~i") == 0) return CMD_ITALIC;
+ if (strcmp(cmd, "~html") == 0) return CMD_HTML;
return -2;
}
-char*
-parse(char* token, stack *s)
+void
+parse_cmd(char* cur)
{
- const char *cmd;
-
- if(strstr(token, "~") && (strlen(token) > 1)) {
- cmd = token + 1;
- switch (cmd_get(cmd)) {
- case CMD_BOLD:
- stack_push(s, CMD_BOLD);
- token = "<b>";
- break;
- case CMD_H1:
- stack_push(s, CMD_H1);
- token = "<h1>";
- break;
- case CMD_H2:
- stack_push(s, CMD_H2);
- token = "<h2>";
- break;
- case CMD_ITALIC:
- stack_push(s, CMD_ITALIC);
- token = "<i>";
- break;
- default:
- break;
- }
- }
-
- if(strcmp(token, "~") == 0) {
- switch(stack_pop(s)) {
- case CMD_BOLD:
- token = "</b>";
- break;
- case CMD_H1:
- token = "</h1>";
- break;
- case CMD_H2:
- token = "</h2>";
- break;
- case CMD_ITALIC:
- token = "</i>";
- break;
- default:
- break;
- }
- }
- return token;
}
int
@@ -119,23 +84,41 @@ main(int argc, char **argv)
char *in;
char *out;
FILE *fp_out;
- char *token;
- char *peek;
- int res = 0;
+ char *cur;
+ char *looker;
+ char token[20];
+
stack s = stack_init();
- in = file_open(argv[1]);
- fp_out = fopen("out.html", "w");
+ in = file_open("test.txt");
+ fp_out = fopen("index.html", "w");
+
+ cur = &in[0];
- token = strsep(&in, " ");
- while (token != NULL) {
- if (!strpbrk(token, "\n"))
- token = parse(token, &s);
- fwrite(token, strlen(token), 1, fp_out);
- fwrite(" ", 1, 1, fp_out);
-
- printf("Token is: %s, len is: %d\n", token, strlen(token));
- token = strsep(&in, " ");
+ while (*cur != '\0') {
+ if (*cur == '~') {
+ looker = cur;
+
+ while (*(++looker) >= '0' && *(looker) <= 'z');
+
+ /* When it's just ~ */
+ if (looker - cur == 1) {
+ const char* ch = closing_tags[stack_pop(&s)];
+ fprintf(fp_out, "%s", ch);
+ } else {
+ /* The token is the (looker - cur) characters starting at cur, leading '~' included */
+ sprintf(token, "%.*s", (int)(looker-cur), cur);
+
+ const char *cmd = opening_tags[cmd_get(token)];
+ fprintf(fp_out, "%s", cmd);
+
+ stack_push(&s, cmd_get(token));
+ }
+
+ cur = looker;
+ } else {
+ fputc(*cur++, fp_out);
+ }
}
free(in);
diff --git a/out.html b/out.html
@@ -1,8 +0,0 @@
-Hello there <b> you know </b> of <i> course </i> <b> yeap ~
-
-
-
-
-
-~h1 hi ~
-
-\ No newline at end of file
diff --git a/test.txt b/test.txt
@@ -1,7 +1,8 @@
+~html
Hello there ~b you know ~ of ~i course ~ ~b yeap ~
-~h1 hi ~
+~h1 ~b hi ~ ~ ~