From 9c751437c669b44aafec7b576797ba29cb3aeba3 Mon Sep 17 00:00:00 2001
From: falsycat
Date: Sun, 20 Dec 2020 00:00:00 +0000
Subject: [PATCH] Adds lexer and parser.

---
 CMakeLists.txt |  23 +++++
 TODO.TXT       |   8 ++
 biner.l        |  59 ++++++++++++
 biner.y        | 233 +++++++++++++++++++++++++++++++++++++++++++++++++
 main.c         |  29 +++++++
 tree.c         |  39 +++++++++
 tree.h         | 114 +++++++++++++++++++++++++
 zone.h         |  58 ++++++++++++
 8 files changed, 563 insertions(+)
 create mode 100644 TODO.TXT
 create mode 100644 biner.l
 create mode 100644 biner.y
 create mode 100644 main.c
 create mode 100644 tree.c
 create mode 100644 tree.h
 create mode 100644 zone.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5ce98c8..b955dce 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,3 +9,26 @@ set(CMAKE_C_STANDARD "11")
 add_compile_options(
   -Wall -Wextra -pedantic -Werror -Wno-missing-field-initializers
 )
+
+# Generated lexer/parser sources are written into the build tree.
+set(BINER_GENERATED_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
+
+find_package(BISON REQUIRED)
+find_package(FLEX REQUIRED)
+
+file(MAKE_DIRECTORY ${BINER_GENERATED_DIR})
+bison_target(biner-parser biner.y ${BINER_GENERATED_DIR}/biner.y.c)
+flex_target(biner-scanner biner.l ${BINER_GENERATED_DIR}/biner.l.c)
+add_flex_bison_dependency(biner-scanner biner-parser)
+
+add_executable(biner)
+target_sources(biner
+  PRIVATE
+    main.c
+    tree.c
+    ${BISON_biner-parser_OUTPUTS}
+    ${FLEX_biner-scanner_OUTPUTS}
+  PUBLIC
+    zone.h
+)
+target_include_directories(biner PRIVATE . ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/TODO.TXT b/TODO.TXT
new file mode 100644
index 0000000..b21f012
--- /dev/null
+++ b/TODO.TXT
@@ -0,0 +1,8 @@
+X parser
+  error handling (compiler message)
+  expression resolving
+  enum support
+  union support
+  constant support
+  transpiler for C
+  release 1.0.0
diff --git a/biner.l b/biner.l
new file mode 100644
index 0000000..a149e97
--- /dev/null
+++ b/biner.l
@@ -0,0 +1,59 @@
+%option noinput nounput noyywrap
+
+%{
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+
+#include "./tree.h"
+#include "./zone.h"
+
+#include "generated/biner.y.h"
+
+#define ctx (biner_tree_parse_context_)
+
+/* Copies str into the parse zone and returns its zone offset. */
+static inline uintptr_t strnew_(const char* str) {
+  return biner_zone_strnew(&ctx.zone, str);
+}
+
+/* Converts the current token (yytext) to an integer in the given base. */
+static inline intmax_t parse_int_(int base) {
+  char* end = NULL;
+
+  /* Library functions never reset errno to 0, so clear it here; otherwise a
+   * stale ERANGE from an earlier call could trip the assertion below. */
+  errno = 0;
+  const intmax_t v = strtoimax(yytext, &end, base);
+
+  /* TODO: replace asserts with throwing error */
+  assert((v != INTMAX_MIN && v != INTMAX_MAX) || errno != ERANGE);
+  assert(INT64_MIN <= v && v <= INT64_MAX);
+  assert(end != NULL && *end == 0);
+
+  return v;
+}
+%}
+
+D [0-9]
+I [A-Za-z_]
+H [0-9A-Fa-f]
+
+%%
+
+"//".* ;
+[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] ; /* TODO: detect unterminated comment */
+
+"struct" return STRUCT;
+
+{I}({I}|{D})* { yylval.ptr = strnew_(yytext); return IDENT; }
+
+{D}+ { yylval.i = parse_int_(10); return INTEGER; }
+0[xX]{H}+ { yylval.i = parse_int_(16); return INTEGER; }
+
+[\+\-\*\/\.\(\)[\]\{\}\;] return yytext[0];
+
+(.|\n) ;
+
+%%
diff --git a/biner.y b/biner.y
new file mode 100644
index 0000000..473cabd
--- /dev/null
+++ b/biner.y
@@ -0,0 +1,233 @@
+%{
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "./tree.h"
+#include "./zone.h"
+
+#define ctx (biner_tree_parse_context_)
+
+/* alloc_ returns a zone offset; ref turns an offset into a pointer.  A pointer
+ * obtained from ref is only valid until the next alloc_, which may move the
+ * zone buffer. */
+#define alloc_(T) (biner_zone_alloc(&ctx.zone, sizeof(T)))
+#define ref(T, p) ((T*) (ctx.zone.ptr+(p)))
+
+extern int yylex(void);
+extern void yyerror(const char*);
+
+/* Walks the member list starting at itr (following prev links) and returns the
+ * offset of the member called name.  Reports an error and returns 0 when no
+ * such member exists. */
+static inline biner_zone_ptr(biner_tree_struct_member_t)
+find_struct_member_(
+    biner_zone_ptr(biner_tree_struct_member_t) itr,
+    biner_zone_ptr(char) name) {
+  while (itr) {
+    const biner_tree_struct_member_t* m = ref(biner_tree_struct_member_t, itr);
+    if (strcmp(ref(char, m->name), ref(char, name)) == 0) {
+      return itr;
+    }
+    itr = m->prev;
+  }
+  yyerror("unknown member");
+  return 0;
+}
+%}
+
+%union {
+  int64_t i;
+  uintptr_t ptr;
+}
+
+%token STRUCT
+%token <ptr> IDENT
+%token <i> INTEGER;
+
+%type <ptr> decl_list decl
+%type <ptr> struct_body struct_member struct_member_type struct_member_reference
+%type <ptr> expr add_expr mul_expr operand
+
+%start decl_list
+
+%%
+
+decl_list
+  : decl {
+    *ref(biner_tree_root_t, ctx.root) = (biner_tree_root_t) {
+      .decls = $1,
+    };
+    $$ = ctx.root;
+  }
+  | decl_list decl {
+    ref(biner_tree_decl_t, $2)->prev = ref(biner_tree_root_t, $1)->decls;
+    ref(biner_tree_root_t, $1)->decls = $2;
+    $$ = $1;
+  }
+  ;
+
+decl
+  : STRUCT IDENT '{' struct_body '}' ';' {
+    $$ = alloc_(biner_tree_decl_t);
+    *ref(biner_tree_decl_t, $$) = (biner_tree_decl_t) {
+      .name = $2,
+      .member = $4,
+    };
+    ctx.last_decl = $$;
+    ctx.last_member = 0;
+  }
+  ;
+
+struct_body
+  : struct_member {
+    $$ = ctx.last_member = $1;
+  }
+  | struct_body struct_member {
+    ref(biner_tree_struct_member_t, $2)->prev = $1;
+    $$ = ctx.last_member = $2;
+  }
+  ;
+
+struct_member
+  : struct_member_type IDENT ';' {
+    $$ = alloc_(biner_tree_struct_member_t);
+    *ref(biner_tree_struct_member_t, $$) =
+      (biner_tree_struct_member_t) {
+        .type = $1,
+        .name = $2,
+      };
+  }
+  ;
+
+struct_member_type
+  : IDENT {
+    /* TODO: upgrade generic type to user-defined type. */
+    $$ = alloc_(biner_tree_struct_member_type_t);
+    *ref(biner_tree_struct_member_type_t, $$) =
+      (biner_tree_struct_member_type_t) {
+        .kind = BINER_TREE_STRUCT_MEMBER_TYPE_KIND_GENERIC,
+        .generic = $1,
+        .qualifier = BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_NONE,
+      };
+  }
+  | IDENT '[' expr ']' {
+    $$ = alloc_(biner_tree_struct_member_type_t);
+    *ref(biner_tree_struct_member_type_t, $$) =
+      (biner_tree_struct_member_type_t) {
+        .kind = BINER_TREE_STRUCT_MEMBER_TYPE_KIND_GENERIC,
+        .generic = $1,
+        .qualifier = BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_DYNAMIC_ARRAY,
+        .expr = $3,
+      };
+  }
+  ;
+
+expr
+  : add_expr { $$ = $1; }
+  ;
+
+add_expr
+  : mul_expr
+  | add_expr '+' mul_expr {
+    $$ = alloc_(biner_tree_expr_t);
+    *ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
+      .type = BINER_TREE_EXPR_TYPE_OPERATOR_ADD,
+      .operands = {$1, $3},
+    };
+  }
+  | add_expr '-' mul_expr {
+    $$ = alloc_(biner_tree_expr_t);
+    *ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
+      .type = BINER_TREE_EXPR_TYPE_OPERATOR_SUB,
+      .operands = {$1, $3},
+    };
+  }
+  ;
+
+mul_expr
+  : operand
+  | mul_expr '*' operand {
+    $$ = alloc_(biner_tree_expr_t);
+    *ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
+      .type = BINER_TREE_EXPR_TYPE_OPERATOR_MUL,
+      .operands = {$1, $3},
+    };
+  }
+  | mul_expr '/' operand {
+    $$ = alloc_(biner_tree_expr_t);
+    *ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
+      .type = BINER_TREE_EXPR_TYPE_OPERATOR_DIV,
+      .operands = {$1, $3},
+    };
+  }
+  ;
+
+operand
+  : INTEGER {
+    $$ = alloc_(biner_tree_expr_t);
+    *ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
+      .type = BINER_TREE_EXPR_TYPE_OPERAND_INTEGER,
+      .i = $1,
+    };
+  }
+  | struct_member_reference {
+    $$ = alloc_(biner_tree_expr_t);
+    *ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
+      .type = BINER_TREE_EXPR_TYPE_OPERAND_REFER,
+      .r = $1,
+    };
+  }
+  | '(' expr ')' { $$ = $2; }
+  ;
+
+struct_member_reference
+  : IDENT {
+    const biner_zone_ptr(biner_tree_struct_member_t) m =
+      find_struct_member_(ctx.last_member, $1);
+    if (m == 0) YYABORT;  /* find_struct_member_ has already reported it */
+
+    $$ = alloc_(biner_tree_struct_member_reference_t);
+    *ref(biner_tree_struct_member_reference_t, $$) =
+      (biner_tree_struct_member_reference_t) {
+        .member = m,
+      };
+  }
+  | struct_member_reference '.' IDENT {
+    /* $1 is a reference node, so hop through it to reach the member. */
+    const biner_tree_struct_member_reference_t* r =
+      ref(biner_tree_struct_member_reference_t, $1);
+    const biner_tree_struct_member_t* p =
+      ref(biner_tree_struct_member_t, r->member);
+
+    const biner_tree_struct_member_type_t* t =
+      ref(biner_tree_struct_member_type_t, p->type);
+    if (t->kind != BINER_TREE_STRUCT_MEMBER_TYPE_KIND_USER_DEFINED) {
+      yyerror("not user-defined data");
+      YYABORT;
+    }
+
+    const biner_tree_decl_t* d = ref(biner_tree_decl_t, t->decl);
+    if (d->type != BINER_TREE_DECL_TYPE_STRUCT) {
+      yyerror("not struct");
+      YYABORT;
+    }
+
+    /* Look the member up before alloc_: allocating may move the zone and
+     * leave d dangling. */
+    const biner_zone_ptr(biner_tree_struct_member_t) m =
+      find_struct_member_(d->member, $3);
+    if (m == 0) YYABORT;
+
+    $$ = alloc_(biner_tree_struct_member_reference_t);
+    *ref(biner_tree_struct_member_reference_t, $$) =
+      (biner_tree_struct_member_reference_t) {
+        .member = m,
+      };
+  }
+  ;
+
+%%
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..bd60d22
--- /dev/null
+++ b/main.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./tree.h"
+
+/* Parses stdin and prints each declaration followed by its members. */
+int main(void) {
+  const uint8_t* zone = biner_tree_parse(stdin);
+  if (zone == NULL) return EXIT_FAILURE;
+
+  const biner_tree_root_t* root = (const biner_tree_root_t*) zone;
+
+  /* Lists are linked by zone offset; offset 0 (== zone itself) ends a list. */
+  const biner_tree_decl_t* decl =
+    (const biner_tree_decl_t*) (zone + root->decls);
+  while ((uintptr_t) decl != (uintptr_t) zone) {
+    printf("%s:\n", zone + decl->name);
+
+    const biner_tree_struct_member_t* member =
+      (const biner_tree_struct_member_t*) (zone + decl->member);
+    while ((uintptr_t) member != (uintptr_t) zone) {
+      printf(" %s\n", zone + member->name);
+      member = (const biner_tree_struct_member_t*) (zone + member->prev);
+    }
+    decl = (const biner_tree_decl_t*) (zone + decl->prev);
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/tree.c b/tree.c
new file mode 100644
index 0000000..ac5c848
--- /dev/null
+++ b/tree.c
@@ -0,0 +1,39 @@
+#include "./tree.h"
+
+#include <assert.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./zone.h"
+
+#include "generated/biner.y.h"
+
+biner_tree_parse_context_t biner_tree_parse_context_ = {0};
+
+/* Error callback for the generated parser.  The signature must match the
+ * `extern void yyerror(const char*)` declaration in biner.y. */
+void yyerror(const char* str) {
+  extern char* yytext;
+  fprintf(stderr, "error: %s: %s\n", str, yytext);
+}
+
+/* Parses fp into the global zone.  Returns the zone base address, or NULL when
+ * yyparse() reports failure.  Aborts if the dirty flag was already set. */
+const uint8_t* biner_tree_parse(FILE* fp) {
+  if (atomic_flag_test_and_set(&biner_tree_parse_context_.dirty)) {
+    fprintf(stderr, "parsing context is dirty now\n");
+    abort();
+  }
+
+  extern FILE* yyin;
+  yyin = fp;
+
+  biner_tree_parse_context_.root = biner_zone_alloc(
+    &biner_tree_parse_context_.zone, sizeof(biner_tree_root_t));
+  assert(biner_tree_parse_context_.root == 0);
+  return yyparse()? NULL: biner_tree_parse_context_.zone.ptr;
+}
diff --git a/tree.h b/tree.h
new file mode 100644
index 0000000..2ffea9c
--- /dev/null
+++ b/tree.h
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "./zone.h"
+
+/* Tree nodes live in a biner_zone_t and link to each other by zone offset
+ * (biner_zone_ptr) rather than by pointer. */
+
+typedef struct biner_tree_expr_t biner_tree_expr_t;
+typedef struct biner_tree_struct_member_t biner_tree_struct_member_t;
+typedef struct biner_tree_struct_member_reference_t biner_tree_struct_member_reference_t;
+typedef struct biner_tree_struct_t biner_tree_struct_t;
+typedef struct biner_tree_decl_t biner_tree_decl_t;
+
+typedef enum biner_tree_expr_type_t {
+  BINER_TREE_EXPR_TYPE_OPERAND_INTEGER,
+  BINER_TREE_EXPR_TYPE_OPERAND_REFER,
+  BINER_TREE_EXPR_TYPE_OPERATOR_ADD,
+  BINER_TREE_EXPR_TYPE_OPERATOR_SUB,
+  BINER_TREE_EXPR_TYPE_OPERATOR_MUL,
+  BINER_TREE_EXPR_TYPE_OPERATOR_DIV,
+} biner_tree_expr_type_t;
+
+typedef struct biner_tree_expr_t {
+  biner_tree_expr_type_t type;
+  union {
+    int64_t i;
+    biner_zone_ptr(biner_tree_struct_member_reference_t) r;
+    struct {
+      biner_zone_ptr(biner_tree_expr_t) l;
+      biner_zone_ptr(biner_tree_expr_t) r;
+    } operands;
+  };
+} biner_tree_expr_t;
+
+typedef enum biner_tree_struct_member_type_kind_t {
+  BINER_TREE_STRUCT_MEMBER_TYPE_KIND_GENERIC,
+  BINER_TREE_STRUCT_MEMBER_TYPE_KIND_USER_DEFINED,
+} biner_tree_struct_member_type_kind_t;
+
+typedef enum biner_tree_struct_member_type_qualifier_t {
+  BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_NONE,
+  BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_STATIC_ARRAY,
+  BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_DYNAMIC_ARRAY,
+} biner_tree_struct_member_type_qualifier_t;
+
+typedef struct biner_tree_struct_member_type_t {
+  biner_tree_struct_member_type_kind_t kind;
+  union {
+    biner_zone_ptr(char) generic;
+    biner_zone_ptr(biner_tree_decl_t) decl;
+  };
+
+  biner_tree_struct_member_type_qualifier_t qualifier;
+  union {
+    size_t i;
+    biner_zone_ptr(biner_tree_expr_t) expr;
+  };
+} biner_tree_struct_member_type_t;
+
+typedef struct biner_tree_struct_member_t {
+  biner_zone_ptr(char) name;
+  biner_zone_ptr(biner_tree_struct_t) owner;
+  biner_zone_ptr(biner_tree_struct_member_type_t) type;
+
+  biner_zone_ptr(biner_tree_struct_member_t) prev;
+} biner_tree_struct_member_t;
+
+typedef struct biner_tree_struct_member_reference_t {
+  biner_zone_ptr(biner_tree_struct_member_t) member;
+  biner_zone_ptr(biner_tree_expr_t) index;
+  biner_zone_ptr(biner_tree_struct_member_reference_t) prev;
+} biner_tree_struct_member_reference_t;
+
+typedef enum biner_tree_decl_type_t {
+  BINER_TREE_DECL_TYPE_STRUCT,
+} biner_tree_decl_type_t;
+
+typedef struct biner_tree_decl_t {
+  biner_zone_ptr(char) name;
+  biner_tree_decl_type_t type;
+  union {
+    biner_zone_ptr(biner_tree_struct_member_t) member;
+  };
+  biner_zone_ptr(biner_tree_decl_t) prev;
+} biner_tree_decl_t;
+
+typedef struct biner_tree_root_t {
+  biner_zone_ptr(biner_tree_decl_t) decls;
+} biner_tree_root_t;
+
+/* Parser-wide state.  `dirty` is set by biner_tree_parse(); root, last_decl
+ * and last_member are offsets into `zone`. */
+typedef struct biner_tree_parse_context_t {
+  atomic_flag dirty;
+  biner_zone_t zone;
+
+  biner_zone_ptr(biner_tree_root_t) root;
+  biner_zone_ptr(biner_tree_decl_t) last_decl;
+  biner_zone_ptr(biner_tree_struct_member_t) last_member;
+} biner_tree_parse_context_t;
+
+extern biner_tree_parse_context_t biner_tree_parse_context_;
+
+/* Parses fp and returns the base address of the zone holding the tree (a
+ * biner_tree_root_t sits at offset 0), or NULL on parse failure. */
+const uint8_t*
+biner_tree_parse(
+  FILE* fp
+);
diff --git a/zone.h b/zone.h
new file mode 100644
index 0000000..b0294a1
--- /dev/null
+++ b/zone.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Zone offset of a T; the type argument is for documentation only. */
+#define biner_zone_ptr(T) uintptr_t
+
+/* Growable byte buffer addressed by offset. */
+typedef struct biner_zone_t {
+  uintptr_t tail;  /* offset just past the last allocation */
+  size_t    size;  /* bytes currently backing ptr */
+  uint8_t*  ptr;   /* buffer base; may move whenever the zone grows */
+} biner_zone_t;
+
+#define BINER_ZONE_RESERVE 1024
+
+/* Appends sz zero-filled bytes to the zone and returns their offset.  Aborts on
+ * allocation failure.  Pointers into the zone may be invalidated. */
+static inline uintptr_t biner_zone_alloc(biner_zone_t* z, size_t sz) {
+  assert(z != NULL);
+  assert(sz > 0);
+
+  const uintptr_t oldtail = z->tail;
+  z->tail += sz;
+  if (z->tail > z->size) {
+    /* realloc(NULL, n) acts like malloc(n), so the first call needs no special
+     * case.  Keep the old pointer until the new one is known to be valid. */
+    uint8_t* grown = realloc(z->ptr, z->tail);
+    if (grown == NULL) {
+      fprintf(stderr, "malloc failure\n");
+      abort();
+    }
+    z->ptr  = grown;
+    z->size = z->tail;
+  }
+  memset(z->ptr+oldtail, 0, sz);
+  return oldtail;
+}
+
+/* Copies the NUL-terminated string str into the zone and returns its offset. */
+static inline uintptr_t biner_zone_strnew(biner_zone_t* z, const char* str) {
+  assert(z != NULL);
+
+  const uintptr_t ret = biner_zone_alloc(z, strlen(str)+1);
+  strcpy((char*) (z->ptr+ret), str);
+  return ret;
+}
+
+/* Releases the zone buffer. */
+static inline void biner_zone_deinitialize(biner_zone_t* z) {
+  assert(z != NULL);
+
+  free(z->ptr);  /* free(NULL) is a no-op */
+}