Adds lexer and parser.

This commit is contained in:
falsycat 2020-12-20 00:00:00 +00:00
parent afb32776ef
commit 9c751437c6
8 changed files with 517 additions and 0 deletions

View File

@ -9,3 +9,25 @@ set(CMAKE_C_STANDARD "11")
add_compile_options(
-Wall -Wextra -pedantic -Werror -Wno-missing-field-initializers
)
# Directory inside the build tree that receives the bison/flex output.
# (The variable name is spelled "GENRATED" consistently throughout this file.)
set(BINER_GENRATED_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
find_package(BISON REQUIRED)
find_package(FLEX REQUIRED)
# The output directory has to exist before the generators write into it.
file(MAKE_DIRECTORY ${BINER_GENRATED_DIR})
bison_target(biner-parser biner.y ${BINER_GENRATED_DIR}/biner.y.c)
flex_target(biner-scanner biner.l ${BINER_GENRATED_DIR}/biner.l.c)
# The scanner includes the header generated by bison, so it depends on the parser target.
add_flex_bison_dependency(biner-scanner biner-parser)
add_executable(biner)
target_sources(biner
PRIVATE
main.c
tree.c
${BISON_biner-parser_OUTPUTS}
${FLEX_biner-scanner_OUTPUTS}
PUBLIC
zone.h
)
# "." resolves "./tree.h" style includes; the binary dir resolves "generated/biner.y.h".
target_include_directories(biner PRIVATE . ${CMAKE_CURRENT_BINARY_DIR})

8
TODO.TXT Normal file
View File

@ -0,0 +1,8 @@
X parser
error handling (compiler message)
expression resolving
enum support
union support
constant support
transpiler for C
release 1.0.0

52
biner.l Normal file
View File

@ -0,0 +1,52 @@
%option noinput nounput noyywrap
%{
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include "./tree.h"
#include "./zone.h"
#include "generated/biner.y.h"
#define ctx (biner_tree_parse_context_)
static inline uintptr_t strnew_(const char* str) {
return biner_zone_strnew(&ctx.zone, str);
}
/* Converts the current token text (yytext) to an integer in the given base.
 *
 * base: radix handed to strtoimax(); the scanner rules call this with 10 for
 *       decimal literals and 16 for 0x-prefixed literals.
 * Returns the parsed value. Out-of-range or partially consumed input is
 * currently caught only by the assertions below. */
static inline intmax_t parse_int_(int base) {
  char* end = NULL;

  /* strtoimax() only ever sets errno, it never clears it; reset it so that a
   * stale ERANGE left behind by an earlier call cannot trip the range check. */
  errno = 0;
  const intmax_t v = strtoimax(yytext, &end, base);

  /* TODO: replace asserts with throwing error */
  assert((v != INTMAX_MIN && v != INTMAX_MAX) || errno != ERANGE);
  assert(INT64_MIN <= v && v <= INT64_MAX);
  assert(end != NULL && *end == 0);
  return v;
}
%}
/* Named character classes used by the rules:
 *   D  decimal digit, I  identifier start character, H  hexadecimal digit.
 *
 * Rules, in order: line comments and block comments are discarded; "struct"
 * is a keyword; identifiers are copied into the zone and returned as IDENT;
 * decimal and 0x-prefixed hexadecimal literals are returned as INTEGER; the
 * listed punctuation characters are returned as themselves; every other
 * character, including newlines, is silently skipped. */
D [0-9]
I [A-Za-z_]
H [0-9A-Fa-f]
%%
"//".* ;
[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] ; /* TODO: detect unterminated comment */
"struct" return STRUCT;
{I}({I}|{D})* { yylval.ptr = strnew_(yytext); return IDENT; }
{D}+ { yylval.i = parse_int_(10); return INTEGER; }
0[xX]{H}+ { yylval.i = parse_int_(16); return INTEGER; }
[\+\-\*\/\.\(\)[\]\{\}\;] return yytext[0];
(.|\n) ;
%%

213
biner.y Normal file
View File

@ -0,0 +1,213 @@
%{
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "./tree.h"
#include "./zone.h"
#define ctx (biner_tree_parse_context_)
#define alloc_(T) (biner_zone_alloc(&ctx.zone, sizeof(T)))
#define ref(T, p) ((T*) (ctx.zone.ptr+p))
extern int yylex(void);
extern void yyerror(const char*);
/* Searches a member list for a member with the given name.
 *
 * itr:  zone offset of the member to start from; the list is followed through
 *       each member's `prev` offset until offset 0 is reached.
 * name: zone offset of the NUL-terminated name to look for.
 * Returns the zone offset of the first matching member. When nothing matches,
 * "unknown member" is reported through yyerror() and 0 is returned. */
static inline biner_zone_ptr(biner_tree_struct_member_t)
find_struct_member_(
    biner_zone_ptr(biner_tree_struct_member_t) itr,
    biner_zone_ptr(char) name) {
  /* nothing is allocated while searching, so this pointer stays valid */
  const char* wanted = ref(char, name);

  for (; itr != 0; itr = ref(biner_tree_struct_member_t, itr)->prev) {
    const char* candidate =
      ref(char, ref(biner_tree_struct_member_t, itr)->name);
    if (strcmp(candidate, wanted) == 0) {
      return itr;
    }
  }
  yyerror("unknown member");
  return 0;
}
%}
/* Semantic values: `i` carries integer literals, `ptr` carries offsets into
 * the parse context's zone (strings and tree nodes alike). */
%union {
int64_t i;
uintptr_t ptr;
}
%token STRUCT
%token <ptr> IDENT
%token <i> INTEGER;
/* Every nonterminal yields a zone offset. */
%type <ptr> decl_list decl
%type <ptr> struct_body struct_member struct_member_type struct_member_reference
%type <ptr> expr add_expr mul_expr operand
%start decl_list
%%
/* decl_list: one or more declarations.
 * The root node stored at ctx.root heads a list of declarations linked through
 * `prev`; each new declaration is pushed at the front, so the list runs from
 * the most recently parsed declaration to the first one. */
decl_list
: decl {
/* first declaration: (re)initialise the root so that it points at it */
*ref(biner_tree_root_t, ctx.root) = (biner_tree_root_t) {
.decls = $1,
};
$$ = ctx.root;
}
| decl_list decl {
/* push the new declaration in front of the ones collected so far */
ref(biner_tree_decl_t, $2)->prev = ref(biner_tree_root_t, $1)->decls;
ref(biner_tree_root_t, $1)->decls = $2;
$$ = $1;
}
;
/* decl: `struct NAME { members } ;`
 * Allocates a declaration node holding the name and the member list produced
 * by struct_body. `type` is not set explicitly and therefore stays at the
 * zero value of the enum. */
decl
: STRUCT IDENT '{' struct_body '}' ';' {
$$ = alloc_(biner_tree_decl_t);
*ref(biner_tree_decl_t, $$) = (biner_tree_decl_t) {
.name = $2,
.member = $4,
};
ctx.last_decl = $$;
/* the struct is complete: member lookups must not see its members anymore */
ctx.last_member = 0;
}
;
/* struct_body: one or more members.
 * Members are chained through `prev`, newest first. The value of the rule is
 * the most recently parsed member, which is also recorded in ctx.last_member
 * (the starting point used when an expression refers to a member by name). */
struct_body
: struct_member {
$$ = ctx.last_member = $1;
}
| struct_body struct_member {
ref(biner_tree_struct_member_t, $2)->prev = $1;
$$ = ctx.last_member = $2;
}
;
/* struct_member: `TYPE NAME ;`
 * Allocates a member node with its type and name; the remaining fields
 * (`owner`, `prev`) start out as 0. */
struct_member
: struct_member_type IDENT ';' {
$$ = alloc_(biner_tree_struct_member_t);
*ref(biner_tree_struct_member_t, $$) =
(biner_tree_struct_member_t) {
.type = $1,
.name = $2,
};
}
;
/* struct_member_type: `NAME` or `NAME [ expr ]`.
 * Both forms currently produce a GENERIC type that stores the type name as a
 * string; the bracketed form additionally marks the type as a dynamic array
 * whose length is given by the expression. */
struct_member_type
: IDENT {
/* TODO: upgrade generic type to user-defined type. */
$$ = alloc_(biner_tree_struct_member_type_t);
*ref(biner_tree_struct_member_type_t, $$) =
(biner_tree_struct_member_type_t) {
.kind = BINER_TREE_STRUCT_MEMBER_TYPE_KIND_GENERIC,
.generic = $1,
.qualifier = BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_NONE,
};
}
| IDENT '[' expr ']' {
$$ = alloc_(biner_tree_struct_member_type_t);
*ref(biner_tree_struct_member_type_t, $$) =
(biner_tree_struct_member_type_t) {
.kind = BINER_TREE_STRUCT_MEMBER_TYPE_KIND_GENERIC,
.generic = $1,
.qualifier = BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_DYNAMIC_ARRAY,
.expr = $3,
};
}
;
/* expr: entry point of the expression grammar; currently just an add_expr. */
expr
: add_expr { $$ = $1; }
;
/* add_expr: left-associative `+` and `-` over mul_expr.
 * The first alternative has no action, so bison's default action passes the
 * value of mul_expr through unchanged. */
add_expr
: mul_expr
| add_expr '+' mul_expr {
$$ = alloc_(biner_tree_expr_t);
*ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
.type = BINER_TREE_EXPR_TYPE_OPERATOR_ADD,
.operands = {$1, $3},
};
}
| add_expr '-' mul_expr {
$$ = alloc_(biner_tree_expr_t);
*ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
.type = BINER_TREE_EXPR_TYPE_OPERATOR_SUB,
.operands = {$1, $3},
};
}
;
/* mul_expr: left-associative `*` and `/` over operand. Being nested below
 * add_expr, these operators bind tighter than `+` and `-`. */
mul_expr
: operand
| mul_expr '*' operand {
$$ = alloc_(biner_tree_expr_t);
*ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
.type = BINER_TREE_EXPR_TYPE_OPERATOR_MUL,
.operands = {$1, $3},
};
}
| mul_expr '/' operand {
$$ = alloc_(biner_tree_expr_t);
*ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
.type = BINER_TREE_EXPR_TYPE_OPERATOR_DIV,
.operands = {$1, $3},
};
}
;
/* operand: an integer literal, a reference to a struct member, or a
 * parenthesised expression. Literals and references are wrapped in a fresh
 * expression node; parentheses simply forward the inner expression. */
operand
: INTEGER {
$$ = alloc_(biner_tree_expr_t);
*ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
.type = BINER_TREE_EXPR_TYPE_OPERAND_INTEGER,
.i = $1,
};
}
| struct_member_reference {
$$ = alloc_(biner_tree_expr_t);
*ref(biner_tree_expr_t, $$) = (biner_tree_expr_t) {
.type = BINER_TREE_EXPR_TYPE_OPERAND_REFER,
.r = $1,
};
}
| '(' expr ')' { $$ = $2; }
;
/* struct_member_reference: `NAME` or `reference . NAME`.
 *
 * A bare name is looked up among the members of the struct that is currently
 * being parsed (starting at ctx.last_member). The dotted form looks the name
 * up among the members of the struct type of the member referenced so far.
 *
 * In both forms the lookup is completed BEFORE the new node is allocated:
 * alloc_() may move the zone buffer, which invalidates every pointer obtained
 * through ref(). A failed lookup has already been reported by
 * find_struct_member_(), so parsing is aborted instead of building a node
 * that points at offset 0 (where the root node lives).
 *
 * NOTE(review): `index` and `prev` of the reference node are left at 0 here,
 * as before; presumably they are meant to be filled in later - confirm. */
struct_member_reference
  : IDENT {
      const biner_zone_ptr(biner_tree_struct_member_t) member =
        find_struct_member_(ctx.last_member, $1);
      if (member == 0) {
        YYABORT;
      }
      $$ = alloc_(biner_tree_struct_member_reference_t);
      *ref(biner_tree_struct_member_reference_t, $$) =
        (biner_tree_struct_member_reference_t) {
          .member = member,
        };
    }
  | struct_member_reference '.' IDENT {
      /* the left-hand side is a reference node, not a member: follow its
       * `member` offset to reach the member it refers to */
      const biner_tree_struct_member_reference_t* r =
        ref(biner_tree_struct_member_reference_t, $1);
      const biner_tree_struct_member_t* p =
        ref(biner_tree_struct_member_t, r->member);
      const biner_tree_struct_member_type_t* t =
        ref(biner_tree_struct_member_type_t, p->type);
      if (t->kind != BINER_TREE_STRUCT_MEMBER_TYPE_KIND_USER_DEFINED) {
        yyerror("not user-defined data");
        YYABORT;
      }
      const biner_tree_decl_t* d = ref(biner_tree_decl_t, t->decl);
      if (d->type != BINER_TREE_DECL_TYPE_STRUCT) {
        yyerror("not struct");
        YYABORT;
      }
      const biner_zone_ptr(biner_tree_struct_member_t) member =
        find_struct_member_(d->member, $3);
      if (member == 0) {
        YYABORT;
      }
      /* r, p, t and d must not be used past this allocation */
      $$ = alloc_(biner_tree_struct_member_reference_t);
      *ref(biner_tree_struct_member_reference_t, $$) =
        (biner_tree_struct_member_reference_t) {
          .member = member,
        };
    }
  ;
%%

27
main.c Normal file
View File

@ -0,0 +1,27 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "./tree.h"
/* Parses a biner source from stdin and prints every declaration name followed
 * by the names of its members. Both lists are walked through their `prev`
 * offsets until offset 0 is reached, i.e. newest entry first.
 * Returns EXIT_FAILURE when parsing fails, EXIT_SUCCESS otherwise. */
int main(void) {
  const uint8_t* zone = biner_tree_parse(stdin);
  if (zone == NULL) {
    return EXIT_FAILURE;
  }

  /* the root node sits at offset 0 of the zone */
  const biner_tree_root_t* root = (const biner_tree_root_t*) zone;

  for (uintptr_t decl_off = root->decls; decl_off != 0; ) {
    const biner_tree_decl_t* decl =
      (const biner_tree_decl_t*) (zone + decl_off);
    printf("%s:\n", zone + decl->name);

    for (uintptr_t member_off = decl->member; member_off != 0; ) {
      const biner_tree_struct_member_t* member =
        (const biner_tree_struct_member_t*) (zone + member_off);
      printf(" %s\n", zone + member->name);
      member_off = member->prev;
    }
    decl_off = decl->prev;
  }
  return EXIT_SUCCESS;
}

36
tree.c Normal file
View File

@ -0,0 +1,36 @@
#include "./tree.h"
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "./zone.h"
#include "generated/biner.y.h"
biner_tree_parse_context_t biner_tree_parse_context_ = {0};
/* Error callback used by the parser: prints the message together with the
 * token text the scanner matched last.
 *
 * The signature matches the declaration the grammar file uses
 * (`extern void yyerror(const char*)`); defining the function with a
 * different return or parameter type would make the two declarations
 * incompatible. */
void yyerror(const char* str) {
  extern char* yytext;
  fprintf(stderr, "error: %s: %s\n", str, yytext);
}
const uint8_t* biner_tree_parse(FILE* fp) {
if (atomic_flag_test_and_set(&biner_tree_parse_context_.dirty)) {
fprintf(stderr, "parsing context is dirty now\n");
abort();
}
extern FILE* yyin;
yyin = fp;
biner_tree_parse_context_.root = biner_zone_alloc(
&biner_tree_parse_context_.zone, sizeof(biner_tree_root_t));
assert(biner_tree_parse_context_.root == 0);
return yyparse()? NULL: biner_tree_parse_context_.zone.ptr;
}

107
tree.h Normal file
View File

@ -0,0 +1,107 @@
#pragma once
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include "./zone.h"
typedef struct biner_tree_expr_t biner_tree_expr_t;
typedef struct biner_tree_struct_member_t biner_tree_struct_member_t;
typedef struct biner_tree_struct_member_reference_t biner_tree_struct_member_reference_t;
typedef struct biner_tree_struct_t biner_tree_struct_t;
typedef struct biner_tree_decl_t biner_tree_decl_t;
/* Kind of an expression node: either an operand (leaf) or a binary operator. */
typedef enum biner_tree_expr_type_t {
BINER_TREE_EXPR_TYPE_OPERAND_INTEGER,
BINER_TREE_EXPR_TYPE_OPERAND_REFER,
BINER_TREE_EXPR_TYPE_OPERATOR_ADD,
BINER_TREE_EXPR_TYPE_OPERATOR_SUB,
BINER_TREE_EXPR_TYPE_OPERATOR_MUL,
BINER_TREE_EXPR_TYPE_OPERATOR_DIV,
} biner_tree_expr_type_t;
/* Expression node. `type` selects the active union member: the parser fills
 * `i` for OPERAND_INTEGER, `r` for OPERAND_REFER and `operands` for the
 * OPERATOR_* kinds. All biner_zone_ptr() fields are offsets into the zone. */
typedef struct biner_tree_expr_t {
biner_tree_expr_type_t type;
union {
int64_t i;
biner_zone_ptr(biner_tree_struct_member_reference_t) r;
struct {
biner_zone_ptr(biner_tree_expr_t) l;
biner_zone_ptr(biner_tree_expr_t) r;
} operands;
};
} biner_tree_expr_t;
/* Whether a member type is named by a plain string (GENERIC) or refers to a
 * declaration (USER_DEFINED). */
typedef enum biner_tree_struct_member_type_kind_t {
BINER_TREE_STRUCT_MEMBER_TYPE_KIND_GENERIC,
BINER_TREE_STRUCT_MEMBER_TYPE_KIND_USER_DEFINED,
} biner_tree_struct_member_type_kind_t;
/* Array qualifier of a member type. */
typedef enum biner_tree_struct_member_type_qualifier_t {
BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_NONE,
BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_STATIC_ARRAY,
BINER_TREE_STRUCT_MEMBER_TYPE_QUALIFIER_DYNAMIC_ARRAY,
} biner_tree_struct_member_type_qualifier_t;
/* Type of a struct member.
 * `kind` selects the first union: `generic` is the offset of the type name,
 * `decl` the offset of the referenced declaration.
 * `qualifier` selects the second union: the parser stores the length
 * expression in `expr` for DYNAMIC_ARRAY; `i` is presumably the element count
 * of a STATIC_ARRAY (nothing sets it yet - confirm). */
typedef struct biner_tree_struct_member_type_t {
biner_tree_struct_member_type_kind_t kind;
union {
biner_zone_ptr(char) generic;
biner_zone_ptr(biner_tree_decl_t) decl;
};
biner_tree_struct_member_type_qualifier_t qualifier;
union {
size_t i;
biner_zone_ptr(biner_tree_expr_t) expr;
};
} biner_tree_struct_member_type_t;
/* One member of a struct. All fields are zone offsets. `prev` links to the
 * member parsed before this one and is 0 for the first member. `owner` is not
 * set by the parser yet. */
typedef struct biner_tree_struct_member_t {
biner_zone_ptr(char) name;
biner_zone_ptr(biner_tree_struct_t) owner;
biner_zone_ptr(biner_tree_struct_member_type_t) type;
biner_zone_ptr(biner_tree_struct_member_t) prev;
} biner_tree_struct_member_t;
/* Reference to a struct member as it appears inside an expression. The parser
 * currently fills in `member` only; `index` and `prev` stay 0. */
typedef struct biner_tree_struct_member_reference_t {
biner_zone_ptr(biner_tree_struct_member_t) member;
biner_zone_ptr(biner_tree_expr_t) index;
biner_zone_ptr(biner_tree_struct_member_reference_t) prev;
} biner_tree_struct_member_reference_t;
/* Kind of a top-level declaration; structs are the only kind so far. */
typedef enum biner_tree_decl_type_t {
BINER_TREE_DECL_TYPE_STRUCT,
} biner_tree_decl_type_t;
/* Top-level declaration. For a struct, `member` is the offset of its most
 * recently parsed member. `prev` links to the declaration parsed before this
 * one and is 0 for the first declaration. */
typedef struct biner_tree_decl_t {
biner_zone_ptr(char) name;
biner_tree_decl_type_t type;
union {
biner_zone_ptr(biner_tree_struct_member_t) member;
};
biner_zone_ptr(biner_tree_decl_t) prev;
} biner_tree_decl_t;
/* Root of the tree; `decls` is the offset of the most recently parsed
 * declaration. */
typedef struct biner_tree_root_t {
biner_zone_ptr(biner_tree_decl_t) decls;
} biner_tree_root_t;
/* State shared between the scanner, the parser and biner_tree_parse().
 *   dirty       set by biner_tree_parse(); a call that finds it already set aborts
 *   zone        buffer that owns every string and node of the tree
 *   root        offset of the root node
 *   last_decl   offset of the declaration parsed most recently
 *   last_member offset of the newest member of the struct being parsed, or 0
 *               outside of a struct body */
typedef struct biner_tree_parse_context_t {
atomic_flag dirty;
biner_zone_t zone;
biner_zone_ptr(biner_tree_root_t) root;
biner_zone_ptr(biner_tree_decl_t) last_decl;
biner_zone_ptr(biner_tree_struct_member_t) last_member;
} biner_tree_parse_context_t;
/* The one global context instance. */
extern biner_tree_parse_context_t biner_tree_parse_context_;
/* Parses the source read from `fp`. Returns the base address of the zone that
 * holds the tree (root node at offset 0), or NULL when parsing fails. */
const uint8_t*
biner_tree_parse(
FILE* fp
);

52
zone.h Normal file
View File

@ -0,0 +1,52 @@
#pragma once
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define biner_zone_ptr(T) uintptr_t
/* A growable bump allocator. Allocations are identified by their byte offset
 * from `ptr` rather than by address, because the buffer may move whenever it
 * grows. A zero-initialised biner_zone_t is a valid empty zone. */
typedef struct biner_zone_t {
  uintptr_t tail;  /* offset of the first unused byte */
  size_t    size;  /* number of bytes currently allocated at ptr */
  uint8_t*  ptr;   /* base of the buffer, NULL until the first allocation */
} biner_zone_t;

/* Extra bytes requested whenever the buffer has to grow, so that a run of
 * small allocations does not reallocate every time. */
#define BINER_ZONE_RESERVE 1024

/* Reports an allocation failure and terminates the process. */
static inline void biner_zone_fail_(void) {
  fprintf(stderr, "malloc failure\n");
  abort();
}

/* Reserves `sz` zero-filled bytes at the end of the zone.
 *
 * z:  zone to allocate from, must not be NULL.
 * sz: number of bytes wanted, must be greater than 0.
 * Returns the offset of the new region relative to z->ptr. The first
 * allocation of an empty zone is always at offset 0.
 *
 * The request is rounded up to a multiple of the strictest fundamental
 * alignment, so every returned offset is suitably aligned for any object type
 * even after odd-sized allocations such as strings.
 *
 * Growing the buffer may move it: pointers derived from z->ptr before the
 * call must not be used afterwards. The process is aborted when the size
 * computation would overflow or when memory cannot be obtained. */
static inline uintptr_t biner_zone_alloc(biner_zone_t* z, size_t sz) {
  assert(z != NULL);
  assert(sz > 0);

  const size_t align = _Alignof(max_align_t);
  if (sz > SIZE_MAX - (align - 1)) {
    biner_zone_fail_();
  }
  const size_t padded = (sz + align - 1) / align * align;

  const uintptr_t oldtail = z->tail;
  if (padded > SIZE_MAX - oldtail) {
    biner_zone_fail_();
  }
  const size_t newtail = oldtail + padded;

  if (newtail > z->size) {
    const size_t newsize =
      newtail <= SIZE_MAX - BINER_ZONE_RESERVE ?
        newtail + BINER_ZONE_RESERVE : newtail;

    /* keep the old buffer reachable until the new one is known to be valid */
    uint8_t* grown = realloc(z->ptr, newsize);
    if (grown == NULL) {
      biner_zone_fail_();
    }
    z->ptr  = grown;
    z->size = newsize;
  }
  z->tail = newtail;

  memset(z->ptr + oldtail, 0, padded);
  return oldtail;
}
/* Stores a copy of the NUL-terminated string `str` in the zone.
 *
 * z:   zone to allocate from, must not be NULL.
 * str: string to copy, terminator included.
 * Returns the offset of the copy relative to z->ptr. */
static inline uintptr_t biner_zone_strnew(biner_zone_t* z, const char* str) {
  assert(z != NULL);

  const size_t bytes = strlen(str) + 1;  /* characters plus the terminator */
  const uintptr_t offset = biner_zone_alloc(z, bytes);
  memcpy(z->ptr + offset, str, bytes);
  return offset;
}
/* Releases the buffer owned by the zone and returns the zone to its empty,
 * zeroed state, so that no dangling pointer or stale offset is left behind
 * and the zone can be used again afterwards.
 *
 * z: zone to release, must not be NULL. A zone that never allocated anything
 *    is fine: free(NULL) does nothing. */
static inline void biner_zone_deinitialize(biner_zone_t* z) {
  assert(z != NULL);
  free(z->ptr);
  *z = (biner_zone_t) {0};
}