From 53295526b38a2bac8320ec70c8c6cc1c1f08ceee Mon Sep 17 00:00:00 2001 From: falsycat Date: Fri, 4 Oct 2019 00:00:00 +0000 Subject: [PATCH] [update] Improved the tokenizing algorithm. --- sjscript/src/sjscript/Token.d | 87 +++++++++++++----------------- sjscript/src/sjscript/preprocess.d | 3 +- thirdparty/dast | 2 +- 3 files changed, 41 insertions(+), 51 deletions(-) diff --git a/sjscript/src/sjscript/Token.d b/sjscript/src/sjscript/Token.d index 6bc955f..ebae53e 100644 --- a/sjscript/src/sjscript/Token.d +++ b/sjscript/src/sjscript/Token.d @@ -7,58 +7,54 @@ import std.algorithm, import dast.tokenize; +/// +unittest { + import std; + + with (TokenType) { + assert("0 0.1 _hoge0_ $hoge". + Tokenize!TokenType. + map!"a.type". + filter!(x => x != Whitespace). + equal([Number, Number, Ident, PreprocessCommand])); + } +} + /// alias TokenPos = dast.tokenize.TokenPos; /// enum TokenType { - @TextFuncMatcher!((string text, string next) { - const point_index = text.countUntil('.'); - if (point_index < 0) { - if (text.all!isDigit) { - if (next.length == 1 && (next[0].isDigit || next[0] == '.')) { - return TextMatchResult.Probably; - } - return TextMatchResult.Completely; - } - } else { - if ((text[0..point_index]~text[point_index+1..$]).all!isDigit) { - if (next.length == 1 && next[0].isDigit) { - return TextMatchResult.Probably; - } - return TextMatchResult.Completely; - } - } - return TextMatchResult.Improbably; + @TextFuncMatcher!((string text) { + const integral_len = text.countUntil!(x => !x.isDigit); + if (integral_len < 0) return text.length; + if (integral_len == 0) return 0; + + if (text[integral_len] != '.') return integral_len; + if (text[integral_len+1..$] == "") return integral_len; + + const fraction_len = + text[integral_len+1..$].countUntil!(x => !x.isDigit); + if (fraction_len < 0) return text.length; + if (fraction_len == 0) return integral_len; + + return integral_len + 1 + fraction_len; }) Number, - @TextFuncMatcher!((string text, string next) { - const head = text[0].isAlpha || text[0] == '_'; - const body = text[1..$].all!( - x => x.isAlpha || x.isDigit || x == '_'); - const nexthead = next.length > 0 && ( - next[0].isAlpha || next[0].isDigit || next[0] == '_'); + @TextFuncMatcher!((string text) { + if (text.length == 0) return 0; + if (!text[0].isAlpha && text[0] != '_') return 0; - if (head && body && !nexthead) return TextMatchResult.Completely; - if (head && body && nexthead) return TextMatchResult.Probably; - return TextMatchResult.Improbably; + const index = text[1..$].countUntil!(x => !x.isAlpha && !x.isDigit && x != '_'); + return index >= 0? index.to!size_t+1: text.length; }) Ident, - @TextFuncMatcher!((string text, string next) { - const head = text[0] == '$'; - if (!head || text.length <= 1) { - return head? TextMatchResult.Probably: TextMatchResult.Improbably; - } - if (text[1] != '_' && !text[1].isAlpha) { - return TextMatchResult.Improbably; - } - if (text[1..$].all!(x => x.isAlpha || x.isDigit || x == '_')) { - if (next.length > 0 && (next[0].isAlpha || next[0].isDigit || next[0] == '_')) { - return TextMatchResult.Probably; - } - return TextMatchResult.Completely; - } - return TextMatchResult.Improbably; + @TextFuncMatcher!((string text) { + if (text.length < 2 || text[0] != '$') return 0; + if (!text[1].isAlpha && text[1] != '_') return 0; + + const index = text[2..$].countUntil!(x => !x.isAlpha && !x.isDigit && x != '_'); + return index >= 0? index.to!size_t+2: text.length; }) PreprocessCommand, @TextCompleteMatcher!"{" OpenBrace, @@ -87,13 +83,6 @@ enum TokenType { End, } -/// -unittest { - with (TokenType) { - "0 0.1 _hoge0_ $hoge".Tokenize!TokenType.map!"a.type".equal( - [Number, Whitespace, Number, Whitespace, Ident, Whitespace, PreprocessCommand]); - } -} /// alias Token = dast.tokenize.Token!(TokenType, string); diff --git a/sjscript/src/sjscript/preprocess.d b/sjscript/src/sjscript/preprocess.d index 1c9a5f4..d73d2fd 100644 --- a/sjscript/src/sjscript/preprocess.d +++ b/sjscript/src/sjscript/preprocess.d @@ -4,6 +4,7 @@ module sjscript.preprocess; import std.algorithm, std.array, std.conv, + std.format, std.range, std.range.primitives, std.typecons; @@ -212,7 +213,7 @@ private struct Preprocessor(R) const counter = GetCounterValue(name); if (counter.isNull) { (name in templates_). - enforce("the template is unknown", front); + enforce("the template (%s) is unknown".format(name), front); body = templates_[name]; } else { body = [Token(counter.get.to!string, TokenType.Number)]; diff --git a/thirdparty/dast b/thirdparty/dast index 1865e40..d99fcb9 160000 --- a/thirdparty/dast +++ b/thirdparty/dast @@ -1 +1 @@ -Subproject commit 1865e404a6f318d31b3655317141d998de6654eb +Subproject commit d99fcb9bf502683ab4e5f6fb19a64a3bda98c83e