From 891d25508f30149616cf9093ada859220ec9b443 Mon Sep 17 00:00:00 2001 From: Michiel Lachaert Date: Wed, 20 Nov 2024 11:02:51 +0100 Subject: [PATCH] add code suggestions --- cli/src/cli/commands/run.ts | 2 +- lib/src/lib/tokenizer/codeTokenizer.ts | 6 ++--- lib/src/lib/tokenizer/tokenizer.ts | 9 +------ lib/src/test/tokenizer.test.ts | 33 +++++++------------------- 4 files changed, 14 insertions(+), 36 deletions(-) diff --git a/cli/src/cli/commands/run.ts b/cli/src/cli/commands/run.ts index 53c6314a2..60038bf64 100644 --- a/cli/src/cli/commands/run.ts +++ b/cli/src/cli/commands/run.ts @@ -167,7 +167,7 @@ export function runCommand(program: Command): Command { Options.defaultKgramsInWindow ) .option( - "-ic, --include-comments", + "-C, --include-comments", Utils.indent( "Include the comments during the tokenization process." ) diff --git a/lib/src/lib/tokenizer/codeTokenizer.ts b/lib/src/lib/tokenizer/codeTokenizer.ts index d919969f8..e26610fb7 100644 --- a/lib/src/lib/tokenizer/codeTokenizer.ts +++ b/lib/src/lib/tokenizer/codeTokenizer.ts @@ -62,8 +62,8 @@ export class CodeTokenizer extends Tokenizer { node.endPosition.column ); - const isComment = node.type.includes("comment"); - if (!isComment || this.options.includeComments) { + const includeToken = !node.type.includes("comment") || this.options.includeComments; + if (includeToken) { tokens.push(this.newToken("(", location)); tokens.push(this.newToken(node.type, location)); } @@ -78,7 +78,7 @@ export class CodeTokenizer extends Tokenizer { } } - if (!isComment || this.options.includeComments) { + if (includeToken) { tokens.push(this.newToken(")", location)); } diff --git a/lib/src/lib/tokenizer/tokenizer.ts b/lib/src/lib/tokenizer/tokenizer.ts index c66967e74..c0e7677bb 100644 --- a/lib/src/lib/tokenizer/tokenizer.ts +++ b/lib/src/lib/tokenizer/tokenizer.ts @@ -12,14 +12,7 @@ export type TokenizerOptions = Partial<{ export abstract class Tokenizer { - protected options: TokenizerOptions = {}; - - constructor(public readonly language: Language, options?: TokenizerOptions) { - if (options !== undefined) { - this.options = options; - } - } - + constructor(public readonly language: Language, protected readonly options: TokenizerOptions = {}) {} /** * Runs the parser on a given string. Returns a list of Tokens diff --git a/lib/src/test/tokenizer.test.ts b/lib/src/test/tokenizer.test.ts index b4779b38b..f3609d079 100644 --- a/lib/src/test/tokenizer.test.ts +++ b/lib/src/test/tokenizer.test.ts @@ -26,29 +26,6 @@ const languageFiles = { "verilog": "../samples/verilog/module.v" } as {[key: string]: string}; -const tokenLength = { - "../samples/bash/caesar.sh": 1179, - "../samples/c/caesar.c": 582, - "../samples/c-sharp/Caesar.cs": 603, - "../samples/char/caesar.txt": 3700, - "../samples/cpp/caesar.cpp": 795, - "../samples/elm/Caesar.elm": 753, - "../samples/go/Caesar.go": 1032, - "../samples/groovy/caesar.groovy": 282, - "../samples/java/Caesar.java": 519, - "../samples/javascript/sample.js": 861, - "../samples/python/caesar.py": 306, - "../samples/php/caesar.php": 411, - "../samples/modelica/sample.mo": 7533, - "../samples/r/caesar.R": 585, - "../samples/rust/caesar.rs": 774, - "../samples/scala/Caesar.scala": 366, - "../samples/sql/sample.sql": 540, - "../samples/tsx/sample.tsx": 1656, - "../samples/typescript/caesar.ts": 375, - "../samples/verilog/module.v": 2448 -} as {[key: string]: number}; - for (const [languageName, languageFile] of Object.entries(languageFiles)) { test(`LanguagePicker can find ${languageName} correctly by name`, async t => { const language = await new LanguagePicker().findLanguage(languageName); @@ -74,7 +51,6 @@ for (const [languageName, languageFile] of Object.entries(languageFiles)) { const { tokens } = tokenizer.tokenizeFile(file); t.truthy(tokens); t.snapshot(tokens, "stable tokenization"); - t.is(tokens.length, tokenLength[languageFile]); }); } @@ -201,3 +177,12 @@ test("should be able to correctly tokenize a loop", async t => { ); }); + +test("tokens should contain comments when includeComments is true", async t => { + const file = new File("comments.js", "let i = 0;\nwhile (i < 10) { // comment\n i += 1;\n}"); + const language = await (new LanguagePicker().findLanguage("javascript")); + + const tokenizer = await language.createTokenizer({ includeComments: true }); + const { tokens } = tokenizer.tokenizeFile(file); + t.true(tokens.includes("comment")); +}); \ No newline at end of file