From 5a835dc742f6c0af4d84e1a4d5718e338341f43c Mon Sep 17 00:00:00 2001 From: mStar Date: Thu, 22 Aug 2024 19:54:25 +0200 Subject: [PATCH] sync for backup --- src/parser/mod.rs | 4 +- src/parser/preparser/clear_spaces.rs | 44 +++---- src/parser/preparser/insert_semicolons.rs | 2 +- src/parser/tokeniser.rs | 133 ++++++++++++++++++++++ 4 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 src/parser/tokeniser.rs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 938c790..a1618f6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1 +1,3 @@ -pub (crate) mod preparser; \ No newline at end of file +pub(crate) mod preparser; + +pub(crate) mod tokeniser; diff --git a/src/parser/preparser/clear_spaces.rs b/src/parser/preparser/clear_spaces.rs index 0ce3fc3..0a057f0 100644 --- a/src/parser/preparser/clear_spaces.rs +++ b/src/parser/preparser/clear_spaces.rs @@ -4,7 +4,7 @@ use std::str::Chars; enum QuoteMethod { Single, Double, - Backtick + Backtick, } // Normalised empty space in codeblocks, ensuring that all whitespace in codeblocks is one character wide @@ -13,7 +13,7 @@ pub fn clear_spaces(raw: &str) -> String { clear_in_code(&mut raw.chars()) } -fn clear_in_code<'a, 'b>(raw: &mut Chars<'a>) -> String { +fn clear_in_code(raw: &mut Chars) -> String { let mut cleared = String::new(); let mut last_char = 'a'; while let Some(c) = raw.next() { @@ -44,45 +44,45 @@ fn clear_in_code<'a, 'b>(raw: &mut Chars<'a>) -> String { } '}' => { cleared.push('}'); - return cleared + return cleared; } x => { if last_char.is_whitespace() && x.is_whitespace() { - last_char = x.clone(); + last_char = *x; continue; } - cleared.push(x.clone()); - last_char = x.clone(); + cleared.push(*x); + last_char = *x; } } - }; + } cleared } -fn clear_in_quotes<'a, 'b>(raw: &mut Chars<'a>, method: QuoteMethod) -> String { +fn clear_in_quotes(raw: &mut Chars, method: QuoteMethod) -> String { let mut cleared = String::new(); let mut last_char = 'a'; while let Some(c) = raw.next() { match c { '"' => { cleared.push('"'); - if method == QuoteMethod::Double && last_char != '\\' { - return cleared + if method == QuoteMethod::Double && last_char != '\\' { + return cleared; } last_char = '"'; } '\'' => { cleared.push('\''); - if method == QuoteMethod::Single && last_char != '\\' { - return cleared + if method == QuoteMethod::Single && last_char != '\\' { + return cleared; } last_char = '\''; } '`' => { cleared.push('`'); if method == QuoteMethod::Backtick && last_char != '\\' { - return cleared + return cleared; } last_char = '`'; } @@ -98,8 +98,8 @@ fn clear_in_quotes<'a, 'b>(raw: &mut Chars<'a>, method: QuoteMethod) -> String { } } x => { - cleared.push(x.clone()); - last_char = x.clone(); + cleared.push(x); + last_char = x; } } } @@ -130,19 +130,25 @@ fn test_clear_spaces3() { #[test] fn test_clear_spaces4() { let res = clear_spaces("'foo bar'"); - assert_eq!(res, "'foo bar'".to_owned()) + assert_eq!(res, "'foo bar'".to_owned()) } // Test to make sure that code blocks inside single quotes are treated as quoted as well #[test] fn test_clear_spaces5() { let res = clear_spaces("pre quote'foo {shouldn\\'t compress space} inside'post quote"); - assert_eq!(res, "pre quote'foo {shouldn\\'t compress space} inside'post quote".to_owned()) + assert_eq!( + res, + "pre quote'foo {shouldn\\'t compress space} inside'post quote".to_owned() + ) } // Test to make sure that code blocks inside double quotes are treated as non-quoted #[test] fn test_clear_spaces6() { let res = clear_spaces("pre quote`foo ${should compress space} inside`post quote"); - assert_eq!(res, "pre quote`foo ${should compress space} inside`post quote".to_owned()) -} \ No newline at end of file + assert_eq!( + res, + "pre quote`foo ${should compress space} inside`post quote".to_owned() + ) +} diff --git a/src/parser/preparser/insert_semicolons.rs b/src/parser/preparser/insert_semicolons.rs index da835b8..43e3fa4 100644 --- a/src/parser/preparser/insert_semicolons.rs +++ b/src/parser/preparser/insert_semicolons.rs @@ -3,7 +3,7 @@ use crate::utils::str_safe_split; // Takes space normalised source code and adds semicolons where needed // May not work properly if string isn't space normalised beforehand pub fn insert_semicolons(raw: &str) -> String { - let _splits = str_safe_split(raw, vec![' ','\t']); + let _splits = str_safe_split(raw, vec![' ','\t', '\n']); "".to_owned() } \ No newline at end of file diff --git a/src/parser/tokeniser.rs b/src/parser/tokeniser.rs new file mode 100644 index 0000000..bdc1aed --- /dev/null +++ b/src/parser/tokeniser.rs @@ -0,0 +1,133 @@ +use num_bigint::BigInt; + +const LINE_BREAKS: [char; 4] = ['\u{000A}', '\u{000D}', '\u{2028}', '\u{2029}']; + +#[derive(Debug)] +enum OldTokens { + MinusOne, + Minus, + MinusEquals, + Comma, + Semicolon, + Colon, + Not, + NotEquals, + StrongNotEquals, + DoubleQuestion, + QuestionQuestionEquals, + QuestionDot, + Question, + Dot, + TrippleDot, + BracketOpen, + BracketClose, + SquareBracketOpen, + SquareBracketClose, + CurlyBracketOpen, + CurlyBracketClose, + Multiply, + PowerOf, + PowerOfEquals, + MultiplyEquals, + Divide, + DivideEquals, + BitAnd, + LogicAnd, + LogicAndEquals, + BitAndEquals, + Hashtag, + Modulo, + ModuloEquals, + BitXOr, + BitXOrEquals, + Add, + AddOne, + AddEquals, + LessThan, + LeftShift, + LeftShitEquals, + LessOrEqual, + EqualAssign, + EqualCompare, + EqualStrongCompare, + Arrow, + MoreThan, + MoreOrEqual, + RightShift, + RightShiftEquals, + UnsignedRightShift, + UnsignedRightShiftEquals, + BitOr, + BitOrEquals, + LogicOr, + LogicOrEquals, + Bitflip, + KeyAwait, + KeyBreak, + KeyCase, + KeyCatch, + KeyClass, + KeyComment, + KeyConst, + KeyContinue, + KeyDebugger, + KeyDefault, + KeyDelete, + KeyDo, + KyeElse, + KeyEnum, + KeyExport, +} + +#[derive(Debug)] +pub enum Actions { + Assign, + Codeflow(Codeflow), + Raw(RawValue), +} + +#[derive(Debug)] +pub enum RawValue { + Bool(bool), + Float(f64), + Int(BigInt), +} + +#[derive(Debug)] +pub enum Codeflow { + Compare(Comparison), + TryCatch, + Loop, +} + +#[derive(Debug)] +pub enum Comparison { + WeakEquals, + StrongEquals, + WeakNotEquals, + StrongNotEquals, + LessThan, + LessOrEqual, + MoreThan, + MoreOrEqual, +} + +#[derive(Debug)] +pub enum Keywords { + Await, + Break, + Case, + Catch, + Class, + Const, + Continue, + Debugger, + Default, + Delete, + Do, + Else, + Enum, + Export, + Extends, + Finally, +}