diff --git a/Cargo.lock b/Cargo.lock index 6184637..5b6ac01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,6 +13,7 @@ name = "capybara-engine" version = "0.1.0" dependencies = [ "num-bigint", + "thiserror", "widestring", ] @@ -46,6 +47,61 @@ dependencies = [ "autocfg", ] +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + [[package]] name = "widestring" version = "1.0.2" diff --git a/Cargo.toml b/Cargo.toml index d52c6c9..3e0cc66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,4 +7,5 @@ edition = "2021" [dependencies] num-bigint = "0.4.4" +thiserror = "1.0.56" widestring = "1.0.2" diff --git 
a/src/parser/preparser.rs b/src/parser/preparser.rs index 07b6fd0..ddb5127 100644 --- a/src/parser/preparser.rs +++ b/src/parser/preparser.rs @@ -1,111 +1,11 @@ -use std::fmt::Write; +mod clear_spaces; +mod insert_semicolons; // Prepares the given raw code to be consumed by the tokeniser // It achieves this by adding missing semicolons and adjusting whitespace (normalising to one space for everything outside of strings) pub fn preparse(raw: &str) -> String { - let result = clear_spaces(raw); + let result = clear_spaces::clear_spaces(raw); + let result = insert_semicolons::insert_semicolons(&result); result } - -fn clear_spaces(raw: &str) -> String { - let mut cleared = String::new(); - - let mut currently_quoted = false; - let mut quote_is_single = false; - let mut inside_quoted_codeblock = false; - let mut previous_char = 'a'; - - - for c in raw.chars() { - if currently_quoted { - cleared.write_char(c.clone()).expect("Failed to write character to out string during whitespace normalisation"); - previous_char = c; - } else { - match c { - '"' => { - if currently_quoted && !quote_is_single && !inside_quoted_codeblock { - currently_quoted = false; - } else if !currently_quoted { - currently_quoted = true; - quote_is_single = false; - // Just overwrite it here already in case of some weird stuff - // Assume that only ever one codeblock will be nested in a string - // If there's more, this code will explode - // Not really, but spaces might get messed up - inside_quoted_codeblock = false; - } - cleared.write_char(c.clone()).expect("Failed to write character to out string during whitespace normalisation"); - } - '\'' => { - if currently_quoted && quote_is_single { - currently_quoted = false; - } else { - currently_quoted = true; - quote_is_single = true; - } - cleared.write_char(c.clone()).expect("Failed to write character to out string during whitespace normalisation"); - } - '{' => { - if currently_quoted && !inside_quoted_codeblock && !quote_is_single { - 
inside_quoted_codeblock = true; - } - cleared.write_char(c.clone()).expect("Failed to write character to out string during whitespace normalisation"); - } - '}' => { - if currently_quoted && !inside_quoted_codeblock && !quote_is_single { - inside_quoted_codeblock = false; - } - cleared.write_char(c.clone()).expect("Failed to write character to out string during whitespace normalisation"); - } - x => { - if !(previous_char.is_whitespace() && x.is_whitespace()) { - cleared.write_char(x.clone()).expect("Failed to write character to out string during whitespace normalisation"); - } else { - } - previous_char = x; - - } - } - } - } - - cleared -} - -// Test to make sure that multiple spaces get grouped into one -#[test] -fn test_clear_spaces1() { - assert_eq!(clear_spaces(" \n \t"), " ".to_owned()) -} - -// Test to make sure that only spaces are affected and not normal text -#[test] -fn test_clear_spaces2() { - let res = clear_spaces("foo bar"); - assert_eq!(res, "foo bar".to_owned()) -} - -// Test to make sure that double quoted test keeps spaces -#[test] -fn test_clear_spaces3() { - assert_eq!(clear_spaces("\"foo bar\""), "\"foo bar\"".to_owned()) -} - -// Same as previous, but for single quotes -#[test] -fn test_clear_spaces4() { - assert_eq!(clear_spaces("'foo bar'"), "'foo bar'".to_owned()) -} - -// Test to make sure that code blocks inside single quotes are treated as quoted as well -#[test] -fn test_clear_spaces5() { - assert_eq!(clear_spaces("'foo {shouldn't compress space} inside'"), "'foo {shouldn't compress space} inside'".to_owned()) -} - -// Test to make sure that code blocks inside double quotes are treated as non-quoted -#[test] -fn test_clear_spaces6() { - assert_eq!(clear_spaces("\"foo {should compress space} inside\""), "\"foo {should compress space} inside\"".to_owned()) -} \ No newline at end of file diff --git a/src/parser/preparser/clear_spaces.rs b/src/parser/preparser/clear_spaces.rs new file mode 100644 index 0000000..9ef9c40 --- /dev/null +++ 
// File: src/parser/preparser/clear_spaces.rs
use std::str::Chars;

/// The delimiter that opened the string literal currently being copied.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum QuoteMethod {
    Single,
    Double,
    Backtick,
}

impl QuoteMethod {
    /// Maps an opening quote character to its quote kind, or `None` when `c`
    /// does not start a string.
    fn from_delimiter(c: char) -> Option<Self> {
        match c {
            '\'' => Some(Self::Single),
            '"' => Some(Self::Double),
            '`' => Some(Self::Backtick),
            _ => None,
        }
    }

    /// The character that terminates a string opened with this quote kind.
    fn delimiter(self) -> char {
        match self {
            Self::Single => '\'',
            Self::Double => '"',
            Self::Backtick => '`',
        }
    }
}

/// Normalises whitespace in code, leaving string contents untouched.
///
/// Outside of strings, every run of consecutive whitespace characters is
/// reduced to its first character. Text inside single-quoted, double-quoted
/// and backtick-quoted strings is copied verbatim, except for `${ ... }`
/// sections inside backtick strings, which are treated as code again.
///
/// Unbalanced input is tolerated: an unterminated string is copied through to
/// the end of the input, and an unmatched `}` is kept without discarding the
/// text that follows it.
pub fn clear_spaces(raw: &str) -> String {
    // The output can never be longer than the input.
    let mut cleared = String::with_capacity(raw.len());
    clear_in_code(&mut raw.chars(), &mut cleared, false);
    cleared
}

/// Copies code from `raw` into `cleared`, collapsing whitespace runs.
///
/// `nested` is `true` when this call was entered through a `{`; in that case
/// the matching `}` ends the call. At the top level (`nested == false`) a `}`
/// has nothing to close, so it is emitted and processing simply continues.
fn clear_in_code(raw: &mut Chars<'_>, cleared: &mut String, nested: bool) {
    let mut previous_was_whitespace = false;

    // `while let` rather than `for`: the iterator is handed to the nested
    // calls below, which a `for` loop's borrow would not allow.
    while let Some(c) = raw.next() {
        if let Some(method) = QuoteMethod::from_delimiter(c) {
            cleared.push(c);
            clear_in_quotes(raw, cleared, method);
            previous_was_whitespace = false;
            continue;
        }

        match c {
            '{' => {
                cleared.push('{');
                clear_in_code(raw, cleared, true);
                previous_was_whitespace = false;
            }
            '}' => {
                cleared.push('}');
                if nested {
                    return;
                }
                previous_was_whitespace = false;
            }
            _ => {
                let is_whitespace = c.is_whitespace();
                // Only the first character of a whitespace run is kept.
                if !(is_whitespace && previous_was_whitespace) {
                    cleared.push(c);
                }
                previous_was_whitespace = is_whitespace;
            }
        }
    }
}

/// Copies the remainder of a string literal from `raw` into `cleared`.
///
/// The opening quote must already have been consumed and emitted by the
/// caller. Characters are copied verbatim up to and including the closing
/// delimiter of `method`. A backslash escapes the character that follows it,
/// so `\"` does not close a double-quoted string while `\\"` does. Inside
/// backtick strings an unescaped `${` switches back to code handling until the
/// matching `}`.
fn clear_in_quotes(raw: &mut Chars<'_>, cleared: &mut String, method: QuoteMethod) {
    let closing = method.delimiter();
    let mut escaped = false;
    let mut after_dollar = false;

    while let Some(c) = raw.next() {
        cleared.push(c);

        if escaped {
            // An escaped character never closes the string nor opens `${`.
            escaped = false;
            after_dollar = false;
            continue;
        }

        match c {
            '\\' => escaped = true,
            _ if c == closing => return,
            '{' if after_dollar && method == QuoteMethod::Backtick => {
                clear_in_code(raw, cleared, true);
            }
            _ => {}
        }
        after_dollar = c == '$';
    }
}

/// Legacy whitespace normaliser, superseded by [`clear_spaces`].
///
/// Collapses whitespace runs until the first `'` or `"` is seen; from that
/// quote onwards the rest of the input is copied verbatim, because this
/// implementation never leaves quoted mode again. `{` and `}` are copied
/// without updating the whitespace tracking.
// Nothing calls this any more; it is retained only for reference.
#[allow(dead_code)]
fn old_clear_spaces(raw: &str) -> String {
    let mut cleared = String::with_capacity(raw.len());
    let mut previous_char = 'a';
    let mut chars = raw.chars();

    while let Some(c) = chars.next() {
        match c {
            '"' | '\'' => {
                cleared.push(c);
                cleared.extend(chars.by_ref());
                break;
            }
            '{' | '}' => cleared.push(c),
            x => {
                if !(previous_char.is_whitespace() && x.is_whitespace()) {
                    cleared.push(x);
                }
                previous_char = x;
            }
        }
    }

    cleared
}

// Test to make sure that multiple whitespace characters get grouped into one
#[test]
fn test_clear_spaces1() {
    assert_eq!(clear_spaces("  \n \t"), " ".to_owned())
}

// Test to make sure that only spaces are affected and not normal text
#[test]
fn test_clear_spaces2() {
    let res = clear_spaces("foo   bar");
    assert_eq!(res, "foo bar".to_owned())
}

// Test to make sure that double quoted text keeps spaces
#[test]
fn test_clear_spaces3() {
    let res = clear_spaces("\"foo   bar\"");
    assert_eq!(res, "\"foo   bar\"".to_owned())
}

// Same as previous, but for single quotes
#[test]
fn test_clear_spaces4() {
    let res = clear_spaces("'foo   bar'");
    assert_eq!(res, "'foo   bar'".to_owned())
}

// Test to make sure that code blocks inside single quotes are treated as quoted as well
#[test]
fn test_clear_spaces5() {
    let res = clear_spaces("pre  quote'foo  {shouldn\\'t  compress  space}  inside'post  quote");
    assert_eq!(
        res,
        "pre quote'foo  {shouldn\\'t  compress  space}  inside'post quote".to_owned()
    )
}

// Test to make sure that `${...}` code blocks inside backtick strings are treated as non-quoted
#[test]
fn test_clear_spaces6() {
    let res = clear_spaces("pre  quote`foo  ${should  compress  space}  inside`post  quote");
    assert_eq!(
        res,
        "pre quote`foo  ${should compress space}  inside`post quote".to_owned()
    )
}

// File: src/parser/preparser/insert_semicolons.rs

/// Takes space-normalised source code and adds semicolons where needed.
///
/// Semicolon insertion is not implemented yet. Until it is, the input is
/// returned unchanged so that the preparse pipeline keeps the source text
/// instead of replacing it with an empty string.
///
/// May not work properly if the string isn't space normalised beforehand.
pub fn insert_semicolons(raw: &str) -> String {
    // TODO: implement the actual semicolon insertion pass.
    raw.to_owned()
}