From 447c57c942f51881e85b87a1e07bab887f11fafa Mon Sep 17 00:00:00 2001
From: Simon Laux
Date: Wed, 8 Jan 2025 13:22:35 +0100
Subject: [PATCH] move parenthesis counter into dedicated file and change to more descriptive name also add some tests

---
 src/parser/link_url/mod.rs                 |  1 +
 src/parser/link_url/parenthesis_counter.rs | 81 ++++++++++++++++++++++
 src/parser/link_url/parse_link.rs          | 65 +----------------
 3 files changed, 85 insertions(+), 62 deletions(-)
 create mode 100644 src/parser/link_url/parenthesis_counter.rs

diff --git a/src/parser/link_url/mod.rs b/src/parser/link_url/mod.rs
index 9473ba2..fee9c8e 100644
--- a/src/parser/link_url/mod.rs
+++ b/src/parser/link_url/mod.rs
@@ -1,4 +1,5 @@
 mod ip;
+mod parenthesis_counter;
 mod parse_link;
 
 use nom::{
diff --git a/src/parser/link_url/parenthesis_counter.rs b/src/parser/link_url/parenthesis_counter.rs
new file mode 100644
index 0000000..dd0dbf6
--- /dev/null
+++ b/src/parser/link_url/parenthesis_counter.rs
@@ -0,0 +1,81 @@
+use nom::Slice;
+
+macro_rules! adjust_balance {
+    ($a: expr, $b: expr, $c: expr, $d: expr) => {
+        // for opening ones
+        {
+            $a = $a.saturating_add(1);
+            if $d.slice($c..).find($b).is_none() {
+                return Some($c);
+            }
+        }
+    };
+    ($a: expr, $b: expr) => {
+        // for closing ones
+        {
+            if $a == 0 {
+                return Some($b);
+            } else {
+                $a = $a.saturating_sub(1);
+            }
+        }
+    };
+}
+
+/// finds unbalanced closing parenthesis and returns distance to it.
+/// unbalanced means it was closed but not opened before in the given string (distance is in bytes)
+pub(super) fn count_chars_in_complete_parenthesis(input: &str) -> Option<usize> {
+    let mut parenthes = 0usize; // ()
+    let mut curly_bracket = 0usize; // {}
+    let mut bracket = 0usize; // []
+    let mut angle = 0usize; // <>
+    // use byte offsets, not char counts: the macro and the caller slice `input` by byte index
+    for (i, ch) in input.char_indices() {
+        match ch {
+            '(' => {
+                adjust_balance!(parenthes, ')', i, input);
+            }
+            '{' => {
+                adjust_balance!(curly_bracket, '}', i, input);
+            }
+            '[' => {
+                adjust_balance!(bracket, ']', i, input);
+            }
+            '<' => {
+                adjust_balance!(angle, '>', i, input);
+            }
+            ')' => {
+                adjust_balance!(parenthes, i);
+            }
+            ']' => {
+                adjust_balance!(bracket, i);
+            }
+            '}' => {
+                adjust_balance!(curly_bracket, i);
+            }
+            '>' => {
+                adjust_balance!(angle, i);
+            }
+            _ => continue,
+        }
+    }
+    None
+}
+
+#[test]
+fn test_count_parenthesis() {
+    assert_eq!(count_chars_in_complete_parenthesis("{}"), None);
+    assert_eq!(count_chars_in_complete_parenthesis("{} test"), None);
+    assert_eq!(count_chars_in_complete_parenthesis("(test) test"), None);
+    assert_eq!(count_chars_in_complete_parenthesis("(test)) test"), Some(6));
+}
+
+#[test]
+fn test_count_different_types_invalid() {
+    assert_eq!(count_chars_in_complete_parenthesis("(({(})))"), None);
+}
+
+#[test]
+fn test_count_different_types_invalid2() {
+    assert_eq!(count_chars_in_complete_parenthesis("}(({(})))"), Some(0));
+}
diff --git a/src/parser/link_url/parse_link.rs b/src/parser/link_url/parse_link.rs
index 74776fb..0913a1c 100644
--- a/src/parser/link_url/parse_link.rs
+++ b/src/parser/link_url/parse_link.rs
@@ -22,6 +22,8 @@ use crate::parser::{
     },
 };
 
+use super::parenthesis_counter::count_chars_in_complete_parenthesis;
+
 /// determines which generic schemes (without '://') get linkifyed
 fn is_allowed_generic_scheme(scheme: &str) -> bool {
     matches!(
@@ -272,67 +274,6 @@ fn ifragment(input: &str) -> IResult<&str, &str, CustomError<&str>> {
     recognize(tuple((char('#'), take_while_ifragment)))(input)
 }
 
-macro_rules! link_correct {
-    ($a: expr, $b: expr, $c: expr, $d: expr) => {
-        // for opening ones
-        {
-            $a = $a.saturating_add(1);
-            if $d.slice($c..).find($b).is_none() {
-                return Some($c);
-            }
-        }
-    };
-    ($a: expr, $b: expr) => {
-        // for closing ones
-        {
-            if $a == 0 {
-                return Some($b);
-            } else {
-                $a = $a.saturating_sub(1);
-            }
-        }
-    };
-}
-
-// TODO: better name for this function
-fn get_correct_link(link: &str) -> Option<usize> {
-    let mut parenthes = 0usize; // ()
-    let mut curly_bracket = 0usize; // {}
-    let mut bracket = 0usize; // []
-    let mut angle = 0usize; // <>
-
-    for (i, ch) in link.chars().enumerate() {
-        match ch {
-            '(' => {
-                link_correct!(parenthes, ')', i, link);
-            }
-            '{' => {
-                link_correct!(curly_bracket, '}', i, link);
-            }
-            '[' => {
-                link_correct!(bracket, ']', i, link);
-            }
-            '<' => {
-                link_correct!(angle, '>', i, link);
-            }
-            ')' => {
-                link_correct!(parenthes, i);
-            }
-            ']' => {
-                link_correct!(bracket, i);
-            }
-            '}' => {
-                link_correct!(curly_bracket, i);
-            }
-            '>' => {
-                link_correct!(angle, i);
-            }
-            _ => continue,
-        }
-    }
-    None
-}
-
 fn parse_ipath_abempty(input: &str) -> IResult<&str, &str, CustomError<&str>> {
     recognize(many0(tuple((char('/'), opt(take_while_ipchar1)))))(input)
 }
@@ -406,7 +347,7 @@ fn parse_iri(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> {
             host = input_.slice(scheme.len().saturating_add(3)..input_.len().saturating_sub(1));
         }
     }
-    len = get_correct_link(link).unwrap_or(len);
+    len = count_chars_in_complete_parenthesis(link).unwrap_or(len);
     let link = input_.slice(0..len);
     let input = input_.slice(len..);
 