From f492e9c44ab7fdbee4ec3e8710216e42af20afb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Sun, 19 Sep 2021 14:52:20 +0200 Subject: [PATCH] finish smart_split --- src/group_handler/handle_mention.rs | 124 +++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 13 deletions(-) diff --git a/src/group_handler/handle_mention.rs b/src/group_handler/handle_mention.rs index 563b9a7..954b528 100644 --- a/src/group_handler/handle_mention.rs +++ b/src/group_handler/handle_mention.rs @@ -262,7 +262,7 @@ impl<'a> ProcessMention<'a> { apply_trailing_hashtag_pleroma_bug_workaround(&mut msg); } - let mention = format!("@{user}", user = self.status_acct); + let mention = format!("@{user} ", user = self.status_acct); self.send_reply_multipart(mention, msg).await?; } @@ -806,46 +806,69 @@ fn smart_split(msg : &str, prefix: Option, limit: usize) -> Vec let mut parts_to_send = vec![]; let mut this_piece = prefix.clone(); for l in msg.split("\n") { + println!("* Line: {:?}", l); if this_piece.len() + l.len() == limit { + println!("exactly fits within limit"); + // this line exactly reaches the limit this_piece.push_str(l); parts_to_send.push(std::mem::take(&mut this_piece).trim().to_owned()); this_piece.push_str(&prefix); } else if this_piece.len() + l.len() > limit { - let trimmed = this_piece.trim(); - if !trimmed.is_empty() { - parts_to_send.push(trimmed.to_owned()); + println!("too long to append (already {} + new {})", this_piece.len(), l.len()); + // line too long to append + if this_piece != prefix { + let trimmed = this_piece.trim(); + if !trimmed.is_empty() { + println!("flush buffer: {:?}", trimmed); + parts_to_send.push(trimmed.to_owned()); + } } - // start new piece + // start new piece with the line. If the line is too long, break it up. this_piece = format!("{}{}", prefix, l); while this_piece.len() > limit { - let to_send = if let Some(last_space) = (&this_piece[..limit]).rfind(' ') { - let mut p = this_piece.split_off(last_space); + // line too long, try splitting at the last space, if any + let to_send = if let Some(last_space) = (&this_piece[..=limit]).rfind(' ') { + println!("line split at word boundary"); + let mut p = this_piece.split_off(last_space + 1); std::mem::swap(&mut p, &mut this_piece); p } else { + println!("line split at exact len (no word boundary found)"); let mut p = this_piece.split_off(limit); std::mem::swap(&mut p, &mut this_piece); p }; - parts_to_send.push(to_send); - this_piece = format!("{}{}", prefix, this_piece); + let part_trimmed = to_send.trim(); + println!("flush buffer: {:?}", part_trimmed); + parts_to_send.push(part_trimmed.to_owned()); + this_piece = format!("{}{}", prefix, this_piece.trim()); } this_piece.push('\n'); } else { + println!("append line"); + // this line still fits comfortably this_piece.push_str(l); this_piece.push('\n'); } } + if this_piece != prefix { + let leftover_trimmed = this_piece.trim(); + if !leftover_trimmed.is_empty() { + println!("flush buffer: {:?}", leftover_trimmed); + parts_to_send.push(leftover_trimmed.to_owned()); + } + } + parts_to_send } #[cfg(test)] mod test { #[test] - fn test_smart_split1() { + fn test_smart_split_lines() { let to_split = "a234567890\nb234567890\nc234567890\nd234\n67890\ne234567890\n"; let parts = super::smart_split(to_split, None, 10); @@ -859,7 +882,7 @@ mod test { } #[test] - fn test_smart_split2() { + fn test_smart_split_nosplit() { let to_split = "foo\nbar\nbaz"; let parts = super::smart_split(to_split, None, 1000); @@ -869,7 +892,7 @@ mod test { } #[test] - fn test_smart_split3() { + fn test_smart_split_nosplit_prefix() { let to_split = "foo\nbar\nbaz"; let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 1000); assert_eq!(vec![ @@ -878,7 +901,7 @@ mod test { } #[test] - fn test_smart_split4() { + fn test_smart_split_prefix_each() { let to_split = "1234\n56\n7"; let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 10); assert_eq!(vec![ @@ -886,4 +909,79 @@ mod test { "PREFIX56\n7".to_string(), ], parts); } + + #[test] + fn test_smart_split_words() { + let to_split = "one two three four five six seven eight nine ten"; + let parts = super::smart_split(to_split, None, 10); + assert_eq!(vec![ + "one two".to_string(), + "three four".to_string(), + "five six".to_string(), + "seven".to_string(), + "eight nine".to_string(), + "ten".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_words_multispace() { + let to_split = "one two three four five six seven eight nine ten "; + let parts = super::smart_split(to_split, None, 10); + assert_eq!(vec![ + "one two".to_string(), + "three four".to_string(), + "five six".to_string(), + "seven".to_string(), + "eight nine".to_string(), + "ten".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_words_longword() { + let to_split = "one two threefourfive six"; + let parts = super::smart_split(to_split, None, 10); + assert_eq!(vec![ + "one two".to_string(), + "threefourf".to_string(), + "ive six".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_words_prefix() { + let to_split = "one two three four five six seven eight nine ten"; + let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 15); + assert_eq!(vec![ + "PREFIXone two".to_string(), + "PREFIXthree".to_string(), + "PREFIXfour five".to_string(), + "PREFIXsix seven".to_string(), + "PREFIXeight".to_string(), + "PREFIXnine ten".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_realistic() { + let to_split = "\ + Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n\ + Aenean venenatis libero ac ex suscipit, nec efficitur arcu convallis.\n\ + Nulla ante neque, efficitur nec fermentum a, fermentum nec nisl.\n\ + Sed dolor ex, vestibulum at malesuada ut, faucibus ac ante.\n\ + Nullam scelerisque magna dui, id tempor purus faucibus sit amet.\n\ + Curabitur pretium condimentum pharetra.\n\ + Aenean dictum, tortor et ultrices fermentum, mauris erat vehicula lectus.\n\ + Nec varius mauris sem sollicitudin dolor. Nunc porta in urna nec vulputate."; + let parts = super::smart_split(to_split, Some("@pepa@pig.club ".to_string()), 140); + assert_eq!(vec![ + "@pepa@pig.club Lorem ipsum dolor sit amet, consectetur adipiscing elit.".to_string(), + "@pepa@pig.club Aenean venenatis libero ac ex suscipit, nec efficitur arcu convallis.".to_string(), + "@pepa@pig.club Nulla ante neque, efficitur nec fermentum a, fermentum nec nisl.\nSed dolor ex, vestibulum at malesuada ut, faucibus ac ante.".to_string(), + "@pepa@pig.club Nullam scelerisque magna dui, id tempor purus faucibus sit amet.\nCurabitur pretium condimentum pharetra.".to_string(), + "@pepa@pig.club Aenean dictum, tortor et ultrices fermentum, mauris erat vehicula lectus.".to_string(), + "@pepa@pig.club Nec varius mauris sem sollicitudin dolor. Nunc porta in urna nec vulputate.".to_string(), + ], parts); + } } \ No newline at end of file