From 748023c4103c06ad9a5a1b83ee6de6889d826665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Sat, 18 Sep 2021 21:55:16 +0200 Subject: [PATCH 1/2] wip smart split to fit instance character limit --- src/group_handler/handle_mention.rs | 143 +++++++++++++++++++++++++--- src/store/data.rs | 7 ++ src/store/mod.rs | 16 ---- 3 files changed, 136 insertions(+), 30 deletions(-) diff --git a/src/group_handler/handle_mention.rs b/src/group_handler/handle_mention.rs index 2892678..563b9a7 100644 --- a/src/group_handler/handle_mention.rs +++ b/src/group_handler/handle_mention.rs @@ -262,8 +262,30 @@ impl<'a> ProcessMention<'a> { apply_trailing_hashtag_pleroma_bug_workaround(&mut msg); } + let mention = format!("@{user}", user = self.status_acct); + self.send_reply_multipart(mention, msg).await?; + } + + if !self.announcements.is_empty() { + let mut msg = self.announcements.join("\n"); + debug!("a={}", msg); + + if self.want_markdown { + apply_trailing_hashtag_pleroma_bug_workaround(&mut msg); + } + + let msg = format!("**📢 Group announcement**\n{msg}", msg = msg); + self.send_announcement_multipart(&msg).await?; + } + + Ok(()) + } + + async fn send_reply_multipart(&self, mention : String, msg : String) -> Result<(), GroupError> { + let parts = smart_split(&msg, Some(mention), self.config.get_character_limit()); + for p in parts { if let Ok(post) = StatusBuilder::new() - .status(format!("@{user} {msg}", user = self.status_acct, msg = msg)) + .status(p) .content_type(if self.want_markdown { "text/markdown" } else { @@ -272,30 +294,31 @@ impl<'a> ProcessMention<'a> { .visibility(Visibility::Direct) .build() { - let _ = self.client.new_status(post) - .await.log_error("Failed to post"); - // Sleep a bit to avoid throttling - tokio::time::sleep(Duration::from_secs(1)).await; + self.client.new_status(post).await?; } + + // Sleep a bit to avoid throttling + tokio::time::sleep(Duration::from_secs(1)).await; } - if !self.announcements.is_empty() { - let mut msg = self.announcements.join("\n"); - debug!("a={}", msg); + Ok(()) + } - if self.want_markdown { - apply_trailing_hashtag_pleroma_bug_workaround(&mut msg); - } + async fn send_announcement_multipart(&self, msg : &str) -> Result<(), GroupError> { + let parts = smart_split(msg, None, self.config.get_character_limit()); + for p in parts { let post = StatusBuilder::new() - .status(format!("**📢 Group announcement**\n{msg}", msg = msg)) + .status(p) .content_type("text/markdown") .visibility(Visibility::Public) .build() .expect("error build status"); - let _ = self.client.new_status(post) - .await.log_error("Failed to post"); + self.client.new_status(post).await?; + + // Sleep a bit to avoid throttling + tokio::time::sleep(Duration::from_secs(1)).await; } Ok(()) @@ -772,3 +795,95 @@ fn apply_trailing_hashtag_pleroma_bug_workaround(msg: &mut String) { msg.push_str(" ."); } } + +fn smart_split(msg : &str, prefix: Option, limit: usize) -> Vec { + let prefix = prefix.unwrap_or_default(); + + if msg.len() + prefix.len() < limit { + return vec![format!("{}{}", prefix, msg)]; + } + + let mut parts_to_send = vec![]; + let mut this_piece = prefix.clone(); + for l in msg.split("\n") { + if this_piece.len() + l.len() == limit { + this_piece.push_str(l); + parts_to_send.push(std::mem::take(&mut this_piece).trim().to_owned()); + this_piece.push_str(&prefix); + } else if this_piece.len() + l.len() > limit { + let trimmed = this_piece.trim(); + if !trimmed.is_empty() { + parts_to_send.push(trimmed.to_owned()); + } + + // start new piece + this_piece = format!("{}{}", prefix, l); + + while this_piece.len() > limit { + let to_send = if let Some(last_space) = (&this_piece[..limit]).rfind(' ') { + let mut p = this_piece.split_off(last_space); + std::mem::swap(&mut p, &mut this_piece); + p + } else { + let mut p = this_piece.split_off(limit); + std::mem::swap(&mut p, &mut this_piece); + p + }; + parts_to_send.push(to_send); + this_piece = format!("{}{}", prefix, this_piece); + } + this_piece.push('\n'); + } else { + this_piece.push_str(l); + this_piece.push('\n'); + } + } + + parts_to_send +} + +#[cfg(test)] +mod test { + #[test] + fn test_smart_split1() { + let to_split = "a234567890\nb234567890\nc234567890\nd234\n67890\ne234567890\n"; + + let parts = super::smart_split(to_split, None, 10); + assert_eq!(vec![ + "a234567890".to_string(), + "b234567890".to_string(), + "c234567890".to_string(), + "d234\n67890".to_string(), + "e234567890".to_string(), + ], parts); + } + + #[test] + fn test_smart_split2() { + let to_split = "foo\nbar\nbaz"; + + let parts = super::smart_split(to_split, None, 1000); + assert_eq!(vec![ + "foo\nbar\nbaz".to_string(), + ], parts); + } + + #[test] + fn test_smart_split3() { + let to_split = "foo\nbar\nbaz"; + let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 1000); + assert_eq!(vec![ + "PREFIXfoo\nbar\nbaz".to_string(), + ], parts); + } + + #[test] + fn test_smart_split4() { + let to_split = "1234\n56\n7"; + let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 10); + assert_eq!(vec![ + "PREFIX1234".to_string(), + "PREFIX56\n7".to_string(), + ], parts); + } +} \ No newline at end of file diff --git a/src/store/data.rs b/src/store/data.rs index fedab62..8789d5c 100644 --- a/src/store/data.rs +++ b/src/store/data.rs @@ -33,6 +33,8 @@ pub(crate) struct GroupConfig { acct: String, /// elefren data appdata: AppData, + /// Server's character limit + character_limit: usize, /// Hashtags the group will auto-boost from it's members group_tags: HashSet, /// List of admin account "acct" names, e.g. piggo@piggo.space @@ -67,6 +69,7 @@ impl Default for GroupConfig { redirect: Default::default(), token: Default::default(), }, + character_limit: 5000, group_tags: Default::default(), admin_users: Default::default(), member_users: Default::default(), @@ -90,6 +93,10 @@ impl GroupConfig { } } + pub(crate) fn get_character_limit(&self) -> usize { + self.character_limit + } + pub(crate) fn is_enabled(&self) -> bool { self.enabled } diff --git a/src/store/mod.rs b/src/store/mod.rs index 5a72065..56a9d6b 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -232,19 +232,3 @@ fn make_scopes() -> Scopes { | Scopes::write(scopes::Write::Media) | Scopes::write(scopes::Write::Follows) } - -// trait TapOk { -// fn tap_ok(self, f: F) -> Self; -// } -// -// impl TapOk for Result { -// fn tap_ok(self, f: F) -> Self { -// match self { -// Ok(v) => { -// f(&v); -// Ok(v) -// } -// Err(e) => Err(e) -// } -// } -// } From f492e9c44ab7fdbee4ec3e8710216e42af20afb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Sun, 19 Sep 2021 14:52:20 +0200 Subject: [PATCH 2/2] finish smart_split --- src/group_handler/handle_mention.rs | 124 +++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 13 deletions(-) diff --git a/src/group_handler/handle_mention.rs b/src/group_handler/handle_mention.rs index 563b9a7..954b528 100644 --- a/src/group_handler/handle_mention.rs +++ b/src/group_handler/handle_mention.rs @@ -262,7 +262,7 @@ impl<'a> ProcessMention<'a> { apply_trailing_hashtag_pleroma_bug_workaround(&mut msg); } - let mention = format!("@{user}", user = self.status_acct); + let mention = format!("@{user} ", user = self.status_acct); self.send_reply_multipart(mention, msg).await?; } @@ -806,46 +806,69 @@ fn smart_split(msg : &str, prefix: Option, limit: usize) -> Vec let mut parts_to_send = vec![]; let mut this_piece = prefix.clone(); for l in msg.split("\n") { + println!("* Line: {:?}", l); if this_piece.len() + l.len() == limit { + println!("exactly fits within limit"); + // this line exactly reaches the limit this_piece.push_str(l); parts_to_send.push(std::mem::take(&mut this_piece).trim().to_owned()); this_piece.push_str(&prefix); } else if this_piece.len() + l.len() > limit { - let trimmed = this_piece.trim(); - if !trimmed.is_empty() { - parts_to_send.push(trimmed.to_owned()); + println!("too long to append (already {} + new {})", this_piece.len(), l.len()); + // line too long to append + if this_piece != prefix { + let trimmed = this_piece.trim(); + if !trimmed.is_empty() { + println!("flush buffer: {:?}", trimmed); + parts_to_send.push(trimmed.to_owned()); + } } - // start new piece + // start new piece with the line. If the line is too long, break it up. this_piece = format!("{}{}", prefix, l); while this_piece.len() > limit { - let to_send = if let Some(last_space) = (&this_piece[..limit]).rfind(' ') { - let mut p = this_piece.split_off(last_space); + // line too long, try splitting at the last space, if any + let to_send = if let Some(last_space) = (&this_piece[..=limit]).rfind(' ') { + println!("line split at word boundary"); + let mut p = this_piece.split_off(last_space + 1); std::mem::swap(&mut p, &mut this_piece); p } else { + println!("line split at exact len (no word boundary found)"); let mut p = this_piece.split_off(limit); std::mem::swap(&mut p, &mut this_piece); p }; - parts_to_send.push(to_send); - this_piece = format!("{}{}", prefix, this_piece); + let part_trimmed = to_send.trim(); + println!("flush buffer: {:?}", part_trimmed); + parts_to_send.push(part_trimmed.to_owned()); + this_piece = format!("{}{}", prefix, this_piece.trim()); } this_piece.push('\n'); } else { + println!("append line"); + // this line still fits comfortably this_piece.push_str(l); this_piece.push('\n'); } } + if this_piece != prefix { + let leftover_trimmed = this_piece.trim(); + if !leftover_trimmed.is_empty() { + println!("flush buffer: {:?}", leftover_trimmed); + parts_to_send.push(leftover_trimmed.to_owned()); + } + } + parts_to_send } #[cfg(test)] mod test { #[test] - fn test_smart_split1() { + fn test_smart_split_lines() { let to_split = "a234567890\nb234567890\nc234567890\nd234\n67890\ne234567890\n"; let parts = super::smart_split(to_split, None, 10); @@ -859,7 +882,7 @@ mod test { } #[test] - fn test_smart_split2() { + fn test_smart_split_nosplit() { let to_split = "foo\nbar\nbaz"; let parts = super::smart_split(to_split, None, 1000); @@ -869,7 +892,7 @@ mod test { } #[test] - fn test_smart_split3() { + fn test_smart_split_nosplit_prefix() { let to_split = "foo\nbar\nbaz"; let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 1000); assert_eq!(vec![ @@ -878,7 +901,7 @@ mod test { } #[test] - fn test_smart_split4() { + fn test_smart_split_prefix_each() { let to_split = "1234\n56\n7"; let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 10); assert_eq!(vec![ @@ -886,4 +909,79 @@ mod test { "PREFIX56\n7".to_string(), ], parts); } + + #[test] + fn test_smart_split_words() { + let to_split = "one two three four five six seven eight nine ten"; + let parts = super::smart_split(to_split, None, 10); + assert_eq!(vec![ + "one two".to_string(), + "three four".to_string(), + "five six".to_string(), + "seven".to_string(), + "eight nine".to_string(), + "ten".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_words_multispace() { + let to_split = "one two three four five six seven eight nine ten "; + let parts = super::smart_split(to_split, None, 10); + assert_eq!(vec![ + "one two".to_string(), + "three four".to_string(), + "five six".to_string(), + "seven".to_string(), + "eight nine".to_string(), + "ten".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_words_longword() { + let to_split = "one two threefourfive six"; + let parts = super::smart_split(to_split, None, 10); + assert_eq!(vec![ + "one two".to_string(), + "threefourf".to_string(), + "ive six".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_words_prefix() { + let to_split = "one two three four five six seven eight nine ten"; + let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 15); + assert_eq!(vec![ + "PREFIXone two".to_string(), + "PREFIXthree".to_string(), + "PREFIXfour five".to_string(), + "PREFIXsix seven".to_string(), + "PREFIXeight".to_string(), + "PREFIXnine ten".to_string(), + ], parts); + } + + #[test] + fn test_smart_split_realistic() { + let to_split = "\ + Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n\ + Aenean venenatis libero ac ex suscipit, nec efficitur arcu convallis.\n\ + Nulla ante neque, efficitur nec fermentum a, fermentum nec nisl.\n\ + Sed dolor ex, vestibulum at malesuada ut, faucibus ac ante.\n\ + Nullam scelerisque magna dui, id tempor purus faucibus sit amet.\n\ + Curabitur pretium condimentum pharetra.\n\ + Aenean dictum, tortor et ultrices fermentum, mauris erat vehicula lectus.\n\ + Nec varius mauris sem sollicitudin dolor. Nunc porta in urna nec vulputate."; + let parts = super::smart_split(to_split, Some("@pepa@pig.club ".to_string()), 140); + assert_eq!(vec![ + "@pepa@pig.club Lorem ipsum dolor sit amet, consectetur adipiscing elit.".to_string(), + "@pepa@pig.club Aenean venenatis libero ac ex suscipit, nec efficitur arcu convallis.".to_string(), + "@pepa@pig.club Nulla ante neque, efficitur nec fermentum a, fermentum nec nisl.\nSed dolor ex, vestibulum at malesuada ut, faucibus ac ante.".to_string(), + "@pepa@pig.club Nullam scelerisque magna dui, id tempor purus faucibus sit amet.\nCurabitur pretium condimentum pharetra.".to_string(), + "@pepa@pig.club Aenean dictum, tortor et ultrices fermentum, mauris erat vehicula lectus.".to_string(), + "@pepa@pig.club Nec varius mauris sem sollicitudin dolor. Nunc porta in urna nec vulputate.".to_string(), + ], parts); + } } \ No newline at end of file