From 569ed6c0cc440572b37a06200b6706badcdf8e45 Mon Sep 17 00:00:00 2001
From: Tursiae <git@tursiae.org>
Date: Tue, 11 Feb 2025 00:23:35 +0000
Subject: [PATCH] TTS: Implement and wire up a TTS toggle in Display Settings
 (#251)

This change introduces the ability to enable or disable the spoken interface/TTS from the on-device settings, either via the UI or the Lua console. This closes out the implementation of issue #245.

The TTS setting is only visible in Display settings if voice samples are present in `/.tangara-tts/` on the SD card.

When TTS is disabled, no new voice samples are played; a sample that is already playing is not interrupted. By default, the setting is enabled, as the device will only play back TTS voices if samples are present on disk.

If you need samples to test TTS on your device, feel free to grab the voice samples I have at https://codeberg.org/tursiae/tangara-tts-samples. There's about 80-85% coverage of the UI, with the remainder to be added soonish.

Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/251
Co-authored-by: Tursiae <git@tursiae.org>
Co-committed-by: Tursiae <git@tursiae.org>
---
 lua/settings.lua                    | 37 +++++++++++++++++++++++++++++
 src/drivers/include/drivers/nvs.hpp |  4 ++++
 src/drivers/nvs.cpp                 | 19 +++++++++++++++
 src/tangara/system_fsm/booting.cpp  |  2 +-
 src/tangara/tts/events.hpp          | 11 ++++++++-
 src/tangara/tts/provider.cpp        |  9 +++++--
 src/tangara/tts/provider.hpp        |  5 +++-
 src/tangara/ui/ui_fsm.cpp           | 12 ++++++++++
 src/tangara/ui/ui_fsm.hpp           |  1 +
 9 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/lua/settings.lua b/lua/settings.lua
index 0ae4e73d..0238a876 100644
--- a/lua/settings.lua
+++ b/lua/settings.lua
@@ -320,6 +320,43 @@ settings.DisplaySettings = SettingsScreen:new {
         brightness_pct:set { text = tostring(b) .. "%" }
       end)
     }
+
+    -- Only show the TTS option if there are voice samples on disk.
+    local tts_iter = filesystem.iterator("/.tangara-tts/")
+    for _ in tts_iter do
+      local text_to_speech_container = self.content:Object {
+        flex = {
+          flex_direction = "row",
+          justify_content = "flex-start",
+          align_items = "center",
+          align_content = "flex-start",
+        },
+        w = lvgl.PCT(100),
+        h = lvgl.SIZE_CONTENT,
+        pad_bottom = 4,
+      }
+      text_to_speech_container:add_style(styles.list_item)
+      text_to_speech_container:Label { text = "Spoken Interface", flex_grow = 1 }
+      local text_to_speech_sw = text_to_speech_container:Switch {}
+      local tts_desc = widgets.Description(text_to_speech_sw, "Spoken interface")
+      -- Write the switch's state to the setting whenever its value changes.
+      text_to_speech_sw:onevent(lvgl.EVENT.VALUE_CHANGED, function()
+        display.text_to_speech:set(text_to_speech_sw:enabled())
+      end)
+      -- Keep the switch's checked state in sync with the setting's value.
+      self.bindings = self.bindings + {
+        display.text_to_speech:bind(function(en)
+          if en then
+            text_to_speech_sw:add_state(lvgl.STATE.CHECKED)
+          else
+            text_to_speech_sw:clear_state(lvgl.STATE.CHECKED)
+          end
+        end),
+      }
+      -- The loop only tests for a first entry, so stop after building the row.
+      break
+    end
+
   end
 }
 
diff --git a/src/drivers/include/drivers/nvs.hpp b/src/drivers/include/drivers/nvs.hpp
index e3a105f8..21d32f42 100644
--- a/src/drivers/include/drivers/nvs.hpp
+++ b/src/drivers/include/drivers/nvs.hpp
@@ -122,6 +122,9 @@ class NvsStorage {
   auto ScreenBrightness() -> uint_fast8_t;
   auto ScreenBrightness(uint_fast8_t) -> void;
 
+  auto UITextToSpeech() -> bool;
+  auto UITextToSpeech(bool) -> void;
+
   auto InterfaceTheme() -> std::optional<std::string>;
   auto InterfaceTheme(std::string) -> void;
 
@@ -179,6 +182,7 @@ class NvsStorage {
   Setting<uint8_t> fast_charge_;
 
   Setting<uint8_t> brightness_;
+  Setting<uint8_t> text_to_speech_;
   Setting<uint8_t> sensitivity_;
   Setting<uint16_t> amp_max_vol_;
   Setting<uint16_t> amp_cur_vol_;
diff --git a/src/drivers/nvs.cpp b/src/drivers/nvs.cpp
index 04a93fd9..3250e556 100644
--- a/src/drivers/nvs.cpp
+++ b/src/drivers/nvs.cpp
@@ -29,6 +29,7 @@ static constexpr char kKeyBluetoothVolumes[] = "bt_vols";
 static constexpr char kKeyBluetoothNames[] = "bt_names";
 static constexpr char kKeyOutput[] = "out";
 static constexpr char kKeyBrightness[] = "bright";
+static constexpr char kKeyTextToSpeech[] = "tts";
 static constexpr char kKeyInterfaceTheme[] = "ui_theme";
 static constexpr char kKeyAmpMaxVolume[] = "hp_vol_max";
 static constexpr char kKeyAmpCurrentVolume[] = "hp_vol";
@@ -269,6 +270,7 @@ NvsStorage::NvsStorage(nvs_handle_t handle)
       lra_calibration_(kKeyLraCalibration),
       fast_charge_(kKeyFastCharge),
       brightness_(kKeyBrightness),
+      text_to_speech_(kKeyTextToSpeech),
       sensitivity_(kKeyScrollSensitivity),
       amp_max_vol_(kKeyAmpMaxVolume),
       amp_cur_vol_(kKeyAmpCurrentVolume),
@@ -299,6 +301,7 @@ auto NvsStorage::Read() -> void {
   lra_calibration_.read(handle_);
   fast_charge_.read(handle_);
   brightness_.read(handle_);
+  text_to_speech_.read(handle_);
   sensitivity_.read(handle_);
   amp_max_vol_.read(handle_);
   amp_cur_vol_.read(handle_);
@@ -324,6 +327,7 @@ auto NvsStorage::Write() -> bool {
   lra_calibration_.write(handle_);
   fast_charge_.write(handle_);
   brightness_.write(handle_);
+  text_to_speech_.write(handle_);
   sensitivity_.write(handle_);
   amp_max_vol_.write(handle_);
   amp_cur_vol_.write(handle_);
@@ -532,6 +536,21 @@ auto NvsStorage::ScreenBrightness(uint_fast8_t val) -> void {
   brightness_.set(val);
 }
 
+auto NvsStorage::UITextToSpeech() -> bool {
+  std::lock_guard<std::mutex> lock{mutex_};
+  // An unset value reads as enabled: speech is only played when samples exist
+  // on the SD card, so defaulting to on is harmless without them. Revisit this
+  // default if on-device speech generation is ever added.
+  const auto stored = text_to_speech_.get();
+  return stored.value_or(true);
+}
+
+// Updates the spoken-interface setting while holding mutex_.
+auto NvsStorage::UITextToSpeech(bool val) -> void {
+  std::lock_guard<std::mutex> lock{mutex_};
+  text_to_speech_.set(val);
+}
+
 auto NvsStorage::InterfaceTheme() -> std::optional<std::string> {
   std::lock_guard<std::mutex> lock{mutex_};
   return theme_.get();
diff --git a/src/tangara/system_fsm/booting.cpp b/src/tangara/system_fsm/booting.cpp
index b909e1ba..0bc6da8e 100644
--- a/src/tangara/system_fsm/booting.cpp
+++ b/src/tangara/system_fsm/booting.cpp
@@ -100,7 +100,7 @@ auto Booting::entry() -> void {
       sServices->bg_worker(), sServices->database(), sServices->nvs()));
   sServices->tag_parser(std::make_unique<database::TagParserImpl>());
   sServices->collator(locale::CreateCollator());
-  sServices->tts(std::make_unique<tts::Provider>());
+  sServices->tts(std::make_unique<tts::Provider>(sServices->nvs()));
 
   ESP_LOGI(kTag, "init bluetooth");
   sServices->bluetooth(std::make_unique<drivers::Bluetooth>(
diff --git a/src/tangara/tts/events.hpp b/src/tangara/tts/events.hpp
index 21199db1..595ff181 100644
--- a/src/tangara/tts/events.hpp
+++ b/src/tangara/tts/events.hpp
@@ -36,6 +36,15 @@ struct SelectionChanged {
   std::optional<Selection> new_selection;
 };
 
-using Event = std::variant<SimpleEvent, SelectionChanged>;
+/*
+  Event emitted when a user enables or disables the 'Spoken Interface' (TTS)
+  setting on the device. This is used to convey the new state to the
+  tts::Provider, but will not stop the current sample from being played.
+*/
+struct TtsEnabledChanged {
+  bool tts_enabled;  // The new state of the setting; true when enabled.
+};
+
+using Event = std::variant<SimpleEvent, SelectionChanged, TtsEnabledChanged>;
 
 }  // namespace tts
diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp
index eedfe959..2fb6c426 100644
--- a/src/tangara/tts/provider.cpp
+++ b/src/tangara/tts/provider.cpp
@@ -36,7 +36,9 @@ static auto textToFile(const std::string& text) -> std::optional<std::string> {
   return stream.str();
 }
 
-Provider::Provider() {}
+// Seeds tts_enabled_ from the value stored in NVS at construction time.
+Provider::Provider(drivers::NvsStorage& nvs)
+    : nvs_(nvs), tts_enabled_(nvs.UITextToSpeech()) {}
 
 auto Provider::player(std::unique_ptr<Player> p) -> void {
   player_ = std::move(p);
@@ -45,6 +47,9 @@ auto Provider::player(std::unique_ptr<Player> p) -> void {
 auto Provider::feed(const Event& e) -> void {
   if (std::holds_alternative<SimpleEvent>(e)) {
     // ESP_LOGI(kTag, "context changed");
+  } else if (std::holds_alternative<TtsEnabledChanged>(e)) {
+    auto ev = std::get<TtsEnabledChanged>(e);
+    tts_enabled_ = ev.tts_enabled;
   } else if (std::holds_alternative<SelectionChanged>(e)) {
     auto ev = std::get<SelectionChanged>(e);
     if (!ev.new_selection) {
@@ -63,7 +68,7 @@ auto Provider::feed(const Event& e) -> void {
         return;
       }
 
-      if (player_) {
+      if (player_ && tts_enabled_) {
         player_->playFile(*text, *file);
       }
     }
diff --git a/src/tangara/tts/provider.hpp b/src/tangara/tts/provider.hpp
index 8fe143cc..3a16d5a9 100644
--- a/src/tangara/tts/provider.hpp
+++ b/src/tangara/tts/provider.hpp
@@ -13,6 +13,7 @@
 
 #include "tts/events.hpp"
 #include "tts/player.hpp"
+#include "drivers/nvs.hpp"
 
 namespace tts {
 
@@ -23,7 +24,7 @@ namespace tts {
  */
 class Provider {
  public:
-  Provider();
+  Provider(drivers::NvsStorage& nvs);
 
   auto player(std::unique_ptr<Player>) -> void;
 
@@ -34,7 +35,9 @@ class Provider {
   Provider& operator=(const Provider&) = delete;
 
  private:
+  drivers::NvsStorage& nvs_;
   std::unique_ptr<Player> player_;
+  bool tts_enabled_;
 };
 
 }  // namespace tts
diff --git a/src/tangara/ui/ui_fsm.cpp b/src/tangara/ui/ui_fsm.cpp
index b974da53..a22c704b 100644
--- a/src/tangara/ui/ui_fsm.cpp
+++ b/src/tangara/ui/ui_fsm.cpp
@@ -295,6 +295,16 @@ lua::Property UiState::sDisplayBrightness{
       return true;
     }};
 
+lua::Property UiState::sDisplayTextToSpeech{
+    false, [](const lua::LuaValue& val) {
+      // Non-boolean values return false; otherwise store in NVS, notify TTS.
+      const bool* on = std::get_if<bool>(&val);
+      if (!on) return false;
+      sServices->nvs().UITextToSpeech(*on);
+      sServices->tts().feed(tts::TtsEnabledChanged{.tts_enabled = *on});
+      return true;
+    }};
+
 lua::Property UiState::sLockSwitch{false};
 
 lua::Property UiState::sDatabaseUpdating{false};
@@ -661,6 +671,7 @@ void Lua::entry() {
     registry.AddPropertyModule("display",
                                {
                                    {"brightness", &sDisplayBrightness},
+                                   {"text_to_speech", &sDisplayTextToSpeech},
                                });
 
     registry.AddPropertyModule(
@@ -721,6 +732,7 @@ void Lua::entry() {
     sBluetoothKnownDevices.setDirect(bt.knownDevices());
 
     sPowerFastChargeEnabled.setDirect(sServices->nvs().FastCharge());
+    sDisplayTextToSpeech.setDirect(sServices->nvs().UITextToSpeech());
 
     if (sServices->sd() == drivers::SdState::kMounted) {
       sLua->RunScript("/sd/config.lua");
diff --git a/src/tangara/ui/ui_fsm.hpp b/src/tangara/ui/ui_fsm.hpp
index 53252a8d..aae69f94 100644
--- a/src/tangara/ui/ui_fsm.hpp
+++ b/src/tangara/ui/ui_fsm.hpp
@@ -130,6 +130,7 @@ class UiState : public tinyfsm::Fsm<UiState> {
   static lua::Property sVolumeLimit;
 
   static lua::Property sDisplayBrightness;
+  static lua::Property sDisplayTextToSpeech;
 
   static lua::Property sLockSwitch;