From 569ed6c0cc440572b37a06200b6706badcdf8e45 Mon Sep 17 00:00:00 2001 From: Tursiae Date: Tue, 11 Feb 2025 00:23:35 +0000 Subject: [PATCH] TTS: Implement and wire up a TTS toggle in Display Settings (#251) This change introduces the ability to enable or disable the spoken interface/TTS from the on-device settings, either via the UI or the Lua console (the `display.text_to_speech` property). This closes out the implementation of issue #245. The TTS setting is only visible in Display settings if voice samples are present in `/.tangara-tts/` on the SD card; the check only looks for at least one entry in that directory. While TTS is disabled, playback of new TTS voice samples is inhibited; a sample that is already playing is not interrupted. The setting is enabled by default, since the device only plays back TTS voices when samples are present on disk. If you need samples to test TTS on your device, feel free to grab the voice samples I have at https://codeberg.org/tursiae/tangara-tts-samples. They cover about 80-85% of the UI, with the remainder to be added soon. Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/251 Co-authored-by: Tursiae Co-committed-by: Tursiae --- lua/settings.lua | 37 +++++++++++++++++++++++++++++ src/drivers/include/drivers/nvs.hpp | 4 ++++ src/drivers/nvs.cpp | 19 +++++++++++++++ src/tangara/system_fsm/booting.cpp | 2 +- src/tangara/tts/events.hpp | 11 ++++++++- src/tangara/tts/provider.cpp | 9 +++++-- src/tangara/tts/provider.hpp | 5 +++- src/tangara/ui/ui_fsm.cpp | 12 ++++++++++ src/tangara/ui/ui_fsm.hpp | 1 + 9 files changed, 95 insertions(+), 5 deletions(-) diff --git a/lua/settings.lua b/lua/settings.lua index 0ae4e73d..0238a876 100644 --- a/lua/settings.lua +++ b/lua/settings.lua @@ -320,6 +320,43 @@ settings.DisplaySettings = SettingsScreen:new { brightness_pct:set { text = tostring(b) .. "%" } end) } + + -- Only show the TTS option if there are voice samples on disk. 
+ local tts_iter = filesystem.iterator("/.tangara-tts/") + for _ in tts_iter do + local text_to_speech_container = self.content:Object { + flex = { + flex_direction = "row", + justify_content = "flex-start", + align_items = "center", + align_content = "flex-start", + }, + w = lvgl.PCT(100), + h = lvgl.SIZE_CONTENT, + pad_bottom = 4, + } + text_to_speech_container:add_style(styles.list_item) + text_to_speech_container:Label { text = "Spoken Interface", flex_grow = 1 } + local text_to_speech_sw = text_to_speech_container:Switch {} + local tts_desc = widgets.Description(text_to_speech_sw, "Spoken interface") + + text_to_speech_sw:onevent(lvgl.EVENT.VALUE_CHANGED, function() + display.text_to_speech:set(text_to_speech_sw:enabled()) + end) + + self.bindings = self.bindings + { + display.text_to_speech:bind(function(en) + if en then + text_to_speech_sw:add_state(lvgl.STATE.CHECKED) + else + text_to_speech_sw:clear_state(lvgl.STATE.CHECKED) + end + end), + } + + break + end + end } diff --git a/src/drivers/include/drivers/nvs.hpp b/src/drivers/include/drivers/nvs.hpp index e3a105f8..21d32f42 100644 --- a/src/drivers/include/drivers/nvs.hpp +++ b/src/drivers/include/drivers/nvs.hpp @@ -122,6 +122,9 @@ class NvsStorage { auto ScreenBrightness() -> uint_fast8_t; auto ScreenBrightness(uint_fast8_t) -> void; + auto UITextToSpeech() -> bool; + auto UITextToSpeech(bool) -> void; + auto InterfaceTheme() -> std::optional; auto InterfaceTheme(std::string) -> void; @@ -179,6 +182,7 @@ class NvsStorage { Setting fast_charge_; Setting brightness_; + Setting text_to_speech_; Setting sensitivity_; Setting amp_max_vol_; Setting amp_cur_vol_; diff --git a/src/drivers/nvs.cpp b/src/drivers/nvs.cpp index 04a93fd9..3250e556 100644 --- a/src/drivers/nvs.cpp +++ b/src/drivers/nvs.cpp @@ -29,6 +29,7 @@ static constexpr char kKeyBluetoothVolumes[] = "bt_vols"; static constexpr char kKeyBluetoothNames[] = "bt_names"; static constexpr char kKeyOutput[] = "out"; static constexpr char 
kKeyBrightness[] = "bright"; +static constexpr char kKeyTextToSpeech[] = "tts"; static constexpr char kKeyInterfaceTheme[] = "ui_theme"; static constexpr char kKeyAmpMaxVolume[] = "hp_vol_max"; static constexpr char kKeyAmpCurrentVolume[] = "hp_vol"; @@ -269,6 +270,7 @@ NvsStorage::NvsStorage(nvs_handle_t handle) lra_calibration_(kKeyLraCalibration), fast_charge_(kKeyFastCharge), brightness_(kKeyBrightness), + text_to_speech_(kKeyTextToSpeech), sensitivity_(kKeyScrollSensitivity), amp_max_vol_(kKeyAmpMaxVolume), amp_cur_vol_(kKeyAmpCurrentVolume), @@ -299,6 +301,7 @@ auto NvsStorage::Read() -> void { lra_calibration_.read(handle_); fast_charge_.read(handle_); brightness_.read(handle_); + text_to_speech_.read(handle_); sensitivity_.read(handle_); amp_max_vol_.read(handle_); amp_cur_vol_.read(handle_); @@ -324,6 +327,7 @@ auto NvsStorage::Write() -> bool { lra_calibration_.write(handle_); fast_charge_.write(handle_); brightness_.write(handle_); + text_to_speech_.write(handle_); sensitivity_.write(handle_); amp_max_vol_.write(handle_); amp_cur_vol_.write(handle_); @@ -532,6 +536,21 @@ auto NvsStorage::ScreenBrightness(uint_fast8_t val) -> void { brightness_.set(val); } +auto NvsStorage::UITextToSpeech() -> bool { + std::lock_guard lock{mutex_}; + + // Default to enabling text-to-speech if not set; this may need to be + // revisited if we end up adding on-device speech generation, but in a world + // where speech samples need to be loaded onto the SD card, it makes sense to + // enable this by default, as it'll only work if speech samples are present. 
+ return text_to_speech_.get().value_or(true); +} + +auto NvsStorage::UITextToSpeech(bool val) -> void { + std::lock_guard lock{mutex_}; + text_to_speech_.set(val); +} + auto NvsStorage::InterfaceTheme() -> std::optional { std::lock_guard lock{mutex_}; return theme_.get(); diff --git a/src/tangara/system_fsm/booting.cpp b/src/tangara/system_fsm/booting.cpp index b909e1ba..0bc6da8e 100644 --- a/src/tangara/system_fsm/booting.cpp +++ b/src/tangara/system_fsm/booting.cpp @@ -100,7 +100,7 @@ auto Booting::entry() -> void { sServices->bg_worker(), sServices->database(), sServices->nvs())); sServices->tag_parser(std::make_unique()); sServices->collator(locale::CreateCollator()); - sServices->tts(std::make_unique()); + sServices->tts(std::make_unique(sServices->nvs())); ESP_LOGI(kTag, "init bluetooth"); sServices->bluetooth(std::make_unique( diff --git a/src/tangara/tts/events.hpp b/src/tangara/tts/events.hpp index 21199db1..595ff181 100644 --- a/src/tangara/tts/events.hpp +++ b/src/tangara/tts/events.hpp @@ -36,6 +36,15 @@ struct SelectionChanged { std::optional new_selection; }; -using Event = std::variant; +/* + Event emitted when a user enables or disables the 'Spoken Interface' (TTS) + setting on the device. This is used to convey the new state to the + tts::Provider, but will not stop the current sample from being played. 
+*/ +struct TtsEnabledChanged { + bool tts_enabled; +}; + +using Event = std::variant; } // namespace tts diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp index eedfe959..2fb6c426 100644 --- a/src/tangara/tts/provider.cpp +++ b/src/tangara/tts/provider.cpp @@ -36,7 +36,9 @@ static auto textToFile(const std::string& text) -> std::optional { return stream.str(); } -Provider::Provider() {} +Provider::Provider(drivers::NvsStorage& nvs) : nvs_(nvs) { + tts_enabled_ = nvs_.UITextToSpeech(); +} auto Provider::player(std::unique_ptr p) -> void { player_ = std::move(p); @@ -45,6 +47,9 @@ auto Provider::player(std::unique_ptr p) -> void { auto Provider::feed(const Event& e) -> void { if (std::holds_alternative(e)) { // ESP_LOGI(kTag, "context changed"); + } else if (std::holds_alternative(e)) { + auto ev = std::get(e); + tts_enabled_ = ev.tts_enabled; } else if (std::holds_alternative(e)) { auto ev = std::get(e); if (!ev.new_selection) { @@ -63,7 +68,7 @@ auto Provider::feed(const Event& e) -> void { return; } - if (player_) { + if (player_ && tts_enabled_) { player_->playFile(*text, *file); } } diff --git a/src/tangara/tts/provider.hpp b/src/tangara/tts/provider.hpp index 8fe143cc..3a16d5a9 100644 --- a/src/tangara/tts/provider.hpp +++ b/src/tangara/tts/provider.hpp @@ -13,6 +13,7 @@ #include "tts/events.hpp" #include "tts/player.hpp" +#include "drivers/nvs.hpp" namespace tts { @@ -23,7 +24,7 @@ namespace tts { */ class Provider { public: - Provider(); + Provider(drivers::NvsStorage& nvs); auto player(std::unique_ptr) -> void; @@ -34,7 +35,9 @@ class Provider { Provider& operator=(const Provider&) = delete; private: + drivers::NvsStorage& nvs_; std::unique_ptr player_; + bool tts_enabled_; }; } // namespace tts diff --git a/src/tangara/ui/ui_fsm.cpp b/src/tangara/ui/ui_fsm.cpp index b974da53..a22c704b 100644 --- a/src/tangara/ui/ui_fsm.cpp +++ b/src/tangara/ui/ui_fsm.cpp @@ -295,6 +295,16 @@ lua::Property UiState::sDisplayBrightness{ return true; }}; 
+lua::Property UiState::sDisplayTextToSpeech{ + false, [](const lua::LuaValue& val) { + if (!std::holds_alternative(val)) { + return false; + } + sServices->nvs().UITextToSpeech(std::get(val)); + sServices->tts().feed(tts::TtsEnabledChanged{.tts_enabled = std::get(val)}); + return true; + }}; + lua::Property UiState::sLockSwitch{false}; lua::Property UiState::sDatabaseUpdating{false}; @@ -661,6 +671,7 @@ void Lua::entry() { registry.AddPropertyModule("display", { {"brightness", &sDisplayBrightness}, + {"text_to_speech", &sDisplayTextToSpeech}, }); registry.AddPropertyModule( @@ -721,6 +732,7 @@ void Lua::entry() { sBluetoothKnownDevices.setDirect(bt.knownDevices()); sPowerFastChargeEnabled.setDirect(sServices->nvs().FastCharge()); + sDisplayTextToSpeech.setDirect(sServices->nvs().UITextToSpeech()); if (sServices->sd() == drivers::SdState::kMounted) { sLua->RunScript("/sd/config.lua"); diff --git a/src/tangara/ui/ui_fsm.hpp b/src/tangara/ui/ui_fsm.hpp index 53252a8d..aae69f94 100644 --- a/src/tangara/ui/ui_fsm.hpp +++ b/src/tangara/ui/ui_fsm.hpp @@ -130,6 +130,7 @@ class UiState : public tinyfsm::Fsm { static lua::Property sVolumeLimit; static lua::Property sDisplayBrightness; + static lua::Property sDisplayTextToSpeech; static lua::Property sLockSwitch;