Merge pull request #104873 from bruvzg/tts_on_demand

Enable TTS on demand, instead of fully disabling it when project setting is not set.
2025-04-07 00:44:24 +08:00 · 2025-04-01 19:53:28 -05:00 · 2025-04-01 19:53:28 -05:00 · 9637894c59
commit 9637894c59
parent b37e368e5e 4638ade13f
16 changed files with 240 additions and 84 deletions
--- a/doc/classes/DisplayServer.xml
+++ b/doc/classes/DisplayServer.xml
@ -1325,7 +1325,6 @@
 				- [code]language[/code] is language code in [code]lang_Variant[/code] format. The [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. The [code skip-lint]Variant[/code] part is an engine-dependent string describing country, region or/and dialect.
 				Note that Godot depends on system libraries for text-to-speech functionality. These libraries are installed by default on Windows and macOS, but not on all Linux distributions. If they are not present, this method will return an empty list. This applies to both Godot users on Linux, as well as end-users on Linux running Godot games that use text-to-speech.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_get_voices_for_language" qualifiers="const">
@ -1334,7 +1333,6 @@
 			<description>
 				Returns an [PackedStringArray] of voice identifiers for the [param language].
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_is_paused" qualifiers="const">
@ -1342,7 +1340,6 @@
 			<description>
 				Returns [code]true[/code] if the synthesizer is in a paused state.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_is_speaking" qualifiers="const">
@ -1350,7 +1347,6 @@
 			<description>
 				Returns [code]true[/code] if the synthesizer is generating speech, or has utterances waiting in the queue.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_pause">
@ -1358,7 +1354,6 @@
 			<description>
 				Puts the synthesizer into a paused state.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_resume">
@ -1366,7 +1361,6 @@
 			<description>
 				Resumes the synthesizer if it was paused.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_set_utterance_callback">
@ -1379,7 +1373,6 @@
 				- [constant TTS_UTTERANCE_BOUNDARY] callable's method should take two [int] parameters, the index of the character and the utterance ID.
 				[b]Note:[/b] The granularity of the boundary callbacks is engine dependent.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_speak">
@ -1401,7 +1394,6 @@
 				[b]Note:[/b] On Windows and Linux (X11/Wayland), utterance [param text] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak].
 				[b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="tts_stop">
@ -1409,7 +1401,6 @@
 			<description>
 				Stops synthesis in progress and removes all utterances from the queue.
 				[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
-				[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
 			</description>
 		</method>
 		<method name="unregister_additional_output">
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@ -427,7 +427,7 @@
 			Sets the [url=https://developer.apple.com/documentation/avfaudio/avaudiosessioncategory]AVAudioSessionCategory[/url] on iOS. Use the [code]Playback[/code] category to get sound output, even if the phone is in silent mode.
 		</member>
 		<member name="audio/general/text_to_speech" type="bool" setter="" getter="" default="false">
-			If [code]true[/code], text-to-speech support is enabled, see [method DisplayServer.tts_get_voices] and [method DisplayServer.tts_speak].
+			If [code]true[/code], text-to-speech support is enabled on startup; otherwise, it is enabled the first time a TTS method is used. See [method DisplayServer.tts_get_voices] and [method DisplayServer.tts_speak].
 			[b]Note:[/b] Enabling TTS can cause additional idle CPU usage and interfere with the sleep mode, so consider disabling it if TTS is not used.
 		</member>
 		<member name="audio/video/video_delay_compensation_ms" type="int" setter="" getter="" default="0">
--- a/platform/android/tts_android.cpp
+++ b/platform/android/tts_android.cpp
@ -49,30 +49,37 @@ jmethodID TTS_Android::_stop_speaking = nullptr;

 HashMap<int, Char16String> TTS_Android::ids;

+void TTS_Android::initialize_tts() {
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_NULL(env);
+
+	if (_init) {
+		env->CallVoidMethod(tts, _init);
+		initialized = true;
+	}
+}
+
 void TTS_Android::setup(jobject p_tts) {
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_NULL(env);
+
+	tts = env->NewGlobalRef(p_tts);
+
+	jclass c = env->GetObjectClass(tts);
+	cls = (jclass)env->NewGlobalRef(c);
+
+	_init = env->GetMethodID(cls, "init", "()V");
+	_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z");
+	_is_paused = env->GetMethodID(cls, "isPaused", "()Z");
+	_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;");
+	_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V");
+	_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V");
+	_resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V");
+	_stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V");
+
 	bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
 	if (tts_enabled) {
-		JNIEnv *env = get_jni_env();
-		ERR_FAIL_NULL(env);
-
-		tts = env->NewGlobalRef(p_tts);
-
-		jclass c = env->GetObjectClass(tts);
-		cls = (jclass)env->NewGlobalRef(c);
-
-		_init = env->GetMethodID(cls, "init", "()V");
-		_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z");
-		_is_paused = env->GetMethodID(cls, "isPaused", "()Z");
-		_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;");
-		_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V");
-		_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V");
-		_resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V");
-		_stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V");
-
-		if (_init) {
-			env->CallVoidMethod(tts, _init);
-			initialized = true;
-		}
+		initialize_tts();
 	}
 }

@ -80,12 +87,19 @@ void TTS_Android::terminate() {
 	JNIEnv *env = get_jni_env();
 	ERR_FAIL_NULL(env);

-	env->DeleteGlobalRef(cls);
-	env->DeleteGlobalRef(tts);
+	if (cls) {
+		env->DeleteGlobalRef(cls);
+	}
+	if (tts) {
+		env->DeleteGlobalRef(tts);
+	}
 }

 void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) {
-	ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	if (ids.has(p_id)) {
 		int pos = 0;
 		if ((DisplayServer::TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
@ -106,7 +120,10 @@ void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) {
 }

 bool TTS_Android::is_speaking() {
-	ERR_FAIL_COND_V_MSG(!initialized, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	if (_is_speaking) {
 		JNIEnv *env = get_jni_env();

@ -118,7 +135,10 @@ bool TTS_Android::is_speaking() {
 }

 bool TTS_Android::is_paused() {
-	ERR_FAIL_COND_V_MSG(!initialized, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	if (_is_paused) {
 		JNIEnv *env = get_jni_env();

@ -130,7 +150,10 @@ bool TTS_Android::is_paused() {
 }

 Array TTS_Android::get_voices() {
-	ERR_FAIL_COND_V_MSG(!initialized, Array(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, Array());
 	Array list;
 	if (_get_voices) {
 		JNIEnv *env = get_jni_env();
@ -158,7 +181,10 @@ Array TTS_Android::get_voices() {
 }

 void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
-	ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	if (p_interrupt) {
 		stop();
 	}
@ -183,7 +209,10 @@ void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volum
 }

 void TTS_Android::pause() {
-	ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	if (_pause_speaking) {
 		JNIEnv *env = get_jni_env();

@ -193,7 +222,10 @@ void TTS_Android::pause() {
 }

 void TTS_Android::resume() {
-	ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	if (_resume_speaking) {
 		JNIEnv *env = get_jni_env();

@ -203,7 +235,10 @@ void TTS_Android::resume() {
 }

 void TTS_Android::stop() {
-	ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!initialized)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	for (const KeyValue<int, Char16String> &E : ids) {
 		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E.key);
 	}
--- a/platform/android/tts_android.h
+++ b/platform/android/tts_android.h
@ -54,6 +54,8 @@ class TTS_Android {

 	static HashMap<int, Char16String> ids;

+	static void initialize_tts();
+
 public:
 	static void setup(jobject p_tts);
 	static void terminate();
--- a/platform/ios/display_server_ios.h
+++ b/platform/ios/display_server_ios.h
@ -83,6 +83,8 @@ class DisplayServerIOS : public DisplayServer {

 	void perform_event(const Ref<InputEvent> &p_event);

+	void initialize_tts() const;
+
 	DisplayServerIOS(const String &p_rendering_driver, DisplayServer::WindowMode p_mode, DisplayServer::VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i *p_position, const Vector2i &p_resolution, int p_screen, Context p_context, int64_t p_parent_window, Error &r_error);
 	~DisplayServerIOS();

--- a/platform/ios/display_server_ios.mm
+++ b/platform/ios/display_server_ios.mm
@ -61,7 +61,7 @@ DisplayServerIOS::DisplayServerIOS(const String &p_rendering_driver, WindowMode
 	// Init TTS
 	bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
 	if (tts_enabled) {
-		tts = [[TTS_IOS alloc] init];
+		initialize_tts();
 	}
 	native_menu = memnew(NativeMenu);

@ -389,39 +389,63 @@ bool DisplayServerIOS::has_feature(Feature p_feature) const {
 String DisplayServerIOS::get_name() const {
 	return "iOS";
 }
+void DisplayServerIOS::initialize_tts() const {
+	const_cast<DisplayServerIOS *>(this)->tts = [[TTS_IOS alloc] init];
+}

 bool DisplayServerIOS::tts_is_speaking() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return [tts isSpeaking];
 }

 bool DisplayServerIOS::tts_is_paused() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return [tts isPaused];
 }

 TypedArray<Dictionary> DisplayServerIOS::tts_get_voices() const {
-	ERR_FAIL_NULL_V_MSG(tts, TypedArray<Dictionary>(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, TypedArray<Dictionary>());
 	return [tts getVoices];
 }

 void DisplayServerIOS::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt];
 }

 void DisplayServerIOS::tts_pause() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts pauseSpeaking];
 }

 void DisplayServerIOS::tts_resume() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts resumeSpeaking];
 }

 void DisplayServerIOS::tts_stop() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts stopSpeaking];
 }

--- a/platform/linuxbsd/wayland/display_server_wayland.cpp
+++ b/platform/linuxbsd/wayland/display_server_wayland.cpp
@ -245,37 +245,62 @@ String DisplayServerWayland::get_name() const {

 #ifdef SPEECHD_ENABLED

+void DisplayServerWayland::initialize_tts() const {
+	const_cast<DisplayServerWayland *>(this)->tts = memnew(TTS_Linux);
+}
+
 bool DisplayServerWayland::tts_is_speaking() const {
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
 	ERR_FAIL_NULL_V(tts, false);
 	return tts->is_speaking();
 }

 bool DisplayServerWayland::tts_is_paused() const {
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
 	ERR_FAIL_NULL_V(tts, false);
 	return tts->is_paused();
 }

 TypedArray<Dictionary> DisplayServerWayland::tts_get_voices() const {
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
 	ERR_FAIL_NULL_V(tts, TypedArray<Dictionary>());
 	return tts->get_voices();
 }

 void DisplayServerWayland::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
 	ERR_FAIL_NULL(tts);
 	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
 }

 void DisplayServerWayland::tts_pause() {
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
 	ERR_FAIL_NULL(tts);
 	tts->pause();
 }

 void DisplayServerWayland::tts_resume() {
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
 	ERR_FAIL_NULL(tts);
 	tts->resume();
 }

 void DisplayServerWayland::tts_stop() {
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
 	ERR_FAIL_NULL(tts);
 	tts->stop();
 }
@ -1439,7 +1464,10 @@ DisplayServerWayland::DisplayServerWayland(const String &p_rendering_driver, Win

 #ifdef SPEECHD_ENABLED
 	// Init TTS
-	tts = memnew(TTS_Linux);
+	bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
+	if (tts_enabled) {
+		initialize_tts();
+	}
 #endif

 	rendering_driver = p_rendering_driver;
--- a/platform/linuxbsd/wayland/display_server_wayland.h
+++ b/platform/linuxbsd/wayland/display_server_wayland.h
@ -165,6 +165,8 @@ class DisplayServerWayland : public DisplayServer {

 	void try_suspend();

+	void initialize_tts() const;
+
 public:
 	virtual bool has_feature(Feature p_feature) const override;

--- a/platform/linuxbsd/x11/display_server_x11.cpp
+++ b/platform/linuxbsd/x11/display_server_x11.cpp
@ -340,38 +340,63 @@ void DisplayServerX11::_flush_mouse_motion() {

 #ifdef SPEECHD_ENABLED

+void DisplayServerX11::initialize_tts() const {
+	const_cast<DisplayServerX11 *>(this)->tts = memnew(TTS_Linux);
+}
+
 bool DisplayServerX11::tts_is_speaking() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return tts->is_speaking();
 }

 bool DisplayServerX11::tts_is_paused() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return tts->is_paused();
 }

 TypedArray<Dictionary> DisplayServerX11::tts_get_voices() const {
-	ERR_FAIL_NULL_V_MSG(tts, TypedArray<Dictionary>(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, TypedArray<Dictionary>());
 	return tts->get_voices();
 }

 void DisplayServerX11::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
 }

 void DisplayServerX11::tts_pause() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->pause();
 }

 void DisplayServerX11::tts_resume() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->resume();
 }

 void DisplayServerX11::tts_stop() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->stop();
 }

@ -6781,7 +6806,7 @@ DisplayServerX11::DisplayServerX11(const String &p_rendering_driver, WindowMode
 	// Init TTS
 	bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
 	if (tts_enabled) {
-		tts = memnew(TTS_Linux);
+		initialize_tts();
 	}
 #endif

--- a/platform/linuxbsd/x11/display_server_x11.h
+++ b/platform/linuxbsd/x11/display_server_x11.h
@ -394,6 +394,8 @@ class DisplayServerX11 : public DisplayServer {
 	void _set_window_taskbar_pager_enabled(Window p_window, bool p_enabled);
 	Rect2i _screens_get_full_rect() const;

+	void initialize_tts() const;
+
 protected:
 	void _window_changed(XEvent *event);

--- a/platform/macos/display_server_macos.h
+++ b/platform/macos/display_server_macos.h
@ -237,6 +237,8 @@ private:

 	Error _file_dialog_with_options_show(const String &p_title, const String &p_current_directory, const String &p_root, const String &p_filename, bool p_show_hidden, FileDialogMode p_mode, const Vector<String> &p_filters, const TypedArray<Dictionary> &p_options, const Callable &p_callback, bool p_options_in_cb, WindowID p_window_id);

+	void initialize_tts() const;
+
 public:
 	void menu_callback(id p_sender);

--- a/platform/macos/display_server_macos.mm
+++ b/platform/macos/display_server_macos.mm
@ -858,38 +858,63 @@ Callable DisplayServerMacOS::_help_get_action_callback() const {
 	return help_action_callback;
 }

+void DisplayServerMacOS::initialize_tts() const {
+	const_cast<DisplayServerMacOS *>(this)->tts = [[TTS_MacOS alloc] init];
+}
+
 bool DisplayServerMacOS::tts_is_speaking() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return [tts isSpeaking];
 }

 bool DisplayServerMacOS::tts_is_paused() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return [tts isPaused];
 }

 TypedArray<Dictionary> DisplayServerMacOS::tts_get_voices() const {
-	ERR_FAIL_NULL_V_MSG(tts, Array(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, TypedArray<Dictionary>());
 	return [tts getVoices];
 }

 void DisplayServerMacOS::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt];
 }

 void DisplayServerMacOS::tts_pause() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts pauseSpeaking];
 }

 void DisplayServerMacOS::tts_resume() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts resumeSpeaking];
 }

 void DisplayServerMacOS::tts_stop() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	[tts stopSpeaking];
 }

@ -3753,7 +3778,7 @@ DisplayServerMacOS::DisplayServerMacOS(const String &p_rendering_driver, WindowM
 	// Init TTS
 	bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
 	if (tts_enabled) {
-		tts = [[TTS_MacOS alloc] init];
+		initialize_tts();
 	}

 	native_menu = memnew(NativeMenuMacOS);
--- a/platform/web/display_server_web.cpp
+++ b/platform/web/display_server_web.cpp
@ -384,12 +384,10 @@ const char *DisplayServerWeb::godot2dom_cursor(DisplayServer::CursorShape p_shap
 }

 bool DisplayServerWeb::tts_is_speaking() const {
-	ERR_FAIL_COND_V_MSG(!tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
 	return godot_js_tts_is_speaking();
 }

 bool DisplayServerWeb::tts_is_paused() const {
-	ERR_FAIL_COND_V_MSG(!tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
 	return godot_js_tts_is_paused();
 }

@ -424,13 +422,11 @@ void DisplayServerWeb::_update_voices_callback(const Vector<String> &p_voices) {
 }

 TypedArray<Dictionary> DisplayServerWeb::tts_get_voices() const {
-	ERR_FAIL_COND_V_MSG(!tts, TypedArray<Dictionary>(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
 	godot_js_tts_get_voices(update_voices_callback);
 	return voices;
 }

 void DisplayServerWeb::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
-	ERR_FAIL_COND_MSG(!tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
 	if (p_interrupt) {
 		tts_stop();
 	}
@ -447,17 +443,14 @@ void DisplayServerWeb::tts_speak(const String &p_text, const String &p_voice, in
 }

 void DisplayServerWeb::tts_pause() {
-	ERR_FAIL_COND_MSG(!tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
 	godot_js_tts_pause();
 }

 void DisplayServerWeb::tts_resume() {
-	ERR_FAIL_COND_MSG(!tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
 	godot_js_tts_resume();
 }

 void DisplayServerWeb::tts_stop() {
-	ERR_FAIL_COND_MSG(!tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
 	for (const KeyValue<int, CharString> &E : utterance_ids) {
 		tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E.key);
 	}
@ -1086,7 +1079,6 @@ DisplayServer *DisplayServerWeb::create_func(const String &p_rendering_driver, W
 DisplayServerWeb::DisplayServerWeb(const String &p_rendering_driver, WindowMode p_window_mode, VSyncMode p_vsync_mode, uint32_t p_flags, const Point2i *p_position, const Size2i &p_resolution, int p_screen, Context p_context, int64_t p_parent_window, Error &r_error) {
 	r_error = OK; // Always succeeds for now.

-	tts = GLOBAL_GET("audio/general/text_to_speech");
 	native_menu = memnew(NativeMenu); // Dummy native menu.

 	// Ensure the canvas ID.
@ -1199,7 +1191,7 @@ bool DisplayServerWeb::has_feature(Feature p_feature) const {
 		case FEATURE_VIRTUAL_KEYBOARD:
 			return godot_js_display_vk_available() != 0;
 		case FEATURE_TEXT_TO_SPEECH:
-			return tts && (godot_js_display_tts_available() != 0);
+			return godot_js_display_tts_available() != 0;
 		default:
 			return false;
 	}
--- a/platform/web/display_server_web.h
+++ b/platform/web/display_server_web.h
@ -102,7 +102,6 @@ private:
 	int key_event_pos = 0;

 	bool swap_cancel_ok = false;
-	bool tts = false;
 	NativeMenu *native_menu = nullptr;

 	MouseMode mouse_mode_base = MOUSE_MODE_VISIBLE;
--- a/platform/windows/display_server_windows.cpp
+++ b/platform/windows/display_server_windows.cpp
@ -254,38 +254,63 @@ void DisplayServerWindows::_register_raw_input_devices(WindowID p_target_window)
 	}
 }

+void DisplayServerWindows::initialize_tts() const {
+	const_cast<DisplayServerWindows *>(this)->tts = memnew(TTS_Windows);
+}
+
 bool DisplayServerWindows::tts_is_speaking() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return tts->is_speaking();
 }

 bool DisplayServerWindows::tts_is_paused() const {
-	ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, false);
 	return tts->is_paused();
 }

 TypedArray<Dictionary> DisplayServerWindows::tts_get_voices() const {
-	ERR_FAIL_NULL_V_MSG(tts, TypedArray<Dictionary>(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL_V(tts, TypedArray<Dictionary>());
 	return tts->get_voices();
 }

 void DisplayServerWindows::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
 }

 void DisplayServerWindows::tts_pause() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->pause();
 }

 void DisplayServerWindows::tts_resume() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->resume();
 }

 void DisplayServerWindows::tts_stop() {
-	ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
+	if (unlikely(!tts)) {
+		initialize_tts();
+	}
+	ERR_FAIL_NULL(tts);
 	tts->stop();
 }

@ -6626,7 +6651,7 @@ DisplayServerWindows::DisplayServerWindows(const String &p_rendering_driver, Win
 	// Init TTS
 	bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
 	if (tts_enabled) {
-		tts = memnew(TTS_Windows);
+		initialize_tts();
 	}
 	native_menu = memnew(NativeMenuWindows);

--- a/platform/windows/display_server_windows.h
+++ b/platform/windows/display_server_windows.h
@ -687,6 +687,8 @@ class DisplayServerWindows : public DisplayServer {

 	HWND _find_window_from_process_id(OS::ProcessID p_pid, HWND p_current_hwnd);

+	void initialize_tts() const;
+
 public:
 	LRESULT WndProcFileDialog(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam);
 	LRESULT WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam);