about | summary | refs | log | tree | commit | diff | stats
path: root/agl_service_voiceagent/utils
diff options
context:
space:
mode:
Diffstat (limited to 'agl_service_voiceagent/utils')
-rw-r--r-- agl_service_voiceagent/utils/audio_recorder.py | 4
-rw-r--r-- agl_service_voiceagent/utils/stt_model.py | 44
2 files changed, 25 insertions, 23 deletions
diff --git a/agl_service_voiceagent/utils/audio_recorder.py b/agl_service_voiceagent/utils/audio_recorder.py
index 49716c9..e362480 100644
--- a/agl_service_voiceagent/utils/audio_recorder.py
+++ b/agl_service_voiceagent/utils/audio_recorder.py
@@ -64,7 +64,7 @@ class AudioRecorder:
"""
print("Creating pipeline for audio recording in {} mode...".format(self.mode))
self.pipeline = Gst.Pipeline()
- autoaudiosrc = Gst.ElementFactory.make("autoaudiosrc", None)
+ autoaudiosrc = Gst.ElementFactory.make("alsasrc", None)
queue = Gst.ElementFactory.make("queue", None)
queue.set_property("max-size-buffers", 0)
queue.set_property("max-size-bytes", 0)
@@ -109,6 +109,7 @@ class AudioRecorder:
Start recording audio using the GStreamer pipeline.
"""
self.pipeline.set_state(Gst.State.PLAYING)
+ self.loop.run()
print("Recording Voice Input...")
@@ -186,3 +187,4 @@ class AudioRecorder:
print("Pipeline cleanup complete!")
self.bus = None
self.pipeline = None
+ self.loop.quit()
\ No newline at end of file
diff --git a/agl_service_voiceagent/utils/stt_model.py b/agl_service_voiceagent/utils/stt_model.py
index 7e8ad8b..0a092ea 100644
--- a/agl_service_voiceagent/utils/stt_model.py
+++ b/agl_service_voiceagent/utils/stt_model.py
@@ -21,7 +21,7 @@ import wave
from agl_service_voiceagent.utils.common import generate_unique_uuid
# import the whisper model
-import whisper
+# import whisper
# for whisper timeout feature
from concurrent.futures import ThreadPoolExecutor
import subprocess
@@ -93,28 +93,28 @@ class STTModel:
return result
# Recognize speech using the whisper model
- def recognize_using_whisper(self,filename,language = None,timeout = 5,fp16=False):
- """
- Recognize speech and return the result as a JSON object.
-
- Args:
- filename (str): The path to the audio file.
- timeout (int, optional): The timeout for recognition (default is 5 seconds).
- fp16 (bool, optional): If True, use 16-bit floating point precision, (default is False) because cuda is not supported.
- language (str, optional): The language code for recognition (default is None).
-
- Returns:
- dict: A JSON object containing recognition results.
- """
- def transcribe_with_whisper():
- return self.whisper_model.transcribe(filename, language = language,fp16=fp16)
+ # def recognize_using_whisper(self,filename,language = None,timeout = 5,fp16=False):
+ # """
+ # Recognize speech and return the result as a JSON object.
+
+ # Args:
+ # filename (str): The path to the audio file.
+ # timeout (int, optional): The timeout for recognition (default is 5 seconds).
+ # fp16 (bool, optional): If True, use 16-bit floating point precision, (default is False) because cuda is not supported.
+ # language (str, optional): The language code for recognition (default is None).
+
+ # Returns:
+ # dict: A JSON object containing recognition results.
+ # """
+ # def transcribe_with_whisper():
+ # return self.whisper_model.transcribe(filename, language = language,fp16=fp16)
- with ThreadPoolExecutor() as executor:
- future = executor.submit(transcribe_with_whisper)
- try:
- return future.result(timeout=timeout)
- except TimeoutError:
- return {"error": "Transcription with Whisper exceeded the timeout."}
+ # with ThreadPoolExecutor() as executor:
+ # future = executor.submit(transcribe_with_whisper)
+ # try:
+ # return future.result(timeout=timeout)
+ # except TimeoutError:
+ # return {"error": "Transcription with Whisper exceeded the timeout."}
def recognize_using_whisper_cpp(self,filename):
command = self.whisper_cpp_path