Update voice agent service

Add new features such as an option to load the service using an external config file, an enhanced Kuksa client, and a more robust mapper.

Signed-off-by: Malik Talha <talhamalik727x@gmail.com>
Change-Id: Iba3cfd234c0aabad67b293669d456bb73d8e3135
author: Malik Talha <talhamalik727x@gmail.com> 2023-10-29 20:52:29 +0500
committer: Malik Talha <talhamalik727x@gmail.com> 2023-10-29 20:52:29 +0500
commit: 42a03d2550f60a8064078f19a743afb944f9ff69 (patch)
tree: c9a7b3d028737d5fecd2e05f69e1c744810ed5fb /agl_service_voiceagent/utils/stt_model.py
parent: a10c988b5480ca5b937a2793b450cfa01f569d76 (diff)
1 files changed, 58 insertions, 25 deletions
diff --git a/agl_service_voiceagent/utils/stt_model.py b/agl_service_voiceagent/utils/stt_model.py
index 5337162..d51ae31 100644
--- a/agl_service_voiceagent/utils/stt_model.py
+++ b/agl_service_voiceagent/utils/stt_model.py
@@ -21,21 +21,61 @@ import wave
 from agl_service_voiceagent.utils.common import generate_unique_uuid
 
 class STTModel:
+    """
+    STTModel is a class for speech-to-text (STT) recognition using the Vosk speech recognition library.
+    """
+
     def __init__(self, model_path, sample_rate=16000):
+        """
+        Initialize the STTModel instance with the provided model and sample rate.
+
+        Args:
+            model_path (str): The path to the Vosk speech recognition model.
+            sample_rate (int, optional): The audio sample rate in Hz (default is 16000).
+        """
         self.sample_rate = sample_rate
         self.model = vosk.Model(model_path)
         self.recognizer = {}
         self.chunk_size = 1024
     
+
     def setup_recognizer(self):
+        """
+        Set up a Vosk recognizer for a new session and return a unique identifier (UUID) for the session.
+
+        Returns:
+            str: A unique identifier (UUID) for the session.
+        """
         uuid = generate_unique_uuid(6)
         self.recognizer[uuid] = vosk.KaldiRecognizer(self.model, self.sample_rate)
         return uuid
 
+
     def init_recognition(self, uuid, audio_data):
+        """
+        Feed a chunk of audio data to the Vosk recognizer of an existing session.
+
+        Args:
+            uuid (str): The unique identifier (UUID) for the session.
+            audio_data (bytes): Audio data to process.
+
+        Returns:
+            bool: True if Vosk detected the end of an utterance (a final result is ready), False if decoding is still in progress.
+        """
         return self.recognizer[uuid].AcceptWaveform(audio_data)
 
+
     def recognize(self, uuid, partial=False):
+        """
+        Recognize speech and return the result as a JSON object.
+
+        Args:
+            uuid (str): The unique identifier (UUID) for the session.
+            partial (bool, optional): If True, return partial recognition results (default is False).
+
+        Returns:
+            dict: A JSON object containing recognition results.
+        """
         self.recognizer[uuid].SetWords(True)
         if partial:
             result = json.loads(self.recognizer[uuid].PartialResult())
@@ -44,7 +84,18 @@ class STTModel:
             self.recognizer[uuid].Reset()
         return result
     
+
     def recognize_from_file(self, uuid, filename):
+        """
+        Recognize speech from an audio file and return the recognized text.
+
+        Args:
+            uuid (str): The unique identifier (UUID) for the session.
+            filename (str): The path to the audio file.
+
+        Returns:
+            str: The recognized text, or an error code string such as "FILE_NOT_FOUND" or "VOICE_NOT_RECOGNIZED".
+        """
         if not os.path.exists(filename):
             print(f"Audio file '{filename}' not found.")
             return "FILE_NOT_FOUND"
@@ -75,31 +126,13 @@ class STTModel:
             print("Voice not recognized. Please speak again...")
             return "VOICE_NOT_RECOGNIZED"
     
-    def cleanup_recognizer(self, uuid):
-        del self.recognizer[uuid]
 
-import wave
-
-def read_wav_file(filename, chunk_size=1024):
-    try:
-        wf = wave.open(filename, "rb")
-        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
-            print("Audio file must be WAV format mono PCM.")
-            return "FILE_FORMAT_INVALID"
-
-        audio_data = b""  # Initialize an empty bytes object to store audio data
-        while True:
-            chunk = wf.readframes(chunk_size)
-            if not chunk:
-                break  # End of file reached
-            audio_data += chunk
-
-        return audio_data
-    except Exception as e:
-        print(f"Error reading audio file: {e}")
-        return None
+    def cleanup_recognizer(self, uuid):
+        """
+        Clean up and remove the Vosk recognizer for a session.
 
-# Example usage:
-filename = "your_audio.wav"
-audio_data = read_wav_file(filename)
+        Args:
+            uuid (str): The unique identifier (UUID) for the session.
+        """
+        del self.recognizer[uuid]
     
 \ No newline at end of file
author	Malik Talha <talhamalik727x@gmail.com>	2023-10-29 20:52:29 +0500
committer	Malik Talha <talhamalik727x@gmail.com>	2023-10-29 20:52:29 +0500
commit	42a03d2550f60a8064078f19a743afb944f9ff69 (patch)
tree	c9a7b3d028737d5fecd2e05f69e1c744810ed5fb /agl_service_voiceagent/utils/stt_model.py
parent	a10c988b5480ca5b937a2793b450cfa01f569d76 (diff)