diff options
author | Anuj Solanki <anuj603362@gmail.com> | 2024-06-16 18:49:45 +0530 |
---|---|---|
committer | Anuj Solanki <anuj603362@gmail.com> | 2024-09-07 20:16:14 +0530 |
commit | 1144fcd343bc56f8c27ff73d3e76904010dbb832 (patch) | |
tree | 490915cd969f19b4eb3b3dd480554b27c1058243 /agl_service_voiceagent/protos | |
parent | f2b62ba4da5a178221c3210c2d468cd684e626cc (diff) |
Integrate Whisper AI into agl-service-voiceagent
V1:
- Integrated Whisper AI for speech-to-text functionality into
agl-service-voiceagent.
- Add support for both online and offline modes.
- Implemented a gRPC-based connection for online mode between
the Whisper ASR service and the voice-agent service.
V2:
- Update kuksa-interface
- Add whisper-cpp for speech-to-text functionality
- Add support to control media using mpd
- Fix audio recorder
Bug-AGL: SPEC-5200
Change-Id: I2661ae61ba2c3283bcfde26d6e4f498270240b19
Signed-off-by: Anuj Solanki <anuj603362@gmail.com>
Diffstat (limited to 'agl_service_voiceagent/protos')
-rw-r--r-- | agl_service_voiceagent/protos/audio_processing.proto | 23 | ||||
-rw-r--r-- | agl_service_voiceagent/protos/voice_agent.proto | 12 |
2 files changed, 35 insertions, 0 deletions
diff --git a/agl_service_voiceagent/protos/audio_processing.proto b/agl_service_voiceagent/protos/audio_processing.proto new file mode 100644 index 0000000..edacc04 --- /dev/null +++ b/agl_service_voiceagent/protos/audio_processing.proto @@ -0,0 +1,23 @@ +// proto file for audio processing service for Whisper online service + +syntax = "proto3"; + +package audioproc; + +service AudioProcessing { + // Sends audio data and receives processed text. + rpc ProcessAudio (AudioRequest) returns (TextResponse); +} + +// The request message containing the audio data. +message AudioRequest { + bytes audio_data = 1; +} + +// The response message containing the processed text. +message TextResponse { + string text = 1; +} + +// usage: +// python -m grpc_tools.protoc -I. --python_out=./generated/ --grpc_python_out=./generated/ audio_processing.proto
\ No newline at end of file diff --git a/agl_service_voiceagent/protos/voice_agent.proto b/agl_service_voiceagent/protos/voice_agent.proto index 40dfe6a..bd2daa2 100644 --- a/agl_service_voiceagent/protos/voice_agent.proto +++ b/agl_service_voiceagent/protos/voice_agent.proto @@ -11,6 +11,15 @@ service VoiceAgentService { rpc ExecuteCommand(ExecuteInput) returns (ExecuteResult); } +enum STTFramework { + VOSK = 0; + WHISPER = 1; +} + +enum OnlineMode { + ONLINE = 0; + OFFLINE = 1; +} enum RecordAction { START = 0; @@ -69,6 +78,7 @@ message S_RecognizeVoiceControl { VoiceAudio audio_stream = 1; NLUModel nlu_model = 2; string stream_id = 3; + STTFramework stt_framework = 4; } message RecognizeVoiceControl { @@ -76,6 +86,8 @@ message RecognizeVoiceControl { NLUModel nlu_model = 2; RecordMode record_mode = 3; string stream_id = 4; + STTFramework stt_framework = 5; + OnlineMode online_mode = 6; } message RecognizeTextControl { |