aboutsummaryrefslogtreecommitdiffstats
path: root/agl_service_voiceagent/protos/voice_agent.proto
blob: 40dfe6aeab126bfe2047d370995b428223b695a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
syntax = "proto3";


// gRPC service for the voice agent: service status, wake-word detection,
// command recognition from voice or text, and command execution.
// NOTE(review): no `package` statement is visible in this file, so the
// service and its types appear to live in the default (unnamed) package.
service VoiceAgentService {
  // Unary: takes Empty and returns a single ServiceStatus.
  rpc CheckServiceStatus(Empty) returns (ServiceStatus);
  // Stream version of DetectWakeWord; assumes audio is coming from the client.
  // Bidirectional streaming: VoiceAudio messages in, WakeWordStatus
  // messages out.
  rpc S_DetectWakeWord(stream VoiceAudio) returns (stream WakeWordStatus);
  // Server streaming: takes Empty and returns a stream of WakeWordStatus.
  // NOTE(review): the request carries no audio, so the audio is presumably
  // captured on the server side -- confirm against the implementation.
  rpc DetectWakeWord(Empty) returns (stream WakeWordStatus);
  // Stream version of RecognizeVoiceCommand; assumes audio is coming from
  // the client. Client streaming: S_RecognizeVoiceControl messages (each
  // carrying a VoiceAudio) in, one RecognizeResult out.
  rpc S_RecognizeVoiceCommand(stream S_RecognizeVoiceControl) returns (RecognizeResult);
  // Client streaming: RecognizeVoiceControl messages in, one RecognizeResult
  // out. The control message carries a RecordAction, NLUModel, RecordMode
  // and stream_id, but no audio payload.
  rpc RecognizeVoiceCommand(stream RecognizeVoiceControl) returns (RecognizeResult);
  // Unary: takes a text command plus NLU model selection and returns a
  // RecognizeResult.
  rpc RecognizeTextCommand(RecognizeTextControl) returns (RecognizeResult);
  // Unary: takes an intent with its slots and returns an ExecuteResult.
  rpc ExecuteCommand(ExecuteInput) returns (ExecuteResult);
}


// Action carried in RecognizeVoiceControl.action.
// NOTE(review): values are not prefixed with the enum name; renaming them
// would break generated code, so they are left as-is.
enum RecordAction {
  // Zero value, so this is what an unset `action` field reads as.
  // Presumably requests that recording begin -- confirm against the server.
  START = 0;
  // Presumably requests that recording end -- confirm against the server.
  STOP = 1;
}

// NLU model selector used by the `nlu_model` field of
// S_RecognizeVoiceControl, RecognizeVoiceControl and RecognizeTextControl.
enum NLUModel {
  // Zero value, so this is what an unset `nlu_model` field reads as.
  // Presumably selects the Snips NLU engine -- confirm.
  SNIPS = 0;
  // Presumably selects the Rasa NLU engine -- confirm.
  RASA = 1;
}

// Recording mode carried in RecognizeVoiceControl.record_mode.
enum RecordMode {
  // Zero value, so this is what an unset `record_mode` field reads as.
  // NOTE(review): presumably recording is started and stopped explicitly
  // through RecordAction in this mode -- confirm against the server.
  MANUAL = 0;
  // NOTE(review): presumably the server decides when recording ends in this
  // mode -- confirm against the server.
  AUTO = 1;
}

// Outcome reported in RecognizeResult.status.
// NOTE(review): the zero value is REC_ERROR, so a RecognizeResult whose
// status was never set is indistinguishable from an explicit error.
enum RecognizeStatusType {
  // Zero value / default.
  REC_ERROR = 0;
  REC_SUCCESS = 1;
  REC_PROCESSING = 2;
  VOICE_NOT_RECOGNIZED = 3;
  INTENT_NOT_RECOGNIZED = 4;
  TEXT_NOT_RECOGNIZED = 5;
  // Presumably returned when the requested NLUModel cannot be served --
  // confirm against the server.
  NLU_MODEL_NOT_SUPPORTED = 6;
}

// Outcome reported in ExecuteResult.status.
// NOTE(review): the zero value is EXEC_ERROR, so an ExecuteResult whose
// status was never set is indistinguishable from an explicit error.
enum ExecuteStatusType {
  // Zero value / default.
  EXEC_ERROR = 0;
  EXEC_SUCCESS = 1;
  // Presumably a failure to connect to a KUKSA backend -- confirm.
  KUKSA_CONN_ERROR = 2;
  INTENT_NOT_SUPPORTED = 3;
  // Presumably the supplied intent_slots were insufficient for the intent
  // -- confirm.
  INTENT_SLOTS_INCOMPLETE = 4;
}


// Field-less placeholder used as the request type of CheckServiceStatus and
// DetectWakeWord.
message Empty {}

// Response of CheckServiceStatus.
message ServiceStatus {
  // Version string reported by the service (format not specified here).
  string version = 1;
  // Service state flag. NOTE(review): presumably true means the service is
  // up and usable -- confirm against the server.
  bool status = 2;
  // Wake word as a string. NOTE(review): presumably the phrase the service
  // is configured to listen for -- confirm.
  string wake_word = 3;
}

// One chunk of client-supplied audio plus its description. Streamed directly
// by S_DetectWakeWord and embedded in S_RecognizeVoiceControl.audio_stream.
message VoiceAudio {
  // Raw audio bytes for this chunk.
  bytes audio_chunk = 1;
  // Audio format identifier; accepted values are not defined in this file.
  string audio_format = 2;
  // Sample rate of the audio. NOTE(review): presumably in Hz -- confirm.
  int32 sample_rate = 3;
  // Language of the audio; the expected tag format is not defined in this
  // file.
  string language = 4;
}

// Element of the response stream of DetectWakeWord and S_DetectWakeWord.
message WakeWordStatus {
  // NOTE(review): presumably true when the wake word has been detected --
  // confirm against the server.
  bool status = 1;
}

// Request element streamed by the client to S_RecognizeVoiceCommand.
message S_RecognizeVoiceControl {
  // Audio chunk and its format description for this stream element.
  VoiceAudio audio_stream = 1;
  // NLU model to use; defaults to SNIPS (zero value) when unset.
  NLUModel nlu_model = 2;
  // Stream identifier. NOTE(review): presumably correlates with
  // RecognizeResult.stream_id -- confirm.
  string stream_id = 3;
}

// Request element streamed by the client to RecognizeVoiceCommand. Carries
// control information only; there is no audio field in this message.
message RecognizeVoiceControl {
  // Recording action; defaults to START (zero value) when unset.
  RecordAction action = 1;
  // NLU model to use; defaults to SNIPS (zero value) when unset.
  NLUModel nlu_model = 2;
  // Recording mode; defaults to MANUAL (zero value) when unset.
  RecordMode record_mode = 3;
  // Stream identifier. NOTE(review): presumably correlates with
  // RecognizeResult.stream_id -- confirm.
  string stream_id = 4;
}

// Request of RecognizeTextCommand.
message RecognizeTextControl {
  // Command supplied as text.
  string text_command = 1;
  // NLU model to use; defaults to SNIPS (zero value) when unset.
  NLUModel nlu_model = 2;
}

// Name/value pair attached to an intent. Used as the repeated `intent_slots`
// field of RecognizeResult and ExecuteInput.
message IntentSlot {
  // Slot name.
  string name = 1;
  // Slot value, carried as a string.
  string value = 2;
}

// Response of S_RecognizeVoiceCommand, RecognizeVoiceCommand and
// RecognizeTextCommand.
message RecognizeResult {
  // NOTE(review): presumably the command text that was recognized or
  // submitted -- confirm against the server.
  string command = 1;
  // Intent name.
  string intent = 2;
  // Slots attached to the intent; empty when there are none.
  repeated IntentSlot intent_slots = 3;
  // Stream identifier. NOTE(review): presumably echoes the stream_id of the
  // voice request; RecognizeTextControl has no such field -- confirm.
  string stream_id = 4;
  // Recognition outcome; reads as REC_ERROR (zero value) when unset.
  RecognizeStatusType status = 5;
}

// Request of ExecuteCommand. Its fields have the same names and types as
// `intent` and `intent_slots` in RecognizeResult.
message ExecuteInput {
  // Intent name to execute.
  string intent = 1;
  // Slots attached to the intent; empty when there are none.
  repeated IntentSlot intent_slots = 2;
}

// Response of ExecuteCommand.
message ExecuteResult {
  // Response text. NOTE(review): presumably a human-readable message
  // describing the outcome -- confirm against the server.
  string response = 1;
  // Execution outcome; reads as EXEC_ERROR (zero value) when unset.
  ExecuteStatusType status = 2;
}