1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
|
from __future__ import absolute_import, print_function, unicode_literals
from abc import ABCMeta, abstractmethod
from builtins import object
from io import IOBase
from future.utils import with_metaclass
from snips_inference_agl.constants import DATA, ENTITY, SLOT_NAME, TEXT, UTTERANCES
from snips_inference_agl.exceptions import IntentFormatError
class Intent(object):
    """Intent data of a :class:`.Dataset`

    Attributes:
        intent_name (str): name of the intent
        utterances (list of :class:`.IntentUtterance`): annotated intent
            utterances
        slot_mapping (dict): mapping between slot names and entities
    """

    def __init__(self, intent_name, utterances, slot_mapping=None):
        """Build an intent and reconcile slot names with entities

        Args:
            intent_name (str): name of the intent
            utterances (list of :class:`.IntentUtterance`): annotated intent
                utterances
            slot_mapping (dict, optional): mapping between slot names and
                entities. The dict is stored as is and completed in place
                with the entities declared inline in the utterances.
        """
        if slot_mapping is None:
            slot_mapping = dict()
        self.intent_name = intent_name
        self.utterances = utterances
        self.slot_mapping = slot_mapping
        # Order matters: the mapping must be complete before it is used to
        # fill in the entity of the slot chunks that do not declare one
        self._complete_slot_name_mapping()
        self._ensure_entity_names()

    @classmethod
    def from_yaml(cls, yaml_dict):
        """Build an :class:`.Intent` from its YAML definition object

        Args:
            yaml_dict (dict or :class:`.IOBase`): object containing the YAML
                definition of the intent. It can be either a stream, or the
                corresponding python dict.

        Examples:
            An intent can be defined with a YAML document following the schema
            illustrated in the example below:

            >>> import io
            >>> from snips_inference_agl.common.utils import json_string
            >>> intent_yaml = io.StringIO('''
            ... # searchFlight Intent
            ... ---
            ... type: intent
            ... name: searchFlight
            ... slots:
            ...   - name: origin
            ...     entity: city
            ...   - name: destination
            ...     entity: city
            ...   - name: date
            ...     entity: snips/datetime
            ... utterances:
            ...   - find me a flight from [origin](Oslo) to [destination](Lima)
            ...   - I need a flight leaving to [destination](Berlin)''')
            >>> intent = Intent.from_yaml(intent_yaml)
            >>> print(json_string(intent.json, indent=4, sort_keys=True))
            {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "find me a flight from "
                            },
                            {
                                "entity": "city",
                                "slot_name": "origin",
                                "text": "Oslo"
                            },
                            {
                                "text": " to "
                            },
                            {
                                "entity": "city",
                                "slot_name": "destination",
                                "text": "Lima"
                            }
                        ]
                    },
                    {
                        "data": [
                            {
                                "text": "I need a flight leaving to "
                            },
                            {
                                "entity": "city",
                                "slot_name": "destination",
                                "text": "Berlin"
                            }
                        ]
                    }
                ]
            }

        Raises:
            IntentFormatError: When the YAML dict does not correspond to the
                :ref:`expected intent format <yaml_intent_format>`
        """
        if isinstance(yaml_dict, IOBase):
            # Imported lazily so that the YAML dependency is only needed
            # when a stream actually has to be parsed
            from snips_inference_agl.dataset.yaml_wrapper import yaml

            yaml_dict = yaml.safe_load(yaml_dict)

        # An empty YAML document is loaded as None
        if yaml_dict is None:
            raise IntentFormatError("Empty intent definition")

        object_type = yaml_dict.get("type")
        if object_type and object_type != "intent":
            raise IntentFormatError("Wrong type: '%s'" % object_type)

        intent_name = yaml_dict.get("name")
        if not intent_name:
            raise IntentFormatError("Missing 'name' attribute")

        slot_mapping = dict()
        # A "slots:" key without any entry is loaded as None, hence `or []`
        for slot in yaml_dict.get("slots") or []:
            try:
                slot_mapping[slot["name"]] = slot["entity"]
            except (KeyError, TypeError):
                raise IntentFormatError(
                    "Each slot must define 'name' and 'entity' attributes, "
                    "got: %s" % (slot,))

        # A missing or empty "utterances" key is reported below as a format
        # error instead of surfacing as a KeyError/TypeError
        raw_utterances = yaml_dict.get("utterances") or []
        utterances = [IntentUtterance.parse(u.strip())
                      for u in raw_utterances if u.strip()]
        if not utterances:
            raise IntentFormatError(
                "Intent must contain at least one utterance")
        return cls(intent_name, utterances, slot_mapping)

    def _complete_slot_name_mapping(self):
        """Add to the slot mapping the entities declared in the utterances

        Entries already present in the mapping are never overridden.
        """
        for utterance in self.utterances:
            for chunk in utterance.slot_chunks:
                if chunk.entity:
                    self.slot_mapping.setdefault(chunk.slot_name, chunk.entity)
        return self

    def _ensure_entity_names(self):
        """Give an entity to every slot chunk which does not have one

        The entity is taken from the slot mapping and defaults to the slot
        name itself when the slot is not mapped.
        """
        for utterance in self.utterances:
            for chunk in utterance.slot_chunks:
                if not chunk.entity:
                    chunk.entity = self.slot_mapping.get(
                        chunk.slot_name, chunk.slot_name)
        return self

    @property
    def json(self):
        """Intent data in json format"""
        return {
            UTTERANCES: [
                {DATA: [chunk.json for chunk in utterance.chunks]}
                for utterance in self.utterances
            ]
        }

    @property
    def entities_names(self):
        """Set of the entities used by the slot chunks of the utterances"""
        return {chunk.entity for u in self.utterances
                for chunk in u.chunks if isinstance(chunk, SlotChunk)}
class IntentUtterance(object):
    """Utterance of an intent, stored as an ordered list of chunks

    Attributes:
        chunks (list): chunks of the utterance, in reading order
    """

    def __init__(self, chunks):
        self.chunks = chunks

    @property
    def text(self):
        """Raw text of the utterance, i.e. the chunk texts put end to end"""
        return "".join(c.text for c in self.chunks)

    @property
    def slot_chunks(self):
        """Generator over the chunks which are :class:`.SlotChunk`"""
        return (c for c in self.chunks if isinstance(c, SlotChunk))

    @classmethod
    def parse(cls, string):
        """Parses an utterance

        Args:
            string (str): an annotated utterance, where slots are written
                ``[slot_name:entity](slot text)`` as in the example below

        Examples:
            >>> from snips_inference_agl.dataset.intent import IntentUtterance
            >>> u = IntentUtterance.parse(
            ...     "president of [country:default](France)")
            >>> u.text
            'president of France'
            >>> len(u.chunks)
            2
            >>> u.chunks[0].text
            'president of '
            >>> u.chunks[1].slot_name
            'country'
            >>> u.chunks[1].entity
            'default'
        """
        state = SM(string)
        # The capture functions fill state.chunks while consuming the input
        capture_text(state)
        return cls(state.chunks)
class Chunk(with_metaclass(ABCMeta, object)):
    """Abstract base of the pieces an utterance is made of

    Attributes:
        text (str): text covered by the chunk
    """

    def __init__(self, text):
        self.text = text

    @abstractmethod
    def json(self):
        """Chunk data in json format, to be provided by the subclasses"""
class SlotChunk(Chunk):
    """Chunk of an utterance which is annotated with a slot

    Attributes:
        text (str): text covered by the slot
        slot_name (str): name of the slot
        entity (str): name of the entity of the slot
    """

    def __init__(self, slot_name, entity, text):
        super(SlotChunk, self).__init__(text)
        self.entity = entity
        self.slot_name = slot_name

    @property
    def json(self):
        """Chunk data in json format: text, slot name and entity"""
        data = {TEXT: self.text}
        data[SLOT_NAME] = self.slot_name
        data[ENTITY] = self.entity
        return data
class TextChunk(Chunk):
    """Chunk of an utterance made of plain text, without any annotation"""

    @property
    def json(self):
        """Chunk data in json format: the text only"""
        return {TEXT: self.text}
class SM(object):
    """State Machine for parsing

    Keeps the string being parsed, a cursor over it and the chunks built so
    far.

    Attributes:
        input (str): string being parsed
        chunks (list): chunks created so far, in creation order
        current (int): position of the cursor in ``input``
    """

    def __init__(self, input):
        self.current = 0
        self.chunks = []
        self.input = input

    @property
    def end_of_input(self):
        """Whether the cursor is at or beyond the end of the input"""
        return len(self.input) <= self.current

    def add_slot(self, name, entity=None):
        """Adds a named slot

        The slot chunk is created with ``None`` as text.

        Args:
            name (str): slot name
            entity (str): entity name
        """
        slot = SlotChunk(slot_name=name, entity=entity, text=None)
        self.chunks.append(slot)

    def add_text(self, text):
        """Adds a simple text chunk holding the given text"""
        self.chunks.append(TextChunk(text=text))

    def add_tagged(self, text):
        """Sets the text of the last added chunk

        Raises:
            AssertionError: when no chunk has been added yet
        """
        if self.chunks:
            self.chunks[-1].text = text
            return
        raise AssertionError("Cannot add tagged text because chunks list "
                             "is empty")

    def find(self, s):
        """Returns the absolute position of the first occurrence of ``s``
        at or after the cursor, or -1 when there is none"""
        return self.input.find(s, self.current)

    def move(self, pos):
        """Moves the cursor of the state to position after given

        Args:
            pos (int): position to place the cursor just after
        """
        self.current = pos + 1

    def peek(self):
        """Returns the character under the cursor without consuming it, or
        None when the end of the input has been reached"""
        return None if self.end_of_input else self[0]

    def read(self):
        """Returns the character under the cursor and advances the cursor"""
        char = self.input[self.current]
        self.current += 1
        return char

    def __getitem__(self, key):
        """Indexes the input from the cursor

        An int key and the start of a slice are offsets from the cursor,
        whereas the stop of a slice is an absolute position in the input,
        such as the ones returned by :meth:`find`.

        Raises:
            TypeError: when the key is neither an int nor a slice
        """
        offset = self.current
        if isinstance(key, int):
            return self.input[offset + key]
        if isinstance(key, slice):
            begin = offset + key.start if key.start else offset
            return self.input[begin:key.stop:key.step]
        raise TypeError("Bad key type: %s" % type(key))
def capture_text(state):
    """Captures the plain text found before the next slot, if any

    The text running from the cursor up to the next '[' (or up to the end
    of the input when there is no '[') is added as a text chunk unless it is
    empty. When a '[' was found, the cursor is moved past it and the slot is
    captured.

    Args:
        state (SM): parsing state, updated in place
    """
    bracket_pos = state.find('[')
    if bracket_pos < 0:
        plain_text = state[:]
    else:
        plain_text = state[:bracket_pos]
    if plain_text:
        state.add_text(plain_text)
    if bracket_pos < 0:
        # No more slot: the whole input has been consumed
        return
    state.move(bracket_pos)
    capture_slot(state)
def capture_slot(state):
    """Captures a slot declaration, the cursor being just after its '['

    The declaration is either ``slot_name]`` or ``slot_name:entity]``. Once
    the slot chunk has been added, the parsing goes on with the tagged text
    when a '(' follows the ']', and with plain text otherwise; in the latter
    case the text of the slot chunk is left to ``None``.

    Args:
        state (SM): parsing state, updated in place

    Raises:
        IntentFormatError: when there is no ']' after the cursor
    """
    colon_pos = state.find(':')
    closing_pos = state.find(']')
    if closing_pos < 0:
        raise IntentFormatError(
            "Missing ending ']' in annotated utterance \"%s\"" % state.input)

    entity = None
    # A colon located after the ']' does not belong to this declaration
    if 0 <= colon_pos <= closing_pos:
        slot_name = state[:colon_pos]
        state.move(colon_pos)
        entity = state[:closing_pos]
    else:
        slot_name = state[:closing_pos]
    state.move(closing_pos)
    state.add_slot(slot_name, entity)

    if state.peek() != '(':
        capture_text(state)
        return
    state.read()
    capture_tagged(state)
def capture_tagged(state):
    """Captures the text of a slot, the cursor being just after its '('

    The text running up to the next ')' is set on the last added chunk, then
    the cursor is moved past the ')' and the parsing goes on with plain text.

    Args:
        state (SM): parsing state, updated in place

    Raises:
        IntentFormatError: when there is no ')' after the cursor
    """
    closing_pos = state.find(')')
    if closing_pos < 1:
        raise IntentFormatError(
            "Missing ending ')' in annotated utterance \"%s\"" % state.input)
    state.add_tagged(state[:closing_pos])
    state.move(closing_pos)
    capture_text(state)
|