/
action_string_converter.py
538 lines (440 loc) · 21.3 KB
/
action_string_converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
import re
from abc import abstractmethod, ABC
from typing import List, Optional, Tuple
from .actions import NoAction, Yield, Wash, Wait, Stir, SetTemperature, Reflux, Quench, Purify, PhaseSeparation, PH, \
Partition, Filter, Extract, Degas, Concentrate, InvalidAction, Add, Action, CollectLayer, Chemical, MakeSolution, \
Sonicate, Microwave, FollowOtherProcedure, DrySolid, DrySolution, Triturate, Recrystallize, OtherLanguage
from .utils import get_all_action_types
class ActionStringConversionError(ValueError):
    """
    Raised when a string cannot be converted back to a list of actions.

    Attributes:
        action_string: the string for which the conversion failed, kept so
            that callers can inspect it without parsing the error message.
    """

    def __init__(self, action_string: str):
        """
        Args:
            action_string: the string that could not be converted.
        """
        super().__init__(f'Conversion from string to actions failed for "{action_string}".')
        self.action_string = action_string
class ActionStringConverter(ABC):
    """
    Base class for the conversion of our custom action classes to and from strings.

    Subclasses implement the three abstract methods below. The constructor
    verifies that the concrete converter supports every action type reported
    by `get_all_action_types()`.
    """

    def __init__(self):
        """
        Checks that the converter supports all the known action types.

        Raises:
            AssertionError: if one action type is not supported.
        """
        for action_name in get_all_action_types():
            # Explicit check instead of an `assert` statement: `assert` is
            # skipped when Python runs with -O, which would silently disable
            # this verification. AssertionError is kept as the exception type
            # so that the behavior is unchanged for existing callers.
            if not self.action_type_supported(action_name):
                raise AssertionError(f'Action "{action_name}" is not supported by the converter')

    @abstractmethod
    def action_type_supported(self, action_type: str) -> bool:
        """
        Whether the conversion to and from a given action type is supported by the converter.
        """

    @abstractmethod
    def actions_to_string(self, actions: List[Action]) -> str:
        """
        Converts a list of actions to a string representation.
        """

    @abstractmethod
    def string_to_actions(self, action_string: str) -> List[Action]:
        """
        Converts a string representation to the corresponding list of actions.
        """
class ReprConverter(ActionStringConverter):
    """
    Action-String converter relying on the `repr` form of the action instances.

    The string form is the `repr` of the list of actions, and the
    back-conversion evaluates that string as a Python expression.
    """

    def action_type_supported(self, action_type: str) -> bool:
        """
        An action type is supported as soon as it is one of the known action types.
        """
        known_action_types = get_all_action_types()
        return action_type in known_action_types

    def actions_to_string(self, actions: List[Action]) -> str:
        """
        Returns the `repr` of the list of actions.
        """
        return repr(actions)

    def string_to_actions(self, action_string: str) -> List[Action]:
        """
        Evaluates the string as a Python expression to rebuild the actions.

        Raises:
            ActionStringConversionError: if the evaluation fails for any reason.
        """
        # NOTE(review): `eval` executes arbitrary Python code. Only strings
        # coming from a trusted source must be given to this method.
        try:
            return eval(action_string)
        except Exception as error:
            raise ActionStringConversionError(action_string) from error
class ReadableConverter(ActionStringConverter):
    """
    Action-String converter based on the format used initially for the translation task.

    Every action is written as its upper-cased action name followed by its
    properties; the actions are joined with a separator and the string is
    closed by an end mark. For instance:
    > FILTER keep filtrate; EXTRACT with chloroform (quantity unspecified).

    The conversion is dispatched by name: an action type `Foo` is handled by
    the methods `_from_foo` (action to string) and `_to_foo` (string to action).
    """

    # Zero-width non-joiner (U+200C). It is inserted into substrings that
    # would otherwise be mistaken for delimiters during the back-conversion.
    _ZWNJ = '\u200C'

    # Marker appended to the actions that are done dropwise.
    _DROPWISE = ' dropwise'

    def __init__(self, separator: str = '; ', end_mark: str = '.'):
        """
        Args:
            separator: string that will be inserted between the action strings.
                Must contain at least two characters.
            end_mark: string that will be added at the end of the action string.

        Raises:
            AssertionError: if the separator is shorter than two characters,
                or if an action type is not supported by this converter.
        """
        super().__init__()

        self.separator = separator
        self.end_mark = end_mark

        # Explicit check instead of an `assert` statement, which would be
        # skipped with `python -O`. The exception type is left unchanged.
        if len(self.separator) <= 1:
            raise AssertionError('The separator must contain at least two characters')

        # If a compound name / duration contains the separator, the string
        # will be modified by adding a zero-width non-joiner between the
        # first and second character of the separator.
        self.separator_substitute = self.separator[:1] + self._ZWNJ + self.separator[1:]

    def action_type_supported(self, action_type: str) -> bool:
        """
        Whether both `_from_<action type>` and `_to_<action type>` exist.

        The attributes are looked up directly: `_get_from_method` and
        `_get_to_method` raise when the method is missing, so relying on them
        would never allow this function to return False.
        """
        suffix = action_type.lower()
        return all(
            callable(getattr(self, prefix + suffix, None)) for prefix in ('_from_', '_to_')
        )

    def actions_to_string(self, actions: List[Action]) -> str:
        """
        Converts the actions one by one and joins them with the separator,
        followed by the end mark.
        """
        action_strings = (self.action_to_string(a) for a in actions)
        return self.separator.join(action_strings) + self.end_mark

    def string_to_actions(self, action_string: str) -> List[Action]:
        """
        Converts a full action string (several actions and the end mark) to actions.

        Returns:
            The list of actions; empty if nothing is left once the end mark
            is removed.

        Raises:
            ActionStringConversionError: if anything fails during the conversion.
        """
        try:
            text = action_string
            # Remove the last dot (or other end mark), but only if it is
            # actually there: blindly cutting the last characters would
            # silently corrupt the last action of a string without end mark.
            if self.end_mark and text.endswith(self.end_mark):
                text = text[:-len(self.end_mark)]

            if not text:
                return []

            return [self.string_to_action(part) for part in text.split(self.separator)]
        except Exception as e:
            # Report the string as it was given, not the shortened one.
            raise ActionStringConversionError(action_string) from e

    def action_to_string(self, action: Action) -> str:
        """
        Converts a single action to its string form (without end mark).
        """
        action_string = self._get_from_method(action.action_name)(action)

        # If the separator (by default '; ') is present in the string, break
        # it with a zero-width non-joiner, as it would otherwise lead to an
        # error in the back-conversion.
        return action_string.replace(self.separator, self.separator_substitute)

    def string_to_action(self, action_string: str) -> Action:
        """
        Converts the string for a single action (without end mark) to an action.

        The action type is the text before the first space.
        """
        # restore the separators that were (probably) altered in action_to_string
        action_string = action_string.replace(self.separator_substitute, self.separator)

        action_type = action_string.split(' ', 1)[0]
        return self._get_to_method(action_type)(action_string)

    def _uppercase_action_name(self, action: Action) -> str:
        """
        Returns the name of the action in upper case.
        """
        return action.action_name.upper()

    def _chemical_to_string(self, c: Chemical) -> str:
        """
        Convert a chemical to a string: the name, followed by the quantities
        in parentheses (if any).
        """
        # if there is a parenthesis after a space, it may mess up for the
        # back-conversion of the quantities; a zero-width non-joiner is
        # therefore inserted between the space and the parenthesis.
        compound_name = c.name.replace(' (', ' \u200C(')
        if not c.quantity:
            return compound_name

        quantity_str = ', '.join(c.quantity)
        return f'{compound_name} ({quantity_str})'

    def _get_from_method(self, action_type: str):
        """
        Returns the method converting an action of the given type to a string.

        Raises:
            ValueError: if there is no such method.
        """
        method_name = '_from_' + action_type.lower()
        method = getattr(self, method_name, None)
        if method is None:
            raise ValueError(f'Cannot find method to convert "{action_type}" action to string')
        return method

    def _get_to_method(self, action_type: str):
        """
        Returns the method converting a string to an action of the given type.

        Raises:
            ValueError: if there is no such method.
        """
        method_name = '_to_' + action_type.lower()
        method = getattr(self, method_name, None)
        if method is None:
            raise ValueError(
                f'Cannot find method to convert "{action_type}" action string to an actual action'
            )
        return method

    def _from_add(self, action: Add) -> str:
        """Format: ADD <chemical>[ dropwise][ at <T>][ under <atmosphere>][ over <duration>]"""
        s = f'{self._uppercase_action_name(action)} {self._chemical_to_string(action.material)}'
        if action.dropwise:
            s += ' dropwise'
        if action.temperature:
            s += f' at {action.temperature}'
        if action.atmosphere:
            s += f' under {action.atmosphere}'
        if action.duration:
            s += f' over {action.duration}'
        return s

    def _to_add(self, action_text: str) -> Action:
        """Inverse of `_from_add`; the properties are peeled off from the end."""
        remaining, duration = self._get_property_from_split(action_text, 'over')
        remaining, atmosphere = self._get_property_from_split(remaining, 'under')
        remaining, temperature = self._get_property_from_split(remaining, 'at')
        remaining, dropwise = self._split_dropwise(remaining)
        chemical = self._get_chemical(remaining, 'ADD ')
        return Add(
            material=chemical,
            dropwise=dropwise,
            temperature=temperature,
            atmosphere=atmosphere,
            duration=duration
        )

    def _from_invalidaction(self, action: InvalidAction) -> str:
        """Format: INVALIDACTION[ <error>]"""
        s = f'{self._uppercase_action_name(action)}'
        if action.error:
            s += f' {action.error}'
        return s

    def _to_invalidaction(self, action_text: str) -> Action:
        """Inverse of `_from_invalidaction`."""
        m = re.match(r'INVALIDACTION (.*)', action_text)
        if m is None:
            # if there is no match, it means that there was no error message
            return InvalidAction()
        return InvalidAction(error=m.group(1))

    def _from_collectlayer(self, action: CollectLayer) -> str:
        """Format: COLLECTLAYER <layer>"""
        return f'{self._uppercase_action_name(action)} {action.layer}'

    def _to_collectlayer(self, action_text: str) -> Action:
        """Inverse of `_from_collectlayer`."""
        layer = self._re_match(r'COLLECTLAYER (.*)', action_text)
        return CollectLayer(layer=layer)

    def _from_concentrate(self, action: Concentrate) -> str:
        """Format: CONCENTRATE"""
        return self._uppercase_action_name(action)

    def _to_concentrate(self, action_text: str) -> Action:
        """Inverse of `_from_concentrate`; the text carries no property."""
        return Concentrate()

    def _from_degas(self, action: Degas) -> str:
        """Format: DEGAS with <gas> for <duration>"""
        # NOTE(review): the duration is always written. If it can be None,
        # it is rendered as "for None" and read back as the string 'None' -
        # to be confirmed against the definition of Degas.
        return f'{self._uppercase_action_name(action)} with {action.gas} for {action.duration}'

    def _to_degas(self, action_text: str) -> Action:
        """Inverse of `_from_degas`."""
        remaining, duration = self._get_property_from_split(action_text, 'for')
        material = self._re_match(r'DEGAS with (.*)', remaining)
        return Degas(gas=material, duration=duration)

    def _from_drysolid(self, action: DrySolid) -> str:
        """Format: DRYSOLID[ for <duration>][ at <T>][ under <atmosphere>]"""
        s = f'{self._uppercase_action_name(action)}'
        if action.duration:
            s += f' for {action.duration}'
        if action.temperature:
            s += f' at {action.temperature}'
        if action.atmosphere:
            s += f' under {action.atmosphere}'
        return s

    def _to_drysolid(self, action_text: str) -> Action:
        """Inverse of `_from_drysolid`."""
        remaining, atmosphere = self._get_property_from_split(action_text, 'under')
        remaining, temperature = self._get_property_from_split(remaining, 'at')
        remaining, duration = self._get_property_from_split(remaining, 'for')
        return DrySolid(duration=duration, temperature=temperature, atmosphere=atmosphere)

    def _from_drysolution(self, action: DrySolution) -> str:
        """Format: DRYSOLUTION[ over <material>]"""
        s = self._uppercase_action_name(action)
        if action.material:
            s += f' over {action.material}'
        return s

    def _to_drysolution(self, action_text: str) -> Action:
        """Inverse of `_from_drysolution`."""
        _, material = self._get_property_from_split(action_text, 'over')
        return DrySolution(material=material)

    def _from_extract(self, action: Extract) -> str:
        """Format: EXTRACT with <chemical>[ <n> x]; the repetitions are omitted if 1."""
        s = f'{self._uppercase_action_name(action)} with {self._chemical_to_string(action.solvent)}'
        if action.repetitions != 1:
            s += f' {action.repetitions} x'
        return s

    def _to_extract(self, action_text: str) -> Action:
        """Inverse of `_from_extract`."""
        action_text, repetitions = self._split_repetitions(action_text)
        material = self._get_chemical(action_text, 'EXTRACT with ')
        return Extract(solvent=material, repetitions=repetitions)

    def _from_filter(self, action: Filter) -> str:
        """Format: FILTER[ keep <phase>]"""
        s = self._uppercase_action_name(action)
        if action.phase_to_keep is not None:
            s += f' keep {action.phase_to_keep}'
        return s

    def _to_filter(self, action_text: str) -> Action:
        """Inverse of `_from_filter`."""
        _, phase = self._get_property_from_split(action_text, 'keep')
        return Filter(phase)

    def _from_followotherprocedure(self, action: FollowOtherProcedure) -> str:
        """Format: FOLLOWOTHERPROCEDURE"""
        return self._uppercase_action_name(action)

    def _to_followotherprocedure(self, action_text: str) -> Action:
        """Inverse of `_from_followotherprocedure`; the text carries no property."""
        return FollowOtherProcedure()

    def _from_makesolution(self, action: MakeSolution) -> str:
        """Format: MAKESOLUTION with <chemical> and <chemical>[ and ...]"""
        materials_strings = [self._chemical_to_string(c) for c in action.materials]
        combined_materials = ' and '.join(materials_strings)
        return f'{self._uppercase_action_name(action)} with {combined_materials}'

    def _to_makesolution(self, action_text: str) -> Action:
        """
        Inverse of `_from_makesolution`.

        The compounds are obtained by splitting on ' and ', so a compound
        name containing ' and ' is not recovered correctly.
        """
        action_text = self._strip_prefix(action_text, 'MAKESOLUTION with ')
        compounds_strings = action_text.split(' and ')
        compounds = [self._get_chemical(compound_string) for compound_string in compounds_strings]
        return MakeSolution(materials=compounds)

    def _from_microwave(self, action: Microwave) -> str:
        """Format: MICROWAVE[ for <duration>][ at <T>]"""
        s = f'{self._uppercase_action_name(action)}'
        if action.duration:
            s += f' for {action.duration}'
        if action.temperature:
            s += f' at {action.temperature}'
        return s

    def _to_microwave(self, action_text: str) -> Action:
        """Inverse of `_from_microwave`."""
        remaining, temperature = self._get_property_from_split(action_text, 'at')
        remaining, duration = self._get_property_from_split(remaining, 'for')
        return Microwave(duration=duration, temperature=temperature)

    def _from_otherlanguage(self, action: OtherLanguage) -> str:
        """Format: OTHERLANGUAGE"""
        return self._uppercase_action_name(action)

    def _to_otherlanguage(self, action_text: str) -> Action:
        """Inverse of `_from_otherlanguage`; the text carries no property."""
        return OtherLanguage()

    def _from_partition(self, action: Partition) -> str:
        """Format: PARTITION with <chemical 1> and <chemical 2>"""
        return f'{self._uppercase_action_name(action)} ' \
               f'with {self._chemical_to_string(action.material_1)} ' \
               f'and {self._chemical_to_string(action.material_2)}'

    def _to_partition(self, action_text: str) -> Action:
        """
        Inverse of `_from_partition`.

        Raises:
            IndexError: if the text does not contain ' and '.
        """
        action_text = self._strip_prefix(action_text, 'PARTITION with ')
        splits = action_text.split(' and ')
        m1 = self._get_chemical(splits[0])
        m2 = self._get_chemical(splits[1])
        return Partition(material_1=m1, material_2=m2)

    def _from_ph(self, action: PH) -> str:
        """Format: PH with <chemical>[ to pH <pH>][ dropwise][ at <T>]"""
        s = (
            f'{self._uppercase_action_name(action)} '
            f'with {self._chemical_to_string(action.material)}'
        )
        if action.ph:
            s += f' to pH {action.ph}'
        if action.dropwise:
            s += ' dropwise'
        if action.temperature:
            s += f' at {action.temperature}'
        return s

    def _to_ph(self, action_text: str) -> Action:
        """Inverse of `_from_ph`."""
        remaining, temperature = self._get_property_from_split(action_text, 'at')
        remaining, dropwise = self._split_dropwise(remaining)
        remaining, ph = self._get_property_from_split(remaining, 'to pH')
        material = self._get_chemical(remaining, 'PH with ')
        return PH(material=material, ph=ph, dropwise=dropwise, temperature=temperature)

    def _from_phaseseparation(self, action: PhaseSeparation) -> str:
        """Format: PHASESEPARATION"""
        return self._uppercase_action_name(action)

    def _to_phaseseparation(self, action_text: str) -> Action:
        """Inverse of `_from_phaseseparation`; the text carries no property."""
        return PhaseSeparation()

    def _from_purify(self, action: Purify) -> str:
        """Format: PURIFY"""
        return self._uppercase_action_name(action)

    def _to_purify(self, action_text: str) -> Action:
        """Inverse of `_from_purify`; the text carries no property."""
        return Purify()

    def _from_quench(self, action: Quench) -> str:
        """Format: QUENCH with <chemical>[ dropwise][ at <T>]"""
        s = (
            f'{self._uppercase_action_name(action)} '
            f'with {self._chemical_to_string(action.material)}'
        )
        if action.dropwise:
            s += ' dropwise'
        if action.temperature:
            s += f' at {action.temperature}'
        return s

    def _to_quench(self, action_text: str) -> Action:
        """Inverse of `_from_quench`."""
        remaining, temperature = self._get_property_from_split(action_text, 'at')
        remaining, dropwise = self._split_dropwise(remaining)
        material = self._get_chemical(remaining, 'QUENCH with ')
        return Quench(material=material, dropwise=dropwise, temperature=temperature)

    def _from_recrystallize(self, action: Recrystallize) -> str:
        """Format: RECRYSTALLIZE from <chemical>"""
        return (
            f'{self._uppercase_action_name(action)} '
            f'from {self._chemical_to_string(action.solvent)}'
        )

    def _to_recrystallize(self, action_text: str) -> Action:
        """Inverse of `_from_recrystallize`."""
        chemical = self._get_chemical(action_text, 'RECRYSTALLIZE from ')
        return Recrystallize(solvent=chemical)

    def _from_reflux(self, action: Reflux) -> str:
        """Format: REFLUX[ for <duration>][ under <atmosphere>][ with Dean-Stark apparatus]"""
        s = self._uppercase_action_name(action)
        if action.duration is not None:
            s += f' for {action.duration}'
        if action.atmosphere is not None:
            s += f' under {action.atmosphere}'
        if action.dean_stark:
            s += ' with Dean-Stark apparatus'
        return s

    def _to_reflux(self, action_text: str) -> Action:
        """Inverse of `_from_reflux`."""
        dean_stark = ' with Dean-Stark apparatus' in action_text
        remaining = action_text.replace(' with Dean-Stark apparatus', '')
        remaining, atmosphere = self._get_property_from_split(remaining, 'under')
        remaining, duration = self._get_property_from_split(remaining, 'for')
        return Reflux(duration=duration, dean_stark=dean_stark, atmosphere=atmosphere)

    def _from_settemperature(self, action: SetTemperature) -> str:
        """Format: SETTEMPERATURE <T>"""
        return f'{self._uppercase_action_name(action)} {action.temperature}'

    def _to_settemperature(self, action_text: str) -> Action:
        """Inverse of `_from_settemperature`."""
        temperature = self._re_match(r'SETTEMPERATURE (.*)', action_text)
        return SetTemperature(temperature=temperature)

    def _from_sonicate(self, action: Sonicate) -> str:
        """Format: SONICATE[ for <duration>][ at <T>]"""
        s = f'{self._uppercase_action_name(action)}'
        if action.duration:
            s += f' for {action.duration}'
        if action.temperature:
            s += f' at {action.temperature}'
        return s

    def _to_sonicate(self, action_text: str) -> Action:
        """Inverse of `_from_sonicate`."""
        remaining, temperature = self._get_property_from_split(action_text, 'at')
        remaining, duration = self._get_property_from_split(remaining, 'for')
        return Sonicate(duration=duration, temperature=temperature)

    def _from_stir(self, action: Stir) -> str:
        """Format: STIR[ for <duration>][ at <T>][ under <atmosphere>]"""
        s = self._uppercase_action_name(action)
        if action.duration:
            s += f' for {action.duration}'
        if action.temperature:
            s += f' at {action.temperature}'
        if action.atmosphere:
            s += f' under {action.atmosphere}'
        return s

    def _to_stir(self, action_text: str) -> Action:
        """Inverse of `_from_stir`."""
        remaining, atmosphere = self._get_property_from_split(action_text, 'under')
        remaining, temperature = self._get_property_from_split(remaining, 'at')
        remaining, duration = self._get_property_from_split(remaining, 'for')
        return Stir(duration=duration, temperature=temperature, atmosphere=atmosphere)

    def _from_triturate(self, action: Triturate) -> str:
        """Format: TRITURATE with <chemical>"""
        return f'{self._uppercase_action_name(action)} with {self._chemical_to_string(action.solvent)}'

    def _to_triturate(self, action_text: str) -> Action:
        """Inverse of `_from_triturate`."""
        material = self._get_chemical(action_text, 'TRITURATE with ')
        return Triturate(solvent=material)

    def _from_wait(self, action: Wait) -> str:
        """Format: WAIT for <duration>[ at <T>]"""
        s = f'{self._uppercase_action_name(action)} for {action.duration}'
        if action.temperature:
            s += f' at {action.temperature}'
        return s

    def _to_wait(self, action_text: str) -> Action:
        """
        Inverse of `_from_wait`.

        Raises:
            ValueError: if the text contains no duration.
        """
        remaining, temperature = self._get_property_from_split(action_text, 'at')
        remaining, duration = self._get_property_from_split(remaining, 'for')
        if duration is None:
            raise ValueError('The duration must be set for Wait actions')
        return Wait(duration=duration, temperature=temperature)

    def _from_wash(self, action: Wash) -> str:
        """Format: WASH with <chemical>[ <n> x]; the repetitions are omitted if 1."""
        s = (
            f'{self._uppercase_action_name(action)} '
            f'with {self._chemical_to_string(action.material)}'
        )
        if action.repetitions != 1:
            s += f' {action.repetitions} x'
        return s

    def _to_wash(self, action_text: str) -> Action:
        """Inverse of `_from_wash`."""
        action_text, repetitions = self._split_repetitions(action_text)
        material = self._get_chemical(action_text, 'WASH with ')
        return Wash(material=material, repetitions=repetitions)

    def _from_yield(self, action: Yield) -> str:
        """Format: YIELD <chemical>"""
        return f'{self._uppercase_action_name(action)} {self._chemical_to_string(action.material)}'

    def _to_yield(self, action_text: str) -> Action:
        """Inverse of `_from_yield`."""
        material = self._get_chemical(action_text, 'YIELD ')
        return Yield(material=material)

    def _from_noaction(self, action: NoAction) -> str:
        """Format: NOACTION"""
        return self._uppercase_action_name(action)

    def _to_noaction(self, action_text: str) -> Action:
        """Inverse of `_from_noaction`; the text carries no property."""
        return NoAction()

    def _get_property_from_split(self, sentence: str,
                                 splitting_word: str) -> Tuple[str, Optional[str]]:
        """
        Gets an optional property following a given splitting word.

        The sentence is split on the splitting word surrounded by spaces. If
        the word occurs several times, only the text between its first and
        second occurrence is returned as the property.

        Returns:
            Tuple: (sentence before splitting word, optional property)
        """
        splits = sentence.split(f' {splitting_word} ')
        prop = splits[1] if len(splits) > 1 else None
        return splits[0], prop

    def _split_dropwise(self, sentence: str) -> Tuple[str, bool]:
        """
        Looks for the dropwise marker in a sentence.

        Returns:
            Tuple: (sentence without the marker, whether the marker was present)
        """
        dropwise = self._DROPWISE in sentence
        return sentence.replace(self._DROPWISE, ''), dropwise

    def _split_repetitions(self, sentence: str) -> Tuple[str, int]:
        """
        Gets the number of repetitions given at the end of a sentence (" 3 x").

        Only the match at the end is removed: an identical substring earlier
        in the sentence (such as " 2 x" in " 2 xylene") is left untouched.

        Returns:
            Tuple: (sentence without the repetitions, number of repetitions),
            the number of repetitions being 1 if none is specified.
        """
        match = re.search(r' (\d+) x$', sentence)
        if match is None:
            return sentence, 1
        remaining = sentence[:match.start()] + sentence[match.end():]
        return remaining, int(match.group(1))

    @staticmethod
    def _strip_prefix(sentence: str, prefix: str) -> str:
        """
        Removes the prefix from the beginning of the sentence, if present.
        Occurrences of the prefix further in the sentence are kept.
        """
        if prefix and sentence.startswith(prefix):
            return sentence[len(prefix):]
        return sentence

    def _get_quantities(self, sentence: str) -> Tuple[str, List[str]]:
        """
        Gets the quantities given in parentheses at the end of a sentence.

        Returns:
            Tuple: (sentence without the quantities, list of quantities), the
            list being empty if the sentence does not end with " (...)".
        """
        match = re.match(r'^.*( \(.*\))$', sentence)
        if not match:
            return sentence, []

        full_match_string = match.group(1)

        # remove the leading space and the parentheses
        match_string = full_match_string[2:-1]
        quantities = match_string.split(', ')

        # cut out the matched group only, not other identical substrings
        remaining = sentence[:match.start(1)] + sentence[match.end(1):]
        return remaining, quantities

    def _get_chemical(self, sentence: str, prefix: str = '') -> Chemical:
        """
        Convert a string to a chemical.

        Input examples:
            4-butyloctane (5 ml)
            DMF

        If prefix is given, it will be removed from the beginning of the
        original sentence (useful for the action names in the beginning of
        the string).
        """
        sentence = self._strip_prefix(sentence, prefix)

        remaining, quantities = self._get_quantities(sentence)

        # remove the zero-width non-joiner that was inserted before the
        # parentheses of the compound name in _chemical_to_string
        compound_name = remaining.replace(' \u200C(', ' (')
        return Chemical(name=compound_name, quantity=quantities)

    def _re_match(self, regex: str, text: str) -> str:
        """
        Returns the first group of the regex matched at the beginning of the text.

        Raises:
            RuntimeError: if the regex does not match.
        """
        m = re.match(regex, text)
        if m is None:
            raise RuntimeError(f'No match for regex "{regex}" in "{text}"')
        return m.group(1)