1
2 __doc__ = """Base classes for match providers.
3
4 They are used by business objects to give
5 phrasewheels the ability to guess phrases.
6
7 Copyright (C) GNUMed developers
8 license: GPL v2 or later
9 """
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>, I.Haywood <ihaywood@gnu.org>, S.J.Tan <sjtan@bigpond.com>"
11
12
13 import sys
14 import logging
15 import re as regex
16 import datetime as pydt
17
18
19
20 if __name__ == "__main__":
21 sys.path.insert(0, '../../')
22 from Gnumed.pycommon import gmPG2
23
24
25 _log = logging.getLogger('gm.ui')
26
27
28
29
# Regular expression (source text) matching runs of characters that are
# to be ignored in a search fragment.
# NOTE(review): presumably the default behind the `ignored_chars`
# property, whose compiled form is applied with .sub('', fragment) in
# getMatches() -- confirm against the base class constructor.
#
# The double quote is a member of the character class. It used to be
# concatenated *after* the "+" quantifier, which made the pattern match
# a run of ignored characters only when that run was directly followed
# by a double quote. A raw string is used so that "\[" and "\]" are not
# (invalid) string escapes.
default_ignored_chars = r"""[?!.'"\(){}\[\]<>~#*$%^_]+"""

# Regular expression (source text) matching runs of characters which
# separate words: dash, space, tab, "=", "+", "&", ":" and "@".
default_word_separators = '[- \t=+&:@]+'
36
38 """Base class for match providing objects.
39
40 Match sources might be:
41 - database tables
42 - flat files
43 - previous input
44 - config files
45 - in-memory list created on the fly
46 """
47 print_queries = False
48
56
57
58
60 """Return matches according to aFragment and matching thresholds.
61
FIXME: design decision: we don't worry about data source changes
63 during the lifetime of a MatchProvider
64 FIXME: append _("*get all items*") on truncation
65 """
66
67 if aFragment is None:
68 raise ValueError('Cannot find matches without a fragment.')
69
70
71 if aFragment == '*':
72 return self.getAllMatches()
73
74
75 tmpFragment = aFragment.lower()
76
77 if self.__ignored_chars is not None:
78 tmpFragment = self.__ignored_chars.sub('', tmpFragment)
79
80 if self.__word_separators is not None:
81 tmpFragment = ' '.join(self.__word_separators.split(tmpFragment))
82
83 lngFragment = len(tmpFragment)
84
85
86 if lngFragment >= self.__threshold_substring:
87 return self.getMatchesBySubstr(tmpFragment)
88 elif lngFragment >= self.__threshold_word:
89 return self.getMatchesByWord(tmpFragment)
90 elif lngFragment >= self.__threshold_phrase:
91 return self.getMatchesByPhrase(tmpFragment)
92 else:
93 return (False, [])
94
96 raise NotImplementedError
97
99 raise NotImplementedError
100
102 raise NotImplementedError
103
105 raise NotImplementedError
106
109
110
111
def setThresholds(self, aPhrase = 1, aWord = 3, aSubstring = 5):
    """Set match location thresholds.

    The fragment passed to getMatches() must contain at least this
    many characters before it triggers a match search at:

    1) aPhrase    - start of phrase (first word)
    2) aWord      - start of any word within phrase
    3) aSubstring - _inside_ any word within phrase

    The thresholds must satisfy aPhrase <= aWord <= aSubstring.

    Returns:
        True if the thresholds were stored, False if they were
        inconsistent, in which case the previous thresholds are
        retained and an error is logged.
    """
    # a substring search must not kick in earlier than a word-start search
    if aSubstring < aWord:
        _log.error(
            'Setting substring threshold (%s) lower than word-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aSubstring,
            aWord,
            self.__threshold_substring,
            self.__threshold_word
        )
        return False

    # a word-start search must not kick in earlier than a phrase-start search
    if aWord < aPhrase:
        # report the two values actually being compared here
        _log.error(
            'Setting word-start threshold (%s) lower than phrase-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aWord,
            aPhrase,
            self.__threshold_word,
            self.__threshold_phrase
        )
        return False

    # now actually reassign thresholds
    self.__threshold_phrase = aPhrase
    self.__threshold_word = aWord
    self.__threshold_substring = aSubstring

    return True
135
141
143 if self.__word_separators is None:
144 return None
145 return self.__word_separators.pattern
146
147 word_separators = property(_get_word_separators, _set_word_separators)
148
154
156 if self.__ignored_chars is None:
157 return None
158 return self.__ignored_chars.pattern
159
160 ignored_chars = property(_get_ignored_chars, _set_ignored_chars)
161
def set_context (self, context=None, val=None):
    """Store a value providing context information for matches.

    Whether the matching code makes use of the value depends on
    the concrete implementation. Names and values of contexts
    depend on what is being matched.

    <context> -- the *placeholder* key *inside* the context
                 definition, not the context *definition* key
    <val> -- the value to remember under that placeholder

    Returns False (storing nothing) if <context> is None,
    True otherwise.
    """
    if context is not None:
        self._context_vals[context] = val
        return True
    return False
176
def unset_context(self, context=None):
    """Forget the value stored for the placeholder <context>.

    Placeholders for which no value is stored are silently ignored.
    """
    # dropping a missing key is not an error
    self._context_vals.pop(context, None)
182
183
184
186 """Match provider where all possible options can be held
187 in a reasonably sized, pre-allocated list.
188 """
"""aSeq must be a list or tuple of dicts (or None). Each dict must have the keys (data, list_label, weight).
"""
192 if not type(aSeq) in [type(None), list, tuple]:
193 _log.error('fixed list match provider argument must be a list/tuple of dicts/None')
194 raise TypeError('fixed list match provider argument must be a list/tuple of dicts/None')
195
196 self.__items = aSeq
197 cMatchProvider.__init__(self)
198
199
200
201
202
203
204
205
206
208 """Return matches for aFragment at start of phrases."""
209 matches = []
210
211 for item in self.__items:
212
213 if item['list_label'].lower().startswith(aFragment.lower()):
214 matches.append(item)
215
216 if len(matches) == 0:
217 return (False, [])
218
219
220 matches.sort(key = lambda x: x['weight'], reverse = True)
221 return (True, matches)
222
223
225 """Return matches for aFragment at start of words inside phrases."""
226 matches = []
227
228 for item in self.__items:
229 item_label = item['list_label'].lower()
230 fragment_pos = item_label.find(aFragment.lower())
231
232 if fragment_pos == 0:
233 matches.append(item)
234
235 elif fragment_pos > 0:
236
237 if item_label[fragment_pos-1] == ' ':
238 matches.append(item)
239
240 if len(matches) == 0:
241 return (False, [])
242
243
244 matches.sort(key = lambda x: x['weight'], reverse = True)
245 return (True, matches)
246
247
249 """Return matches for aFragment as a true substring."""
250 matches = []
251
252 for item in self.__items:
253 if item['list_label'].lower().find(aFragment.lower()) != -1:
254 matches.append(item)
255
256 if len(matches) == 0:
257 return (False, [])
258
259
260 matches.sort(key = lambda x: x['weight'], reverse = True)
261 return (True, matches)
262
263
265 """Return all items."""
266 matches = self.__items
267
268 if len(matches) == 0:
269 return (False, [])
270
271
272 matches.sort(key = lambda x: x['weight'], reverse = True)
273 return (True, matches)
274
275
277 """items must be a list of dicts. Each dict must have the keys (data, list_label, weight)"""
278 self.__items = items
279
280
281
282
283
284
285
286
287
288
289
290
291
292
294 """Match provider which searches matches
295 in the results of a function call.
296 """
def __init__(self, get_candidates = None):
    """Set up a provider matching against the results of a function call.

    get_candidates -- callable invoked without arguments whenever
        matches are searched; the matching methods of this class
        read candidate['list_label'] from each item it returns

    Raises ValueError if get_candidates is None.
    """
    if get_candidates is None:
        msg = 'must define function to retrieve match candidates list'
        _log.error(msg)
        raise ValueError(msg)

    self._get_candidates = get_candidates
    cMatchProvider.__init__(self)
305
306
307
308
309
310
311
312
314 """Return matches for aFragment at start of phrases."""
315 matches = []
316 candidates = self._get_candidates()
317
318 for candidate in candidates:
319
320 if aFragment.startswith(candidate['list_label'].lower()):
321 matches.append(candidate)
322
323 if len(matches) == 0:
324 return (False, [])
325
326 matches.sort(key = self.__cmp_candidates)
327 return (True, matches)
328
330 """Return matches for aFragment at start of words inside phrases."""
331 matches = []
332 candidates = self._get_candidates()
333
334 for candidate in candidates:
335 pos = candidate['list_label'].lower().find(aFragment)
336
337
338
339
340 if (pos == 0) or (candidate['list_label'][pos-1] == ' '):
341 matches.append(candidate)
342
343 if len(matches) == 0:
344 return (False, [])
345
346 matches.sort(key = self.__cmp_candidates)
347 return (True, matches)
348
350 """Return matches for aFragment as a true substring."""
351 matches = []
352 candidates = self._get_candidates()
353
354 for candidate in candidates:
355 if candidate['list_label'].lower().find(aFragment) != -1:
356
357 matches.append(candidate)
358
359 if len(matches) == 0:
360 return (False, [])
361
362 matches.sort(key = self.__cmp_candidates)
363 return (True, matches)
364
366 """Return all candidates."""
367 return self._get_candidates()
368
369
371 """naive ordering"""
372 return 0
373
374
375
376
377
378
379
380
382 """Match provider which searches matches
383 in possibly several database tables.
384
385 queries:
386 - a list of unicode strings
387 - each string is a query
388 - each string must contain: "... WHERE <column> %(fragment_condition)s ..."
389 - each string can contain in the where clause: "... %(<ctxt_key1>)s ..."
390 - each query must return (data, list_label, field_label)
391
392 context definitions to be used in the queries, example:
393 {'ctxt_key1': {'where_part': 'AND country = %(country)s', 'placeholder': 'country'}}
394
395 client code using .set_context() must use the 'placeholder':
396 <phrasewheel>/<match provider>.set_context('country', 'Germany')
397
398 full example query:
399
400 query = u" " "
401 SELECT DISTINCT ON (list_label)
402 pk_encounter
403 AS data,
404 to_char(started, 'YYYY Mon DD (HH24:MI)') || ': ' || l10n_type || ' [#' || pk_encounter || ']'
405 AS list_label,
406 to_char(started, 'YYYY Mon DD') || ': ' || l10n_type
407 AS field_label
408 FROM
409 clin.v_pat_encounters
410 WHERE
411 (
412 l10n_type %(fragment_condition)s
413 OR
414 type %(fragment_condition)s
415 ) %(ctxt_patient)s
416 ORDER BY
417 list_label
418 LIMIT
419 30
420 " " "
421 context = {'ctxt_patient': {
422 'where_part': u'AND pk_patient = %(PLACEHOLDER)s',
423 'placeholder': u'PLACEHOLDER'
424 }}
425 self.mp = gmMatchProvider.cMatchProvider_SQL2(queries = query, context = context)
426 self.set_context(context = 'PLACEHOLDER', val = '<THE VALUE>')
427
428 _SQL_data2match:
429 SQL to retrieve a match by, say, primary key
430 wherein the only keyword argument is 'pk'
431 """
def __init__(self, queries = None, context = None):
    """Set up a provider matching against the results of SQL queries.

    queries -- a single query string, or a list/tuple of query
        strings; a list is stored as passed in (not copied), a
        tuple is converted into a list, a single query is wrapped
        into a one-element list, None results in no queries at all
    context -- dict of context definitions, None means "no contexts"
    """
    cMatchProvider.__init__(self)

    # normalize into a mutable list since _find_matches()
    # removes queries which fail to run
    if queries is None:
        self._queries = []
    elif isinstance(queries, list):
        self._queries = queries
    elif isinstance(queries, tuple):
        self._queries = list(queries)
    else:
        self._queries = [queries]

    self._context = {} if context is None else context

    # arguments handed to the database layer along with each query
    self._args = {}

    # optional SQL for retrieving a match by its data, unset by default
    self._SQL_data2match = None
449
450
451
452
453
454
455
456
458 """Return matches for aFragment at start of phrases."""
459
460 fragment_condition = "ILIKE %(fragment)s"
461 self._args['fragment'] = "%s%%" % aFragment
462
463 return self._find_matches(fragment_condition)
464
466 """Return matches for aFragment at start of words inside phrases."""
467
468 fragment_condition = "~* %(fragment)s"
469 aFragment = gmPG2.sanitize_pg_regex(expression = aFragment, escape_all = False)
470 self._args['fragment'] = "( %s)|(^%s)" % (aFragment, aFragment)
471
472 return self._find_matches(fragment_condition)
473
475 """Return matches for aFragment as a true substring."""
476
477 fragment_condition = "ILIKE %(fragment)s"
478 self._args['fragment'] = "%%%s%%" % aFragment
479
480 return self._find_matches(fragment_condition)
481
485
487 if self._SQL_data2match is None:
488 return None
489
490 query = {'cmd': self._SQL_data2match, 'args': {'pk': data}}
491 try:
492 rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)
493 except:
494 _log.exception('[%s]: error running _SQL_data2match, dropping query', self.__class__.__name__)
495 self._SQL_data2match = None
496 return None
497
498
499 if len(rows) == 1:
500 return rows[0]
501
502 _log.error('[%s]: 0 or >1 rows found by running _SQL_data2match, ambiguous, ignoring', self.__class__.__name__)
503 return None
504
506 if self.print_queries:
507 print("----------------------")
508 print(pydt.datetime.now())
509 matches = []
510 for query in self._queries:
511 where_fragments = {'fragment_condition': fragment_condition}
512
513 for context_key, context_def in self._context.items():
514 try:
515 placeholder = context_def['placeholder']
516 where_part = context_def['where_part']
517 self._args[placeholder] = self._context_vals[placeholder]
518
519 where_fragments[context_key] = where_part
520 if self.print_queries:
521 print("ctxt ph:", placeholder)
522 print("ctxt where:", where_part)
523 print("ctxt val:", self._context_vals[placeholder])
524 except KeyError:
525
526 where_fragments[context_key] = ''
527 if self.print_queries:
528 print("invalid ctxt key:", context_key)
529
530 cmd = query % where_fragments
531
532 if self.print_queries:
533 print("class:", self.__class__.__name__)
534 print("ctxt:", self._context_vals)
535 print("args:", self._args)
536 print("query:", cmd)
537
538 try:
539 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': self._args}], get_col_idx = False)
540 except:
541 _log.exception('[%s]: error running match provider SQL, dropping query', self.__class__.__name__)
542 idx = self._queries.index(query)
543 del self._queries[idx]
544 break
545
546
547 if len(rows) == 0:
548 continue
549
550 for row in rows:
551 match = {'weight': 0}
552
553 try:
554 match['data'] = row['data']
555 except KeyError:
556 match['data'] = row[0]
557
558 try:
559 match['list_label'] = row['list_label']
560 except KeyError:
561 match['list_label'] = row[1]
562
563
564 try:
565 match['field_label'] = row['field_label']
566
567 except KeyError:
568
569 try:
570 match['field_label'] = row[2]
571
572 except IndexError:
573 match['field_label'] = match['list_label']
574
575
576
577
578
579
580 matches.append(match)
581
582 return (True, matches)
583
584
585 return (False, [])
586
587
588 if __name__ == '__main__':
589 pass
590