1
2 """Some HL7 handling."""
3
4 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
5 __license__ = "GPL v2 or later"
6
7
8 import sys
9 import os
10 import codecs
11 import logging
12 import time
13 import datetime as pyDT
14 import hl7 as pyhl7
15 from xml.etree import ElementTree as pyxml
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmTools
21 from Gnumed.pycommon import gmBusinessDBObject
22 from Gnumed.pycommon import gmPG2
23 from Gnumed.pycommon import gmDateTime
24 from Gnumed.business import gmPathLab
25
26
27 _log = logging.getLogger('gm.hl7')
28
29
# Terminator appended to every HL7 segment written by this module.
HL7_EOL = u'\r'

# Three-letter segment identifiers recognised at the start of a line.
HL7_SEGMENTS = u'FHS BHS MSH PID PV1 OBX NTE ORC OBR'.split()

# Field index inside an MSH segment.
MSH_sending_lab = 3

# Field indices inside a PID segment; lastname/firstname/middlename
# are component indices inside the PID_name field.
PID_name = 5
PID_lastname = 0
PID_firstname = 1
PID_middlename = 2
PID_dob = 7
PID_gender = 8

# Field indices inside an OBX segment.
# NOTE(review): OBX_LOINC / OBX_name are presumably component indices
# inside the OBX_type field -- confirm against the code that reads them.
OBX_type = 3
OBX_LOINC = 0
OBX_name = 1
OBX_value = 5
OBX_unit = 6

# Human readable labels per segment type, keyed by field index.
HL7_field_labels = {
    'PID': {
        0: 'Segment Type',
        1: '<PID> Set ID',
        2: 'Patient ID',
        5: 'Patient name',
        7: 'Date/Time of birth',
        8: 'Administrative gender'
    },
    'OBR': {
        0: 'Segment Type'
    },
    'OBX': {
        0: 'Segment Type',
        1: 'Set ID',
        2: 'Value Type',
        3: 'Identifier (LOINC)',
        4: 'Observation Sub-ID',
        5: 'Value',
        6: 'Units',
        7: 'References Range (Low - High)',
        8: 'Abnormal Flags',
        11: 'Result Status',
        14: 'Date/Time of Observation'
    }
}
75
76
77
78
79 _SQL_get_incoming_data = u"""SELECT * FROM clin.v_incoming_data_unmatched WHERE %s"""
80
82 """Represents items of incoming data, say, HL7 snippets."""
83
84 _cmd_fetch_payload = _SQL_get_incoming_data % u"pk_incoming_data_unmatched = %s"
85 _cmds_store_payload = [
86 u"""UPDATE clin.incoming_data_unmatched SET
87 fk_patient_candidates = %(pk_patient_candidates)s,
88 fk_identity_disambiguated = %(pk_identity_disambiguated)s,
89 fk_provider_disambiguated = %(pk_provider_disambiguated)s,
90 request_id = gm.nullify_empty_string(%(request_id)s),
91 firstnames = gm.nullify_empty_string(%(firstnames)s),
92 lastnames = gm.nullify_empty_string(%(lastnames)s),
93 dob = %(dob)s,
94 postcode = gm.nullify_empty_string(%(postcode)s),
95 other_info = gm.nullify_empty_string(%(other_info)s),
96 type = gm.nullify_empty_string(%(data_type)s),
97 gender = gm.nullify_empty_string(%(gender)s),
98 requestor = gm.nullify_empty_string(%(requestor)s),
99 external_data_id = gm.nullify_empty_string(%(external_data_id)s),
100 comment = gm.nullify_empty_string(%(comment)s)
101 WHERE
102 pk = %(pk_incoming_data_unmatched)s
103 AND
104 xmin = %(xmin_incoming_data_unmatched)s
105 RETURNING
106 xmin as xmin_incoming_data_unmatched,
107 octet_length(data) as data_size
108 """
109 ]
110
111 _updatable_fields = [
112 u'pk_patient_candidates',
113 u'request_id',
114 u'firstnames',
115 u'lastnames',
116 u'dob',
117 u'postcode',
118 u'other_info',
119 u'data_type',
120 u'gender',
121 u'requestor',
122 u'external_data_id',
123 u'comment',
124 u'pk_identity_disambiguated',
125 u'pk_provider_disambiguated'
126 ]
127
130
132
133 if not (os.access(fname, os.R_OK) and os.path.isfile(fname)):
134 _log.error('[%s] is not a readable file' % fname)
135 return False
136
137 gmPG2.file2bytea (
138 query = u"UPDATE clin.incoming_data_unmatched SET data = %(data)s::bytea WHERE pk = %(pk)s",
139 filename = fname,
140 args = {'pk': self.pk_obj}
141 )
142
143
144 self.refetch_payload()
145 return True
146
148
149 if self._payload[self._idx['data_size']] == 0:
150 return None
151
152 if self._payload[self._idx['data_size']] is None:
153 return None
154
155 if filename is None:
156 filename = gmTools.get_unique_filename(prefix = 'gm-incoming_data_unmatched-')
157
158 success = gmPG2.bytea2file (
159 data_query = {
160 'cmd': u'SELECT substring(data from %(start)s for %(size)s) FROM clin.incoming_data_unmatched WHERE pk = %(pk)s',
161 'args': {'pk': self.pk_obj}
162 },
163 filename = filename,
164 chunk_size = aChunkSize,
165 data_size = self._payload[self._idx['data_size']]
166 )
167
168 if not success:
169 return None
170
171 return filename
172
173
175 if order_by is None:
176 order_by = u'true'
177 else:
178 order_by = u'true ORDER BY %s' % order_by
179 cmd = _SQL_get_incoming_data % order_by
180 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = True)
181 return [ cIncomingData(row = {'data': r, 'idx': idx, 'pk_field': 'pk_incoming_data_unmatched'}) for r in rows ]
182
183
def create_incoming_data(data_type, filename):
    """Create a row of unmatched incoming data and load a file into it.

    A placeholder row is inserted first (its data column holds the
    literal 'new data'), then the content of *filename* is stored via
    cIncomingData.update_data_from_file(). If that fails the
    placeholder row is deleted again.

    data_type: value stored in the "type" column of the new row
    filename: file whose content becomes the row's data

    Returns the new cIncomingData instance, or None if the file
    content could not be stored.
    """
    args = {'typ': data_type}
    cmd = u"""
        INSERT INTO clin.incoming_data_unmatched (type, data)
        VALUES (%(typ)s, 'new data'::bytea)
        RETURNING pk"""
    rows, idx = gmPG2.run_rw_queries (
        queries = [{'cmd': cmd, 'args': args}],
        return_data = True,
        get_col_idx = False
    )
    pk = rows[0]['pk']
    incoming = cIncomingData(aPK_obj = pk)
    if incoming.update_data_from_file(fname = filename):
        return incoming

    # Clean up the placeholder row. The keyword must be
    # "pk_incoming_data" (the name delete_incoming_data() reads);
    # passing "incoming_data" raised TypeError and left the row behind.
    delete_incoming_data(pk_incoming_data = pk)
    return None
197
198
def delete_incoming_data(pk_incoming_data=None):
    """Delete one row from clin.incoming_data_unmatched.

    pk_incoming_data: primary key of the row to delete

    Always returns True; errors surface as whatever
    gmPG2.run_rw_queries() raises.
    """
    query = {
        'cmd': u"DELETE FROM clin.incoming_data_unmatched WHERE pk = %(pk)s",
        'args': {'pk': pk_incoming_data}
    }
    gmPG2.run_rw_queries(queries = [query])
    return True
204
205
206
207
209
210 out_fname = gmTools.get_unique_filename (
211 prefix = u'%s-fixed-' % gmTools.fname_stem(filename),
212 suffix = '.hl7'
213 )
214 _log.debug('fixing HL7 [%s] -> [%s]', filename, out_fname)
215 hl7_in = codecs.open(filename, 'rb', encoding)
216 hl7_out = codecs.open(out_fname, 'wb', 'utf8')
217
218 line_idx = 0
219 prev_line = None
220 for line in hl7_in:
221 line_idx += 1
222
223 if line.endswith(u' \n'):
224 _log.debug('#%s: suspicious non-terminating line ("...<SPACE>\\n"): [%s...%s]', line_idx, line[:4], line[-7:])
225 if prev_line is None:
226 prev_line = line[:-1]
227 else:
228 prev_line = prev_line + line[:-1]
229 continue
230
231 line = line.strip('\r').strip('\n').strip('\r').strip('\n')
232
233
234 if line[3] != u'|':
235 if prev_line is None:
236 raise ValueError('line #%s does not start with "<SEGMENT>|" but previous line did not end with BLANK either: [%s]' % (line_idx, line))
237 hl7_out.write(prev_line)
238 prev_line = None
239 hl7_out.write(line + HL7_EOL)
240 continue
241
242
243 if line[:3] in HL7_SEGMENTS:
244 if prev_line is not None:
245 hl7_out.write(prev_line + HL7_EOL)
246 prev_line = None
247 hl7_out.write(line + HL7_EOL)
248 continue
249
250 hl7_out.close()
251 hl7_in.close()
252
253 return out_fname
254
256
257 _log.debug('extracting HL7 from CDATA of <%s> nodes in XML file [%s]', xml_path, filename)
258
259 hl7_xml = pyxml.ElementTree()
260 try:
261 hl7_xml.parse(filename)
262 except pyxml.ParseError:
263 _log.exception('cannot parse [%s]' % filename)
264 return None
265 nodes = hl7_xml.findall(xml_path)
266 if len(nodes) == 0:
267 _log.debug('no data found')
268 return None
269
270 out_fname = gmTools.get_unique_filename(prefix = u'%s-' % gmTools.fname_stem(filename), suffix = '.hl7')
271 _log.debug('writing HL7 to [%s]', out_fname)
272 hl7_file = codecs.open(out_fname, 'wb', 'utf8')
273 for node in nodes:
274 hl7_file.write(node.text)
275
276 return out_fname
277
279
def split_HL7_into_MSH(filename, encoding='utf8'):
    """Split an HL7 file into one output file per MSH message.

    NOTE(review): the def line is missing from this listing; the name
    is taken from the reference in the docstring of the per-PID
    splitter ("what's created by split_HL7_into_MSH()") -- confirm.

    - empty lines and FHS/BHS segments before the first MSH are skipped
    - any other segment before the first MSH raises ValueError
    - BTS/FTS segments are dropped
    - every other line is written to the file of the current MSH,
      stripped of CR/LF and terminated by a single CR
    - output files are always written as utf8

    filename: the HL7 file to split
    encoding: encoding of the input file

    Returns the list of file names written, in message order.
    """
    _log.debug('splitting [%s]', filename)

    MSH_fnames = []
    MSH_file = None
    idx = 0
    looking_for_first_MSH = True
    hl7_in = codecs.open(filename, 'rb', encoding)
    # try/finally so neither handle leaks when ValueError (or an
    # I/O error) aborts the loop
    try:
        for line in hl7_in:

            if looking_for_first_MSH:
                if line.strip() == u'':
                    continue
                if line.startswith(u'FHS|'):
                    _log.debug('ignoring FHS')
                    continue
                if line.startswith(u'BHS|'):
                    _log.debug('ignoring BHS')
                    continue
                if not line.startswith(u'MSH|'):
                    raise ValueError('HL7 file <%s> does not start with "MSH" line' % filename)
                looking_for_first_MSH = False

            # a new message starts: switch to a fresh output file
            if line.startswith(u'MSH|'):
                if MSH_file is not None:
                    MSH_file.close()
                    # do not close twice should opening the next file fail
                    MSH_file = None
                idx += 1
                out_fname = gmTools.get_unique_filename (
                    prefix = u'%s-MSH_%s-' % (gmTools.fname_stem(filename), idx),
                    suffix = 'hl7'
                )
                _log.debug('writing message %s to [%s]', idx, out_fname)
                MSH_fnames.append(out_fname)
                MSH_file = codecs.open(out_fname, 'wb', 'utf8')

            if line.startswith(u'BTS|'):
                _log.debug('ignoring BTS')
                continue
            if line.startswith(u'FTS|'):
                _log.debug('ignoring FTS')
                continue

            # MSH_file cannot be None here: the first line to get past
            # the header check above is an MSH line
            MSH_file.write(line.strip(u'\r\n') + u'\r')
    finally:
        if MSH_file is not None:
            MSH_file.close()
        hl7_in.close()

    return MSH_fnames
327
328
def flatten_MSH_by_PID(filename):
    """Split a single-MSH HL7 file into one file per PID segment.

    Assumes:
    - ONE MSH per file
    - utf8 encoding
    - first non-empty line must be MSH line
    - next non-empty line must be a PID line

    IOW, what's created by split_HL7_into_MSH()

    Each output file starts with a copy of the MSH line, followed by
    the PID line and everything up to (excluding) the next PID line.
    Empty lines are dropped. Lines are stripped of CR/LF and
    terminated by a single CR.

    filename: the HL7 file to split

    Returns the list of file names written, in PID order.

    Raises ValueError if the first non-empty line is not an MSH
    segment or if the line after it is not a PID segment.
    """
    _log.debug('splitting [%s]', filename)

    PID_fnames = []
    PID_file = None
    MSH_line = None
    idx = 0
    looking_for_MSH = True
    looking_for_first_PID = True
    MSH_in = codecs.open(filename, 'rb', 'utf8')
    # try/finally so neither handle leaks when ValueError (or an
    # I/O error) aborts the loop
    try:
        for line in MSH_in:

            if line.strip() == u'':
                continue

            # first non-empty line: remember MSH, to be copied into each output file
            if looking_for_MSH:
                if not line.startswith(u'MSH|'):
                    raise ValueError('HL7 MSH file <%s> does not start with "MSH" line' % filename)
                looking_for_MSH = False
                MSH_line = line.strip(u'\r\n') + u'\r'
                continue

            # second non-empty line: must be PID
            if looking_for_first_PID:
                if not line.startswith(u'PID|'):
                    raise ValueError('HL7 MSH file <%s> does not have "PID" line follow "MSH" line' % filename)
                looking_for_first_PID = False

            # a new patient starts: switch to a fresh output file
            if line.startswith(u'PID|'):
                if PID_file is not None:
                    PID_file.close()
                    # do not close twice should opening the next file fail
                    PID_file = None
                idx += 1
                out_fname = gmTools.get_unique_filename (
                    prefix = u'%s-PID_%s-' % (gmTools.fname_stem(filename), idx),
                    suffix = 'hl7'
                )
                _log.debug('writing message for PID %s to [%s]', idx, out_fname)
                PID_fnames.append(out_fname)
                PID_file = codecs.open(out_fname, 'wb', 'utf8')
                PID_file.write(MSH_line)

            PID_file.write(line.strip(u'\r\n') + u'\r')
    finally:
        if PID_file is not None:
            PID_file.close()
        MSH_in.close()

    return PID_fnames
386
387
395
396
398 comment_tag = u'[HL7 name::%s]' % hl7_lab
399 for gm_lab in gmPathLab.get_test_orgs():
400 if comment_tag in gmTools.coalesce(gm_lab['comment'], u''):
401 return gm_lab
402 _log.debug('lab not found: %s', hl7_lab)
403 gm_lab = gmPathLab.create_test_org(name = hl7_lab, comment = comment_tag)
404 if gm_lab is None:
405 raise ValueError('cannot create lab [%s] in GNUmed' % hl7_lab)
406 _log.debug('created lab: %s', gm_lab)
407 return gm_lab
408
409
411
412 tt = gmPathLab.find_measurement_type(lab = pk_lab, name = name)
413 if tt is None:
414 _log.debug('test type [%s %s (%s)] not found for lab #%s, creating', name, unit, loinc, pk_lab)
415 tt = gmPathLab.create_measurement_type(lab = pk_lab, abbrev = name, unit = unit, name = name)
416
417 if loinc is None:
418 return tt
419 if loinc.strip() == u'':
420 return tt
421 if tt['loinc'] is None:
422 tt['loinc'] = loinc
423 tt.save()
424 return tt
425 if tt['loinc'] != loinc:
426
427 _log.error('LOINC code mismatch between GM (%s) and HL7 (%s) for result type [%s]', tt['loinc'], loinc, name)
428
429 return tt
430
431
459
460
462 """Assumes what's produced by flatten_MSH_by_PID()."""
463
464 _log.debug('staging HL7%s from [%s]', gmTools.coalesce(source, u'', u' (%s)'), filename)
465
466
467 MSH_file = codecs.open(filename, 'rb', 'utf8')
468 HL7 = pyhl7.parse(MSH_file.read(1024 * 1024 * 5))
469 MSH_file.close()
470
471
472 inc = create_incoming_data(u'HL7%s' % gmTools.coalesce(source, u'', u' (%s)'), filename)
473 if inc is None:
474 return None
475
476 try:
477
478 PID = HL7.segment('PID')
479 name = PID[PID_name]
480 inc['lastnames'] = gmTools.coalesce(name[PID_lastname], u'')
481 inc['firstnames'] = gmTools.coalesce(name[PID_firstname], u'')
482 if len(name) > 2:
483 inc['firstnames'] += u' '
484 inc['firstnames'] += name[PID_middlename]
485 if PID[PID_dob] is not None:
486 tmp = time.strptime(PID[PID_dob][0], '%Y%m%d')
487 inc['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
488 if PID[PID_gender] is not None:
489 inc['gender'] = PID[PID_gender][0]
490 inc['external_data_id'] = filename
491
492
493
494
495
496
497
498
499 inc.save()
500 except:
501 delete_incoming_data(pk_incoming_data = inc['pk_incoming_data_unmatched'])
502 raise
503
504 return inc
505
506
546
547
569
570
571
572 if __name__ == "__main__":
573
574 if len(sys.argv) < 2:
575 sys.exit()
576
577 if sys.argv[1] != 'test':
578 sys.exit()
579
580 from Gnumed.pycommon import gmLog2
581 from Gnumed.pycommon import gmI18N
582
583 gmDateTime.init()
584 gmI18N.activate_locale()
585 gmI18N.install_domain()
586
587
593
603
607
618
627
638
644
645
646
647
648
649
650
651 test_format_hl7_file()
652