1 """GNUmed German XDT parsing objects.
2
3 This encapsulates some of the XDT data into
4 objects for easy access.
5 """
6
7 __version__ = "$Revision: 1.33 $"
8 __author__ = "K.Hilbert, S.Hilbert"
9 __license__ = "GPL"
10
11 import os.path, sys, linecache, codecs, re as regex, time, datetime as pyDT, logging
12
13
14 import mx.DateTime as mxDT
15
16
17 if __name__ == '__main__':
18 sys.path.insert(0, '../../')
19 from Gnumed.pycommon import gmDateTime, gmTools
20 from Gnumed.business import gmXdtMappings, gmPerson
21
22
23 _log = logging.getLogger('gm.xdt')
24 _log.info(__version__)
25
26
def determine_xdt_encoding(filename=None, default_encoding=None):
    """Return the character set declared inside an xDT file.

    NOTE(review): the ``def`` line was not visible in the reviewed listing.
    The name is taken from the call
    ``determine_xdt_encoding(filename=filename)`` elsewhere in this file and
    the parameters from the names the body uses -- confirm the defaults
    against the original signature.

    The file is scanned line by line. Characters 3-6 of each line are used
    as the field ID. The first line whose ID is contained in
    gmXdtMappings._charset_fields decides the result: the single character
    following the ID is looked up in gmXdtMappings._map_field2charset.

    filename -- path of the xDT file to inspect
    default_encoding -- value returned when no charset field is found

    Errors raised by the mapping lookup are not caught and propagate to
    the caller.
    """
    # errors='ignore' drops undecodable bytes; the real encoding is what
    # this function is about to find out
    xdt_file = codecs.open(filename=filename, mode='rU', encoding='utf8', errors='ignore')

    file_encoding = None
    try:
        for line in xdt_file:
            field = line[3:7]
            if field not in gmXdtMappings._charset_fields:
                continue
            _log.debug('found charset field [%s] in <%s>', field, filename)
            val = line[7:8]
            file_encoding = gmXdtMappings._map_field2charset[field][val]
            _log.debug('encoding in file is "%s" (%s)', file_encoding, val)
            # the first charset field wins
            break
    finally:
        # release the handle even if reading or the lookup raised
        xdt_file.close()

    if file_encoding is None:
        _log.debug('no encoding found in <%s>, assuming [%s]', filename, default_encoding)
        return default_encoding

    return file_encoding
52
54
55 _map_id2name = {
56 '3101': 'lastnames',
57 '3102': 'firstnames',
58 '3103': 'dob',
59 '3110': 'gender',
60 '3106': 'zipurb',
61 '3107': 'street',
62 '3112': 'zip',
63 '3113': 'urb',
64 '8316': 'source'
65 }
66
67 needed_fields = (
68 '3101',
69 '3102'
70 )
71
72 interesting_fields = _map_id2name.keys()
73
74 data = {}
75
76
77 if encoding is None:
78 encoding = determine_xdt_encoding(filename=filename)
79
80 xdt_file = codecs.open(filename=filename, mode='rU', encoding=encoding)
81
82 for line in xdt_file:
83
84
85
86
87
88 line = line.replace('\015','')
89 line = line.replace('\012','')
90
91
92 field = line[3:7]
93
94 if field in interesting_fields:
95 try:
96 already_seen = data[_map_id2name[field]]
97 break
98 except KeyError:
99 data[_map_id2name[field]] = line[7:]
100
101 xdt_file.close()
102
103
104 if len(data) < len(needed_fields):
105 raise ValueError('insufficient patient data in XDT file [%s], found only: %s' % (filename, data))
106
107 from Gnumed.business import gmPerson
108 dto = gmPerson.cDTO_person()
109
110 dto.firstnames = data['firstnames']
111 dto.lastnames = data['lastnames']
112
113
114
115 try:
116 dob = time.strptime(data['dob'], gmTools.coalesce(dob_format, '%d%m%Y'))
117 dto.dob = pyDT.datetime(dob.tm_year, dob.tm_mon, dob.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
118 except KeyError:
119 dto.dob = None
120
121 try:
122 dto.gender = gmXdtMappings.map_gender_xdt2gm[data['gender'].lower()]
123 except KeyError:
124 dto.gender = None
125
126 dto.zip = None
127 try:
128 dto.zip = regex.match('\d{5}', data['zipurb']).group()
129 except KeyError: pass
130 try:
131 dto.zip = data['zip']
132 except KeyError: pass
133
134 dto.urb = None
135 try:
136 dto.urb = regex.sub('\d{5} ', '', data['zipurb'])
137 except KeyError: pass
138 try:
139 dto.urb = data['urb']
140 except KeyError: pass
141
142 try:
143 dto.street = data['street']
144 except KeyError:
145 dto.street = None
146
147 try:
148 dto.source = data['source']
149 except KeyError:
150 dto.source = None
151
152 return dto
153
155
156 - def __init__(self, filename=None, encoding=None, override_encoding=False):
178
180
181 if self.__header is not None:
182 return self.__header
183
184 ldt_file = codecs.open(filename = self.filename, mode = 'rU', encoding = self.encoding)
185 self.__header = []
186 for line in ldt_file:
187 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
188
189 if field == u'8000':
190 if content in [u'8202']:
191 break
192 self.__header.append(line)
193
194 ldt_file.close()
195 return self.__header
196
197 header = property(_get_header, lambda x:x)
198
200
201 if self.__tail is not None:
202 return self.__tail
203
204 ldt_file = codecs.open(filename = self.filename, mode = 'rU', encoding = self.encoding)
205 self.__tail = []
206 in_tail = False
207 for line in ldt_file:
208 if in_tail:
209 self.__tail.append(line)
210 continue
211
212 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
213
214
215 if field == u'8000':
216 if content not in [u'8221']:
217 continue
218 in_tail = True
219 self.__tail.append(line)
220
221 ldt_file.close()
222 return self.__tail
223
224 tail = property(_get_tail, lambda x:x)
225
227
228 ldt_file = codecs.open(filename = self.filename, mode = 'rU', encoding = self.encoding)
229 out_file = None
230
231 in_patient = False
232 for line in ldt_file:
233
234 if in_patient:
235 out_file.write(line)
236 continue
237
238 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
239
240
241 if field == u'8000':
242
243 if content == u'8202':
244 in_patient = True
245 if out_file is not None:
246 out_file.write(u''.join(self.tail))
247 out_file.close()
248
249 out_file.write(u''.join(self.header))
250 else:
251 in_patient = False
252 if out_file is not None:
253 out_file.write(u''.join(self.tail))
254 out_file.close()
255
256 if out_file is not None:
257 if not out_file.closed:
258 out_file.write(u''.join(self.tail))
259 out_file.close()
260
261 ldt_file.close()
262
263
264
266 pat_ids = []
267 pat_names = []
268 pats = {}
269
270
271 for line in fileinput.input(aFile):
272
273 line = line.replace('\015','')
274 line = line.replace('\012','')
275
276 field = line[3:7]
277
278 if field == '3000':
279 pat_id = line[7:]
280 if pat_id not in pat_ids:
281 pat_ids.append(pat_id)
282 continue
283
284 if field == '3101':
285 pat_name = line [7:]
286 if pat_name not in pat_names:
287 pat_names.append(pat_name)
288 pats[pat_id] = pat_name
289 continue
290 fileinput.close()
291
292 _log.debug("patients found: %s" % len(pat_ids))
293 return pats
294
def get_pat_files(aFile, ID, name, patdir = None, patlst = None):
    """Look up the files recorded for one patient.

    aFile -- not used by this function
    ID, name -- combined as "<ID>:<name>" to form the group key
    patdir -- passed through unchanged into the result
    patlst -- object providing get(aGroup=..., anOption=...); its "files"
              option for the patient's group is returned

    Returns the two-element list [patdir, files].
    """
    _log.debug("getting files for patient [%s:%s]" % (ID, name))
    group = "%s:%s" % (ID, name)
    files = patlst.get(aGroup = group, anOption = "files")
    _log.debug("%s => %s" % (patdir, files))
    result = [patdir, files]
    return result
300
302 content=[]
303 lineno = []
304
305
306
307 content = []
308 record_start_lines = []
309
310
311 for line in fileinput.input(aFile):
312 strippedline = line.replace('\015','')
313 strippedline = strippedline.replace('\012','')
314
315 if strippedline[3:7] == '8000':
316 record_start_lines.append(fileinput.filelineno())
317
318
319 for aline in record_start_lines:
320
321 line = linecache.getline(aFile,aline+2)
322
323 strippedline = line.replace('\015','')
324 strippedline = strippedline.replace('\012','')
325
326 field = strippedline[3:7]
327
328 if field == '3000':
329 ID = strippedline[7:]
330 line = linecache.getline(aFile,aline+3)
331
332 strippedline = line.replace('\015','')
333 strippedline = strippedline.replace('\012','')
334
335 field = strippedline[3:7]
336 if field == '3101':
337 name = strippedline [7:]
338 startline=aline
339 endline=record_start_lines[record_start_lines.index(aline)+1]
340 _log.debug("reading from%s" %str(startline)+' '+str(endline) )
341 for tmp in range(startline,endline):
342 content.append(linecache.getline(aFile,tmp))
343 _log.debug("reading %s"%tmp )
344 hashes = check_for_previous_records(ID,name,patlst)
345
346 data_hash = md5.new()
347 map(data_hash.update, content)
348 digest = data_hash.hexdigest()
349 if digest not in hashes:
350 pat_dir = cfg.get("xdt-viewer", "export-dir")
351 file = write_xdt_pat_data(content, pat_dir)
352 add_file_to_patlst(ID, name, patlst, file, ahash)
353 content = []
354 else:
355 continue
356
357 fileinput.close()
358 patlst.store()
359 return 1
360
def write_xdt_pat_data(data, aDir):
    """Write the record for this patient to a new file.

    NOTE(review): the ``def`` line was not visible in the reviewed listing.
    The name and parameter order are taken from the call
    ``write_xdt_pat_data(content, pat_dir)`` elsewhere in this file and the
    parameter names from the body -- confirm against the original
    signature.

    data -- iterable of strings, written in order
    aDir -- directory in which the new file is created

    Returns the value obtained from get_rand_fname(aDir), i.e. the name
    the file was created under inside aDir.
    """
    # the name must be kept: it used to be returned without ever having
    # been assigned, which raised NameError
    fname = get_rand_fname(aDir)
    pat_file = open(os.path.join(aDir, fname), "w")
    try:
        pat_file.writelines(data)
    finally:
        # close the handle even if writing raised
        pat_file.close()
    return fname
def check_for_previous_records(ID, name, patlst):
    """Return the hashes of all files already recorded for a patient.

    NOTE(review): the ``def`` line was not visible in the reviewed listing.
    The name and parameter order are taken from the call
    ``check_for_previous_records(ID,name,patlst)`` elsewhere in this file
    -- confirm against the original signature.

    ID, name -- combined as "<ID>:<name>" to form the group key
    patlst -- object providing getGroups(), get() and set(); if the group
              does not exist yet it is created with an empty "files" list

    Returns a list with the hash part of every "<file>:<hash>" entry.
    Raises ValueError for an entry that contains no colon.
    """
    anIdentity = "%s:%s" % (ID, name)

    if anIdentity not in patlst.getGroups():
        _log.debug("identity not yet in list" )
        patlst.set(aGroup = anIdentity, anOption = 'files', aValue = [], aComment = '')

    hashes = []
    for entry in patlst.get(aGroup = anIdentity, anOption = "files"):
        # split on the LAST colon only: the file part may itself contain
        # colons (e.g. a drive letter), the hash part does not
        _fname, ahash = entry.rsplit(':', 1)
        hashes.append(ahash)

    return hashes
def add_file_to_patlst(ID, name, patlst, new_files, ahash):
    """Record newly written file(s) and their hash for a patient.

    NOTE(review): the ``def`` line was not visible in the reviewed listing.
    The name and parameter order are taken from the call
    ``add_file_to_patlst(ID, name, patlst, file, ahash)`` elsewhere in this
    file and the parameter names from the body -- confirm against the
    original signature.

    ID, name -- combined as "<ID>:<name>" to form the group key
    patlst -- object providing get() and set() for the "files" option
    new_files -- a single file name or an iterable of file names
    ahash -- hash stored alongside each file name

    Each file is appended as "<file>:<hash>" and the list is stored back.
    """
    anIdentity = "%s:%s" % (ID, name)
    files = patlst.get(aGroup = anIdentity, anOption = "files")

    # a bare string would otherwise be iterated character by character
    if isinstance(new_files, (str, type(u''))):
        new_files = [new_files]

    for fname in new_files:
        files.append("%s:%s" % (fname, ahash))

    _log.debug("files now there : %s" % files)
    patlst.set(aGroup=anIdentity, anOption="files", aValue = files, aComment="")
395
396
397
# Manual self-test: print the header and tail of the LDT file named on the
# command line.
if __name__ == "__main__":
    from Gnumed.pycommon import gmI18N, gmLog2

    # log everything while testing
    logging.getLogger().setLevel(logging.DEBUG)
    _log = logging.getLogger('gm.xdt')

    gmI18N.activate_locale()
    gmI18N.install_domain()
    gmDateTime.init()

    ldt = cLDTFile(filename = sys.argv[1])
    for title, section in (("header:", "header"), ("tail:", "tail")):
        print(title)
        # the section is read lazily, after its title has been printed
        for line in getattr(ldt, section):
            print(line.encode('utf8', 'replace'))
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436