Package common :: Module dbf
[frames | no frames]

Source Code for Module common.dbf

  1  # -*- coding: utf-8 -*- 
  2  # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. 
  3  # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr 
  4  # 
  5  # This file is part of logilab-common. 
  6  # 
  7  # logilab-common is free software: you can redistribute it and/or modify it under 
  8  # the terms of the GNU Lesser General Public License as published by the Free 
  9  # Software Foundation, either version 2.1 of the License, or (at your option) any 
 10  # later version. 
 11  # 
 12  # logilab-common is distributed in the hope that it will be useful, but WITHOUT 
 13  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 14  # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more 
 15  # details. 
 16  # 
 17  # You should have received a copy of the GNU Lesser General Public License along 
 18  # with logilab-common.  If not, see <http://www.gnu.org/licenses/>. 
 19  """This is a DBF reader which reads Visual Fox Pro DBF format with Memo field 
 20   
 21  Usage: 
 22   
 23  >>> rec = readDbf('test.dbf') 
 24  >>> for line in rec: 
 25  ...     print(line['name'])
 26   
 27   
 28  :date: 13/07/2007 
 29   
 30  http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html 
 31  page says code is "available as is without any warranty or support". 
 32  """ 
 33   
 34  import struct 
 35  import os, os.path 
 36  import sys 
 37  import csv 
 38  import tempfile 
 39  import configparser 
 40   
class Dbase:
    """Read-only access to a Visual FoxPro DBF table and its memo file.

    Call :meth:`open` first: it parses the table header and the field
    descriptors and loads all record data into memory.  Records are then
    returned by :meth:`get_record_with_names` as dictionaries mapping field
    names to text values.

    Everything read from the files is ``bytes``; field names and record
    values are decoded with ``encoding`` before being handed to the caller.
    The default, ``latin-1``, maps every byte to exactly one character, so
    decoding can never fail.
    """

    # Field types whose stored value is a block pointer into the memo file.
    _MEMO_TYPES = ('M', 'G', 'B', 'P')

    def __init__(self, encoding='latin-1'):
        """Create a closed reader.

        :param encoding: codec used to decode field names and values
        """
        self.encoding = encoding
        self.fdb = None
        self.fmemo = None
        self.db_data = None
        self.memo_data = None
        self.fields = None
        self.num_records = 0
        self.header = None
        self.memo_file = ''
        self.memo_header = None
        self.memo_block_size = 0
        self.memo_header_len = 0
        self.row_format = '<'
        self.row_ids = []
        self.row_len = 0

    def _drop_after_NULL(self, txt):
        """Return ``txt`` truncated at its first NUL (``bytes`` or ``str``)."""
        nul = b'\x00' if isinstance(txt, (bytes, bytearray)) else '\x00'
        return txt.partition(nul)[0]

    def _reverse_endian(self, num):
        """Decode ``num`` as a little-endian unsigned 32-bit integer.

        An empty value yields 0.  Despite the name, no byte swapping is
        left to do once the value has been unpacked as little-endian.
        """
        if not len(num):
            return 0
        return struct.unpack('<L', num)[0]

    def _assign_ids(self, lst, ids):
        """Return a dict pairing each value of ``lst`` with its name in ``ids``."""
        return dict(zip(ids, lst))

    def open(self, db_name):
        """Open ``db_name`` and load its header, fields and record data.

        A memo file is picked up automatically when a sibling file with the
        last letter of the name replaced by ``t`` (``.dbf`` -> ``.dbt``), or
        with the last three letters replaced by ``fpt``, exists.

        :param db_name: path of the DBF file
        :raise IOError: if the file is too small to be a DBF file
        """
        filesize = os.path.getsize(db_name)
        if filesize <= 68:
            raise IOError('The file is not large enough to be a dbf file')

        # Release handles left over from a previous open() on this instance.
        self.close()
        self.fmemo = None
        self.fdb = open(db_name, 'rb')
        try:
            self._open_memo(db_name)
            self._read_header()
            self._read_fields()
            self._read_records()
        except Exception:
            # Do not leak the file handles when the file cannot be parsed.
            self.close()
            raise

    def _open_memo(self, db_name):
        """Locate and open the memo file, if any, and read its block size."""
        self.memo_file = ''
        for candidate in (db_name[0:-1] + 't', db_name[0:-3] + 'fpt'):
            if os.path.isfile(candidate):
                self.memo_file = candidate
                break
        if not self.memo_file:
            return

        self.fmemo = open(self.memo_file, 'rb')
        self.memo_data = self.fmemo.read()
        # The block size is a big-endian 16-bit value at bytes 6-7.
        self.memo_header = self._assign_ids(
            struct.unpack('>6x1H', self.memo_data[:8]), ['Block size'])
        block_size = self.memo_header['Block size'] or 512
        self.memo_block_size = block_size
        # The memo header is treated as occupying exactly one block.
        self.memo_header_len = block_size

    def _read_header(self):
        """Parse the fixed 32-byte table header."""
        data = self.fdb.read(32)
        self.header = self._assign_ids(
            struct.unpack('<B 3B L 2H 20x', data),
            ['id', 'Year', 'Month', 'Day', '# of Records', 'Header Size',
             'Record Size'])
        self.header['id'] = hex(self.header['id'])
        self.num_records = self.header['# of Records']

    def _read_fields(self):
        """Parse the 32-byte field descriptors that follow the header."""
        data = self.fdb.read(self.header['Header Size'] - 34)
        self.fields = {}
        header_pattern = '<11s c 4x B B 14x'
        ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision']
        pattern_len = 32
        for x, offset in enumerate(range(0, len(data), pattern_len), 1):
            # A 0x0D byte marks the end of the descriptor list.  Slicing
            # (rather than indexing) keeps the comparison bytes-to-bytes.
            if data[offset:offset + 1] == b'\x0d':
                break
            # The last descriptor may be cut short by the read size above;
            # only ignored pad bytes are missing, so fill them in.
            data_subset = data[offset:offset + pattern_len].ljust(
                pattern_len, b' ')
            field = self._assign_ids(
                struct.unpack(header_pattern, data_subset), ids)
            field['Field Name'] = self._drop_after_NULL(
                field['Field Name']).decode(self.encoding)
            # The type code is a single byte; latin-1 always decodes it.
            field['Field Type'] = field['Field Type'].decode('latin-1')
            self.fields[x] = field

    def _read_records(self):
        """Load the raw record data and build the per-row struct format."""
        # 32 + (Header Size - 34) + 3 bytes have now been consumed, i.e. one
        # byte more than 'Header Size': presumably the remaining header bytes
        # plus the first record's leading flag byte.
        self.fdb.read(3)
        if self.header['# of Records']:
            # One byte less than the full data area, because the first
            # record's leading byte has already been skipped.
            data_size = (self.header['# of Records']
                         * self.header['Record Size']) - 1
            self.db_data = self.fdb.read(data_size)
        else:
            self.db_data = b''

        self.row_format = '<'
        self.row_ids = []
        self.row_len = 0
        for field in self.fields.values():
            self.row_format += '%ds ' % (field['Field Length'])
            self.row_ids.append(field['Field Name'])
            self.row_len += field['Field Length']

    def close(self):
        """Close the table and memo files if they were opened."""
        if self.fdb:
            self.fdb.close()
        if self.fmemo:
            self.fmemo.close()

    def get_numrecords(self):
        """Return the number of records announced by the table header."""
        return self.num_records

    def get_record_with_names(self, rec_no):
        """Return record ``rec_no`` as a dict mapping field name to text.

        When a memo file is open, memo-type fields are replaced by the memo
        text they point to and all other values are stripped of surrounding
        whitespace.  Without a memo file the values are returned unstripped.

        :param rec_no: record number, from 0 to N-1
        :raise IndexError: if ``rec_no`` is outside that range
        """
        if rec_no < 0 or rec_no >= self.num_records:
            raise IndexError('Unable to extract data outside the range')

        offset = self.header['Record Size'] * rec_no
        data = self.db_data[offset:offset + self.row_len]
        values = struct.unpack(self.row_format, data)

        record = {}
        # self.row_format was built by iterating self.fields, so the
        # unpacked values line up with the fields in the same order.
        for field, raw in zip(self.fields.values(), values):
            if not self.memo_file:
                value = raw
            elif field['Field Type'] in self._MEMO_TYPES:
                block = self._reverse_endian(raw)
                # A zero pointer leaves the raw stored value untouched.
                value = self.read_memo(block - 1).strip() if block else raw
            else:
                value = raw.strip()
            record[field['Field Name']] = value.decode(self.encoding)
        return record

    def read_memo_record(self, num, in_length):
        """Read raw bytes from memo block ``num``.

        :param num: zero-based block number, counted after the memo header
        :param in_length: number of bytes to read; a negative value means
            one full block, 0 reads nothing
        :return: the bytes read (possibly fewer than requested at end of
            file)
        """
        if in_length == 0:
            return b''
        if in_length < 0:
            in_length = self.memo_block_size

        offset = self.memo_header_len + num * self.memo_block_size
        self.fmemo.seek(offset)
        return self.fmemo.read(in_length)

    def read_memo(self, num):
        """Return the payload (``bytes``) of the memo starting at block ``num``.

        A memo starts with an 8-byte header whose last four bytes hold the
        payload length as a big-endian integer.  An empty result is returned
        when the block, or the continuation of a longer memo, cannot be read.
        """
        buffer = self.read_memo_record(num, -1)
        if len(buffer) < 8:
            # Not even a complete memo header (e.g. pointer past end of file).
            return b''
        length = struct.unpack('>L', buffer[4:4 + 4])[0] + 8

        block_size = self.memo_block_size
        # '<=': a memo that exactly fills its block has no continuation.
        if length <= block_size:
            return buffer[8:length]
        rest_length = length - block_size
        rest_data = self.read_memo_record(num + 1, rest_length)
        if len(rest_data) <= 0:
            return b''
        return buffer[8:] + rest_data
207
def readDbf(filename):
    """Read a DBF file and return its records as a list of dictionaries.

    Each dictionary maps field names to the values of one record, as
    produced by ``Dbase.get_record_with_names``.

    :param filename: file name of the DBF
    :return: list of rows
    """
    db = Dbase()
    try:
        db.open(filename)
        return [db.get_record_with_names(i)
                for i in range(db.get_numrecords())]
    finally:
        # Always release the file handles, even when opening the table or
        # decoding a record fails part-way through.
        db.close()
if __name__ == '__main__':
    # Demo: print the genus and species of every row in the sample table.
    for row in readDbf('dbf/sptable.dbf'):
        genus = row['GENUS'].strip()
        species = row['SPECIES'].strip()
        print('%s %s' % (genus, species))