1 """GNUmed client internationalization/localization.
2
All i18n/l10n issues should be handled through this module.
4
5 Theory of operation:
6
7 To activate proper locale settings and translation services you need to
8
9 - import this module
10 - call activate_locale()
11 - call install_domain()
12
13 The translating method gettext.gettext() will then be
14 installed into the global (!) namespace as _(). Your own
15 modules thus need not do _anything_ (not even import gmI18N)
16 to have _() available to them for translating strings. You
17 need to make sure, however, that gmI18N is imported in your
18 main module before any of the modules using it. In order to
19 resolve circular references involving modules that
20 absolutely _have_ to be imported before this module you can
21 explicitly import gmI18N into them at the very beginning.
22
23 The text domain (i.e. the name of the message catalog file)
24 is derived from the name of the main executing script unless
25 explicitly passed to install_domain(). The language you
26 want to translate to is derived from environment variables
27 by the locale system unless explicitly passed to
28 install_domain().
29
30 This module searches for message catalog files in 3 main locations:
31
32 - standard POSIX places (/usr/share/locale/ ...)
33 - below "${YOURAPPNAME_DIR}/po/"
34 - below "<directory of binary of your app>/../po/"
35
36 For DOS/Windows I don't know of standard places so probably
37 only the last option will work. I don't know a thing about
38 classic Mac behaviour. New Macs are POSIX, of course.
39
40 It will then try to install candidates and *verify* whether
41 the translation works by checking for the translation of a
42 tag within itself (this is similar to the self-compiling
43 compiler inserting a backdoor into its self-compiled
44 copies).
45
46 If none of this works it will fall back to making _() a noop.
47
48 @copyright: authors
49 """
50
51 __author__ = "H. Herb <hherb@gnumed.net>, I. Haywood <i.haywood@ugrad.unimelb.edu.au>, K. Hilbert <Karsten.Hilbert@gmx.net>"
52 __license__ = "GPL v2 or later (details at http://www.gnu.org)"
53
54
55
56 import sys, os.path, os, re as regex, locale, gettext, logging, codecs
57
58
# Logger shared by everything in this module.
_log = logging.getLogger('gm.i18n')

# Name of the locale in effect; assigned by the locale activation code
# in this module and read by install_domain() for its log messages.
system_locale = ''
# NOTE(review): presumably filled by __split_locale_into_levels(), which
# is not visible in this part of the file -- confirm.
system_locale_level = {}


# The "real" translation callable wrapped by _translate_protected().
# Identity until install_domain() replaces it with the _() of an
# installed message catalog.
_translate_original = lambda x:x
# Finds named substitutes of the form "%(name)s" (non-greedy match).
_substitutes_regex = regex.compile(r'%\(.+?\)s')






# Probe string: install_domain() accepts a message catalog only if that
# catalog translates this tag into something different from the tag.
__orig_tag__ = u'Translate this or i18n into <en_EN> will not work properly !'
74
75
76
77
78
79
80
81
82
83
84
85
105
def _locale_value_for_log(value, encoding):
    """Return *value* in a form that can safely be handed to the logger.

    Args:
        value: a value obtained from the locale database (string,
            number, list, ...).
        encoding: the encoding of the current locale or None. With
            None the value is returned untouched.

    Returns:
        *value* itself, its unicode representation, or - if it cannot
        be decoded at all - its repr().
    """
    if encoding is None:
        return value
    try:
        return unicode(value)
    except UnicodeDecodeError:
        # non-ASCII byte string, needs the locale encoding to decode
        pass
    try:
        return unicode(value, encoding)
    except (UnicodeDecodeError, LookupError):
        # this is diagnostic output only, so never fail because of it
        return repr(value)


def __log_locale_settings(message=None):
    """Dump the current locale configuration to the debug log.

    Purely diagnostic. Logs what the <locale> module reports (per
    category settings, default locale, encodings, conventions database,
    nl_langinfo() values), the locale related environment variables and
    the result of get_encoding().

    NOTE(review): the signature was reconstructed from the call sites
    and from the use of <message> in the body -- confirm.

    Args:
        message: optional headline logged before the dump, skipped
            when None.
    """
    # not every OS defines every LC_* constant (say, LC_MESSAGES)
    _setlocale_categories = {}
    for category in 'LC_ALL LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            _setlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not have locale.%s', category)

    # LC_ALL is left out on purpose here
    _getlocale_categories = {}
    for category in 'LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            _getlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            # a missing constant has already been reported above
            pass

    if message is not None:
        _log.debug(message)

    _log.debug('current locale settings:')
    _log.debug('locale.get_locale(): %s', str(locale.getlocale()))
    for category, constant in _getlocale_categories.items():
        _log.debug('locale.get_locale(%s): %s', category, locale.getlocale(constant))

    # setlocale() without a second argument only queries the setting
    for category, constant in _setlocale_categories.items():
        _log.debug('(locale.set_locale(%s): %s)', category, locale.setlocale(constant))

    try:
        _log.debug('locale.getdefaultlocale() - default (user) locale: %s', str(locale.getdefaultlocale()))
    except ValueError:
        _log.exception('the OS locale setup seems faulty')

    _log.debug('encoding sanity check (also check "locale.nl_langinfo(CODESET)" below):')
    pref_loc_enc = locale.getpreferredencoding(do_setlocale=False)
    loc_enc = locale.getlocale()[1]
    py_str_enc = sys.getdefaultencoding()
    sys_fs_enc = sys.getfilesystemencoding()
    _log.debug('sys.getdefaultencoding(): [%s]', py_str_enc)
    _log.debug('locale.getpreferredencoding(): [%s]', pref_loc_enc)
    _log.debug('locale.getlocale()[1]: [%s]', loc_enc)
    _log.debug('sys.getfilesystemencoding(): [%s]', sys_fs_enc)
    # compare case-insensitively and regardless of dashes ("utf-8" vs "UTF8")
    if loc_enc is not None:
        loc_enc = loc_enc.upper()
        loc_enc_compare = loc_enc.replace(u'-', u'')
    else:
        loc_enc_compare = loc_enc
    if pref_loc_enc.upper().replace(u'-', u'') != loc_enc_compare:
        _log.warning('encoding suggested by locale (%s) does not match encoding currently set in locale (%s)', pref_loc_enc, loc_enc)
        _log.warning('this might lead to encoding errors')
    for enc in [pref_loc_enc, loc_enc, py_str_enc, sys_fs_enc]:
        if enc is None:
            continue
        try:
            codecs.lookup(enc)
        except LookupError:
            _log.warning('<codecs> module can NOT handle encoding [%s]', enc)
        else:
            _log.debug('<codecs> module CAN handle encoding [%s]', enc)
    _log.debug('on Linux you can determine a likely candidate for the encoding by running "locale charmap"')

    _log.debug('locale related environment variables (${LANG} is typically used):')
    for var in 'LANGUAGE LC_ALL LC_CTYPE LANG'.split():
        try:
            _log.debug('${%s}=%s', var, os.environ[var])
        except KeyError:
            _log.debug('${%s} not set', var)

    _log.debug('database of locale conventions:')
    for key, value in locale.localeconv().items():
        _log.debug(u'locale.localeconv(%s): %s', key, _locale_value_for_log(value, loc_enc))

    _nl_langinfo_categories = {}
    for category in 'CODESET D_T_FMT D_FMT T_FMT T_FMT_AMPM RADIXCHAR THOUSEP YESEXPR NOEXPR CRNCYSTR ERA ERA_D_T_FMT ERA_D_FMT ALT_DIGITS'.split():
        try:
            _nl_langinfo_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not support nl_langinfo category locale.%s', category)
    try:
        for category, constant in _nl_langinfo_categories.items():
            _log.debug(u'locale.nl_langinfo(%s): %s', category, _locale_value_for_log(locale.nl_langinfo(constant), loc_enc))
    except Exception:
        # best effort: nl_langinfo() is not available everywhere
        _log.exception('this OS does not support nl_langinfo')

    _log.debug('gmI18N.get_encoding(): %s', get_encoding())
199
def _translate_protected(term):
    """Translate <term>, refusing translations with broken placeholders.

    The translation is only used when it agrees with the original in

    - the number of "%s" occurrences,
    - the number of "%(name)s" substitutes,
    - the set of "%(name)s" substitutes.

    Otherwise the problem is logged and <term> is returned untranslated.

    NOTE(review): the signature was reconstructed from the way the
    function is installed as _() and from the use of <term> -- confirm.
    """
    translated = _translate_original(term)

    complaint = None
    if term.count(u'%s') != translated.count(u'%s'):
        complaint = 'count("%s") mismatch, returning untranslated string'
    else:
        wanted = _substitutes_regex.findall(term)
        offered = _substitutes_regex.findall(translated)
        if len(wanted) != len(offered):
            complaint = 'count("%(...)s") mismatch, returning untranslated string'
        elif set(wanted) != set(offered):
            complaint = '"%(...)s" name mismatch, returning untranslated string'

    if complaint is None:
        return translated

    # logged without arguments, so the "%" in the text is left alone
    _log.error(complaint)
    _log.error('original : %s', term)
    _log.error('translation: %s', translated)
    return term
232
233
234
def activate_locale():
    """Get system locale from environment.

    Activates the user-default locale unless a locale is already
    active, stores its name in the module global <system_locale>
    ("en_EN" if the locale is still None or "C" afterwards) and has it
    split into levels. Locale settings are logged before and after.

    NOTE(review): the signature was reconstructed from the call in the
    self-test at the end of the file -- confirm.

    Returns:
        True, always. Problems are logged, not raised.
    """
    global system_locale

    __log_locale_settings('unmodified startup locale settings (should be [C])')

    loc = None
    try:
        # only activate the user default if no locale has been set yet
        loc = locale.getlocale()[0]
        if loc is None:
            loc = locale.setlocale(locale.LC_ALL, '')
            _log.debug("activating user-default locale with <locale.setlocale(locale.LC_ALL, '')> returns: [%s]", loc)
        else:
            _log.info('user-default locale already activated')
        # NOTE(review): indentation was lost in this file, re-reading the
        # locale after either branch is assumed -- confirm
        loc = locale.getlocale()[0]
    except AttributeError:
        _log.exception('Windows does not support locale.LC_ALL')
    except Exception:
        # deliberately broad but no longer swallowing SystemExit/KeyboardInterrupt
        _log.exception('error activating user-default locale')

    __log_locale_settings('locale settings after activating user-default locale')

    if loc in [None, 'C']:
        _log.error('the current system locale is still [None] or [C], assuming [en_EN]')
        system_locale = "en_EN"
    else:
        system_locale = loc

    __split_locale_into_levels()

    return True
272
def _local_catalog_candidates():
    """Return the "po" directories above and beside the running script."""
    script_dir = os.path.dirname(sys.argv[0])

    above_dir = os.path.abspath(os.path.join(script_dir, '..', 'po'))
    _log.debug('looking above binary install directory [%s]', above_dir)

    beside_dir = os.path.abspath(os.path.join(script_dir, 'po'))
    _log.debug('looking in binary install directory [%s]', beside_dir)

    return [above_dir, beside_dir]


def install_domain(domain=None, language=None, prefer_local_catalog=False):
    """Install a text domain suitable for the main script.

    Collects candidate message catalog directories, installs the
    domain from each existing one in turn and keeps the first one that
    actually translates the module's probe tag. The catalog's _() is
    then wrapped by _translate_protected(). If no candidate works a
    gettext.NullTranslations() instance is installed instead.

    Args:
        domain: name of the message catalog; derived from the name of
            the running script (sys.argv[0]) when None.
        language: if given, ${LANG} is set to this value.
        prefer_local_catalog: when true the "po" directories relative
            to the script are tried before the POSIX standard location
            and ${<SCRIPTNAME>_DIR}/po, otherwise after them.

    Returns:
        True, always.
    """
    global _translate_original

    if domain is None:
        _log.info('domain not specified, deriving from script name')
        domain = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    _log.info('text domain is [%s]', domain)

    _log.debug('searching message catalog file for system locale [%s]', system_locale)

    for env_var in ['LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG']:
        tmp = os.getenv(env_var)
        # test the value: the variable *name* is never None
        if tmp is None:
            _log.debug('${%s} not set', env_var)
        else:
            _log.debug('${%s} = [%s]', env_var, tmp)

    if language is not None:
        _log.info('explicit setting of ${LANG} requested: [%s]', language)
        _log.info('this will override the system locale language setting')
        os.environ['LANG'] = language

    # collect candidate directories in order of preference
    candidates = []

    if prefer_local_catalog:
        _log.debug('preferring local message catalog')
        candidates.extend(_local_catalog_candidates())

    if os.name == 'posix':
        _log.debug('system is POSIX, looking in standard locations (see Python Manual)')
        # with a single argument this returns the directory currently bound to the domain
        candidates.append(gettext.bindtextdomain(domain))
    else:
        _log.debug('No use looking in standard POSIX locations - not a POSIX system.')

    env_key = "%s_DIR" % os.path.splitext(os.path.basename(sys.argv[0]))[0].upper()
    _log.debug('looking at ${%s}', env_key)
    if env_key in os.environ:
        loc_dir = os.path.abspath(os.path.join(os.environ[env_key], 'po'))
        _log.debug('${%s} = "%s" -> [%s]', env_key, os.environ[env_key], loc_dir)
        candidates.append(loc_dir)
    else:
        _log.info("${%s} not set", env_key)

    if not prefer_local_catalog:
        candidates.extend(_local_catalog_candidates())

    # try the candidates and verify that they really translate
    for candidate in candidates:
        _log.debug('trying [%s](/%s/LC_MESSAGES/%s.mo)', candidate, system_locale, domain)
        if not os.path.exists(candidate):
            continue
        try:
            gettext.install(domain, candidate, unicode=1)
        except Exception:
            _log.exception('installing text domain [%s] failed from [%s]', domain, candidate)
            continue
        # _() is found in the builtins where gettext.install() put it
        if _(__orig_tag__) == __orig_tag__:
            _log.debug('does not translate: [%s] => [%s]', __orig_tag__, _(__orig_tag__))
            continue
        _log.debug('found msg catalog: [%s] => [%s]', __orig_tag__, _(__orig_tag__))
        # keep the real _() and put the protecting wrapper in its place
        import __builtin__
        _translate_original = __builtin__._
        __builtin__._ = _translate_protected
        return True

    # nothing worked: make _() return its argument unchanged
    _log.warning("falling back to NullTranslations() class")
    dummy = gettext.NullTranslations()
    dummy.install()
    return True
378
_encoding_mismatch_already_logged = False
_current_encoding = None


def get_encoding():
    """Return the most plausible encoding for text conversion.

    Sources, in order of preference:

    <sys.getdefaultencoding()>
        - what Python itself uses to convert string <-> unicode when
          no other encoding was specified
        - used if it is anything but "ascii"
    <locale.getlocale()[1]>
        - what the current locale is *actually* using
    <locale.getpreferredencoding()>
        - what the current locale would *recommend* using
        - last resort, say, on Mac OS X where activating the user
          locale may still leave locale.getlocale()[1] at None

    A result from the first two sources is remembered for later calls,
    the last resort is not.
    """
    global _current_encoding, _encoding_mismatch_already_logged

    if _current_encoding is not None:
        return _current_encoding

    found = sys.getdefaultencoding()
    if found == 'ascii':
        # the default is not informative, ask the locale instead
        found = locale.getlocale()[1]

    if found is not None:
        _current_encoding = found
        return found

    # complain only once
    if not _encoding_mismatch_already_logged:
        _log.debug('*actual* encoding of locale is None, using encoding *recommended* by locale')
        _encoding_mismatch_already_logged = True

    return locale.getpreferredencoding(do_setlocale=False)
422
423
424
# Self-test, run as: <this file> test [text domain]
if __name__ == "__main__":

    if len(sys.argv) == 1:
        sys.exit()

    if sys.argv[1] != u'test':
        sys.exit()

    logging.basicConfig(level = logging.DEBUG)

    # no __version__ is defined in the visible part of this file,
    # so do not let the banner fail on it
    version = globals().get('__version__', 'unknown')

    print("======================================================================")
    print("GNUmed i18n")
    print("")
    print("authors: %s" % __author__)
    print("license: %s ; version: %s" % (__license__, version))
    print("======================================================================")

    activate_locale()
    print("system locale:  %s ; levels: %s" % (system_locale, system_locale_level))
    print("likely encoding: %s" % get_encoding())

    # argv[1] is "test", an explicit text domain can only be in argv[2]
    if len(sys.argv) > 2:
        install_domain(domain = sys.argv[2])
    else:
        install_domain()

    # fails if install_domain() did not put _() into the builtins
    tmp = _('Translate this or i18n into <en_EN> will not work properly !')
456
457
458
459
460