Package Gnumed :: Package pycommon :: Module gmI18N
[frames] | no frames]

Source Code for Module Gnumed.pycommon.gmI18N

  1  """GNUmed client internationalization/localization. 
  2   
  3  All i18n/l10n issues should be handled through this module. 
  4   
  5  Theory of operation: 
  6   
  7  To activate proper locale settings and translation services you need to 
  8   
  9  - import this module 
 10  - call activate_locale() 
 11  - call install_domain() 
 12   
 13  The translating method gettext.gettext() will then be 
 14  installed into the global (!) namespace as _(). Your own 
 15  modules thus need not do _anything_ (not even import gmI18N) 
 16  to have _() available to them for translating strings. You 
 17  need to make sure, however, that gmI18N is imported in your 
 18  main module before any of the modules using it. In order to 
 19  resolve circular references involving modules that 
 20  absolutely _have_ to be imported before this module you can 
 21  explicitly import gmI18N into them at the very beginning. 
 22   
 23  The text domain (i.e. the name of the message catalog file) 
 24  is derived from the name of the main executing script unless 
 25  explicitly passed to install_domain(). The language you 
 26  want to translate to is derived from environment variables 
 27  by the locale system unless explicitly passed to 
 28  install_domain(). 
 29   
 30  This module searches for message catalog files in 3 main locations: 
 31   
 32   - standard POSIX places (/usr/share/locale/ ...) 
 33   - below "${YOURAPPNAME_DIR}/po/" 
 34   - below "<directory of binary of your app>/../po/" 
 35   
 36  For DOS/Windows I don't know of standard places so probably 
 37  only the last option will work. I don't know a thing about 
 38  classic Mac behaviour. New Macs are POSIX, of course. 
 39   
 40  It will then try to install candidates and *verify* whether 
 41  the translation works by checking for the translation of a 
 42  tag within itself (this is similar to the self-compiling 
 43  compiler inserting a backdoor into its self-compiled 
 44  copies). 
 45   
 46  If none of this works it will fall back to making _() a noop. 
 47   
 48  @copyright: authors 
 49  """ 
 50  #=========================================================================== 
 51  __author__ = "H. Herb <hherb@gnumed.net>, I. Haywood <i.haywood@ugrad.unimelb.edu.au>, K. Hilbert <Karsten.Hilbert@gmx.net>" 
 52  __license__ = "GPL v2 or later (details at http://www.gnu.org)" 
 53   
 54   
 55  # stdlib 
 56  import sys, os.path, os, re as regex, locale, gettext, logging, codecs 
 57   
 58   
 59  _log = logging.getLogger('gm.i18n') 
 60   
 61  system_locale = '' 
 62  system_locale_level = {} 
 63   
 64   
 65  _translate_original = lambda x:x 
 66  _substitutes_regex = regex.compile(r'%\(.+?\)s') 
 67   
 68  # ********************************************************** 
 69  # == do not remove this line =============================== 
 70  # it is needed to check for successful installation of 
 71  # the desired message catalog 
 72  # ********************************************************** 
 73  __orig_tag__ = u'Translate this or i18n into <en_EN> will not work properly !' 
 74  # ********************************************************** 
 75  # ********************************************************** 
 76   
 77  # Q: I can't use non-ascii characters in labels and menus. 
 78  # A: This can happen if your Python's system encoding is ASCII and 
 79  #    wxPython is non-unicode. Edit/create the file sitecustomize.py 
 80  #    (should be somewhere in your PYTHONPATH), and put these magic lines: 
 81  # 
 82  #       import sys 
 83  #       sys.setdefaultencoding('iso8859-1') # replace with encoding you want to be the default one 
 84   
 85  #=========================================================================== 
def __split_locale_into_levels():
    """Split the system locale into language, country and variant levels.

    We have observed the following formats in the wild:

        - de_DE@euro
        - ec_CA.UTF-8
        - en_US:en
        - German_Germany.1252

    Reads the module global <system_locale> and fills the module
    global <system_locale_level> (in place) with these keys:

        'full':     the unmodified locale name
        'country':  the name up to the first '@', ':' or '.'
        'language': the part of 'country' before the first '_'
    """
    _log.debug('splitting canonical locale [%s] into levels', system_locale)

    # the dict is modified in place so no <global> statement is needed
    system_locale_level['full'] = system_locale
    # trim '@<variant>', ':<fallback>' and '.<encoding>' parts
    country = regex.split(r'[@:.]', system_locale, maxsplit=1)[0]
    system_locale_level['country'] = country
    # trim the '_<COUNTRY>' part, too; start from the already trimmed
    # value so that a locale without a country part (say, "de@euro")
    # does not drag its suffix into the language level
    system_locale_level['language'] = country.split('_', 1)[0]

    _log.debug('system locale levels: %s', system_locale_level)
105 #---------------------------------------------------------------------------
def __log_locale_settings(message=None):
    """Dump locale, encoding and environment settings into the log.

    message: optional headline, logged (at debug level) before the dump

    Nothing is returned. Note that this relies on the Python 2
    builtin unicode() for decoding locale dependent strings.
    """
    # collect the LC_* constants this platform actually offers
    _setlocale_categories = {}
    for category in 'LC_ALL LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            _setlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not have locale.%s', category)

    # same list but without LC_ALL
    _getlocale_categories = {}
    for category in 'LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            _getlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            # already warned about in the loop above
            pass

    if message is not None:
        _log.debug(message)

    _log.debug('current locale settings:')
    _log.debug('locale.get_locale(): %s', str(locale.getlocale()))
    for category in _getlocale_categories:
        _log.debug('locale.get_locale(%s): %s', category, locale.getlocale(_getlocale_categories[category]))

    # setlocale() with only a category queries the current setting
    for category in _setlocale_categories:
        _log.debug('(locale.set_locale(%s): %s)', category, locale.setlocale(_setlocale_categories[category]))

    try:
        _log.debug('locale.getdefaultlocale() - default (user) locale: %s', str(locale.getdefaultlocale()))
    except ValueError:
        _log.exception('the OS locale setup seems faulty')

    _log.debug('encoding sanity check (also check "locale.nl_langinfo(CODESET)" below):')
    pref_loc_enc = locale.getpreferredencoding(do_setlocale=False)
    loc_enc = locale.getlocale()[1]
    py_str_enc = sys.getdefaultencoding()
    sys_fs_enc = sys.getfilesystemencoding()
    _log.debug('sys.getdefaultencoding(): [%s]', py_str_enc)
    _log.debug('locale.getpreferredencoding(): [%s]', pref_loc_enc)
    _log.debug('locale.getlocale()[1]: [%s]', loc_enc)
    _log.debug('sys.getfilesystemencoding(): [%s]', sys_fs_enc)
    # compare case-insensitively and ignoring dashes ("utf-8" vs "UTF8")
    if loc_enc is not None:
        loc_enc = loc_enc.upper()
        loc_enc_compare = loc_enc.replace(u'-', u'')
    else:
        loc_enc_compare = loc_enc
    if pref_loc_enc.upper().replace(u'-', u'') != loc_enc_compare:
        _log.warning('encoding suggested by locale (%s) does not match encoding currently set in locale (%s)', pref_loc_enc, loc_enc)
        _log.warning('this might lead to encoding errors')
    for enc in [pref_loc_enc, loc_enc, py_str_enc, sys_fs_enc]:
        if enc is not None:
            try:
                codecs.lookup(enc)
                _log.debug('<codecs> module CAN handle encoding [%s]', enc)
            except LookupError:
                _log.warning('<codecs> module can NOT handle encoding [%s]', enc)
    _log.debug('on Linux you can determine a likely candidate for the encoding by running "locale charmap"')

    _log.debug('locale related environment variables (${LANG} is typically used):')
    for var in 'LANGUAGE LC_ALL LC_CTYPE LANG'.split():
        try:
            _log.debug('${%s}=%s', var, os.environ[var])
        except KeyError:
            _log.debug('${%s} not set', var)

    _log.debug('database of locale conventions:')
    data = locale.localeconv()
    for key in data:
        if loc_enc is None:
            _log.debug(u'locale.localeconv(%s): %s', key, data[key])
        else:
            try:
                _log.debug(u'locale.localeconv(%s): %s', key, unicode(data[key]))
            except UnicodeDecodeError:
                # not plain ASCII, retry with the encoding of the locale
                _log.debug(u'locale.localeconv(%s): %s', key, unicode(data[key], loc_enc))

    _nl_langinfo_categories = {}
    for category in 'CODESET D_T_FMT D_FMT T_FMT T_FMT_AMPM RADIXCHAR THOUSEP YESEXPR NOEXPR CRNCYSTR ERA ERA_D_T_FMT ERA_D_FMT ALT_DIGITS'.split():
        try:
            _nl_langinfo_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not support nl_langinfo category locale.%s', category)
    try:
        for category in _nl_langinfo_categories:
            if loc_enc is None:
                _log.debug('locale.nl_langinfo(%s): %s', category, locale.nl_langinfo(_nl_langinfo_categories[category]))
            else:
                try:
                    _log.debug(u'locale.nl_langinfo(%s): %s', category, unicode(locale.nl_langinfo(_nl_langinfo_categories[category])))
                except UnicodeDecodeError:
                    # not plain ASCII, retry with the encoding of the locale
                    _log.debug(u'locale.nl_langinfo(%s): %s', category, unicode(locale.nl_langinfo(_nl_langinfo_categories[category]), loc_enc))
    except Exception:
        # best effort only: keep going but leave KeyboardInterrupt/SystemExit alone
        _log.exception('this OS does not support nl_langinfo')

    _log.debug('gmI18N.get_encoding(): %s', get_encoding())
199 #---------------------------------------------------------------------------
def _translate_protected(term):
    """Translate <term>, falling back to <term> itself on a broken translation.

    This wraps the original _(). A translation is considered broken
    and discarded (after logging the problem) if it differs from
    <term> in

        - the number of "%s" placeholders
        - the number of "%(...)s" placeholders
        - the set of "%(...)s" placeholders used
    """
    translated = _translate_original(term)

    def _reject(reason):
        # log why the translation is unusable, hand back the original
        _log.error(reason)
        _log.error('original : %s', term)
        _log.error('translation: %s', translated)
        return term

    # positional placeholders
    if term.count(u'%s') != translated.count(u'%s'):
        return _reject('count("%s") mismatch, returning untranslated string')

    # named placeholders
    expected = _substitutes_regex.findall(term)
    found = _substitutes_regex.findall(translated)
    if len(expected) != len(found):
        return _reject('count("%(...)s") mismatch, returning untranslated string')
    if set(expected) != set(found):
        return _reject('"%(...)s" name mismatch, returning untranslated string')

    return translated
232 #--------------------------------------------------------------------------- 233 # external API 234 #---------------------------------------------------------------------------
def activate_locale():
    """Activate the user-default locale and remember it.

    If no locale is active yet, locale.setlocale(locale.LC_ALL, '')
    is called. The resulting locale name is stored in the module
    global <system_locale> ("en_EN" is assumed if the locale is
    still None or "C") and split into <system_locale_level>.

    Problems while activating the locale are logged, not raised.

    Returns True (always).
    """
    global system_locale

    # logging state of affairs
    __log_locale_settings('unmodified startup locale settings (should be [C])')

    # activate user-preferred locale
    loc = None
    try:
        # check whether already set
        loc = locale.getlocale()[0]
        if loc is None:
            loc = locale.setlocale(locale.LC_ALL, '')
            _log.debug("activating user-default locale with <locale.setlocale(locale.LC_ALL, '')> returns: [%s]", loc)
        else:
            _log.info('user-default locale already activated')
        # NOTE(review): re-read after the if/else so <loc> holds what
        # getlocale() reports rather than the raw setlocale() return
        # value; the flattened source did not show the indentation of
        # this line - confirm against the original file
        loc = locale.getlocale()[0]
    except AttributeError:
        _log.exception('Windows does not support locale.LC_ALL')
    except Exception:
        # best effort only: fall through to the [en_EN] default below
        _log.exception('error activating user-default locale')

    # logging state of affairs
    __log_locale_settings('locale settings after activating user-default locale')

    # did we find any locale setting ? assume en_EN if not
    if loc in [None, 'C']:
        _log.error('the current system locale is still [None] or [C], assuming [en_EN]')
        system_locale = "en_EN"
    else:
        system_locale = loc

    # generate system locale levels
    __split_locale_into_levels()

    return True
272 #---------------------------------------------------------------------------
def install_domain(domain=None, language=None, prefer_local_catalog=False):
    """Install a text domain suitable for the main script.

    domain: name of the message catalog, derived from the name of
        the running script (sys.argv[0]) if None
    language: if not None this is written to ${LANG} before the
        search, thereby overriding the system locale language
    prefer_local_catalog: if True the "po" directories next to and
        one level above the script are tried before the standard
        locations rather than after them

    Each candidate directory is installed via gettext.install() and
    accepted only if it actually translates the module's
    <__orig_tag__>. The accepted _() is then wrapped with
    _translate_protected(). If no candidate works a
    gettext.NullTranslations() is installed instead.

    Returns True (always).
    """
    global _translate_original

    script_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]

    # text domain directly specified ?
    if domain is None:
        _log.info('domain not specified, deriving from script name')
        # get text domain from name of script
        domain = script_name
    _log.info('text domain is [%s]', domain)

    # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html
    _log.debug('searching message catalog file for system locale [%s]', system_locale)

    for env_var in ['LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG']:
        tmp = os.getenv(env_var)
        # check the value, not the name of the variable
        if tmp is None:
            _log.debug('${%s} not set', env_var)
        else:
            _log.debug('${%s} = [%s]', env_var, tmp)

    if language is not None:
        _log.info('explicit setting of ${LANG} requested: [%s]', language)
        _log.info('this will override the system locale language setting')
        os.environ['LANG'] = language

    # search for message catalog
    candidates = []

    def _add_local_candidates():
        # - one level above path to binary
        #   last resort for operating systems without standard
        #   places such as DOS/Windows
        loc_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..', 'po'))
        _log.debug('looking above binary install directory [%s]', loc_dir)
        candidates.append(loc_dir)
        # - in path to binary
        loc_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), 'po'))
        _log.debug('looking in binary install directory [%s]', loc_dir)
        candidates.append(loc_dir)

    # - locally
    if prefer_local_catalog:
        _log.debug('preferring local message catalog')
        _add_local_candidates()

    # - standard places
    if os.name == 'posix':
        _log.debug('system is POSIX, looking in standard locations (see Python Manual)')
        # if this is reported to segfault/fail/except on some
        # systems we may have to assume "sys.prefix/share/locale/"
        candidates.append(gettext.bindtextdomain(domain))
    else:
        _log.debug('No use looking in standard POSIX locations - not a POSIX system.')

    # - $(<script-name>_DIR)/
    env_key = "%s_DIR" % script_name.upper()
    _log.debug('looking at ${%s}', env_key)
    if env_key in os.environ:
        loc_dir = os.path.abspath(os.path.join(os.environ[env_key], 'po'))
        _log.debug('${%s} = "%s" -> [%s]', env_key, os.environ[env_key], loc_dir)
        candidates.append(loc_dir)
    else:
        _log.info("${%s} not set", env_key)

    # - locally
    if not prefer_local_catalog:
        _add_local_candidates()

    # now try to actually install it
    for candidate in candidates:
        _log.debug('trying [%s](/%s/LC_MESSAGES/%s.mo)', candidate, system_locale, domain)
        if not os.path.exists(candidate):
            continue
        try:
            # Python 2 signature: makes _() return unicode objects
            gettext.install(domain, candidate, unicode=1)
        except Exception:
            _log.exception('installing text domain [%s] failed from [%s]', domain, candidate)
            continue
        # does it translate ? (_ was just put into the builtins)
        if _(__orig_tag__) == __orig_tag__:
            _log.debug('does not translate: [%s] => [%s]', __orig_tag__, _(__orig_tag__))
            continue
        _log.debug('found msg catalog: [%s] => [%s]', __orig_tag__, _(__orig_tag__))
        # wrap the freshly installed _() with the sanity checking version
        import __builtin__
        _translate_original = __builtin__._
        __builtin__._ = _translate_protected
        return True

    # 5) install a dummy translation class
    _log.warning("falling back to NullTranslations() class")
    # this shouldn't fail
    dummy = gettext.NullTranslations()
    dummy.install()
    return True
#===========================================================================
# True once the fallback to the *recommended* encoding has been logged
_encoding_mismatch_already_logged = False
# encoding found by get_encoding(), None until one has been cached
_current_encoding = None


def get_encoding():
    """Return a sane encoding to use for text conversion.

    On Mac OS X locale.setlocale(locale.LC_ALL, '') does not
    have the desired effect, locale.getlocale()[1] still
    returns None afterwards. In that case fall back to
    locale.getpreferredencoding().

    Sources, in order of preference:

    <sys.getdefaultencoding()>
        - what Python itself uses to convert string <-> unicode
          when no other encoding was specified
        - ascii by default
        - can be set in site.py and sitecustomize.py
        - used unless it is "ascii"
    <locale.getlocale()[1]>
        - what the current locale is *actually* using
          as the encoding for text conversion
        - used unless it is None
    <locale.getpreferredencoding()>
        - what the current locale would *recommend* using
          as the encoding for text conversion
        - last resort, this result is not cached
    """
    global _current_encoding
    global _encoding_mismatch_already_logged

    if _current_encoding is not None:
        return _current_encoding

    python_enc = sys.getdefaultencoding()
    if python_enc == 'ascii':
        locale_enc = locale.getlocale()[1]
        if locale_enc is None:
            # mention the fallback in the log once only
            if not _encoding_mismatch_already_logged:
                _log.debug('*actual* encoding of locale is None, using encoding *recommended* by locale')
                _encoding_mismatch_already_logged = True
            return locale.getpreferredencoding(do_setlocale=False)
        _current_encoding = locale_enc
    else:
        _current_encoding = python_enc

    return _current_encoding
#===========================================================================
# Main
#---------------------------------------------------------------------------
if __name__ == "__main__":

    # self-test, run as: <this script> test [<text domain>]
    if len(sys.argv) == 1:
        sys.exit()

    if sys.argv[1] != u'test':
        sys.exit()

    logging.basicConfig(level = logging.DEBUG)

    # single-argument print(...) yields the same output on Python 2 and 3
    print("======================================================================")
    print("GNUmed i18n")
    print("")
    print("authors: %s" % __author__)
    # this module does not define __version__, so do not depend on it
    print("license: %s ; version: %s" % (__license__, globals().get('__version__', 'unknown')))
    print("======================================================================")

    activate_locale()
    print("system locale:  %s ; levels: %s" % (system_locale, system_locale_level))
    print("likely encoding: %s" % get_encoding())

    # sys.argv[1] is "test", an explicit text domain would be sys.argv[2]
    if len(sys.argv) > 2:
        install_domain(domain = sys.argv[2])
    else:
        install_domain()
    # ********************************************************
    # == do not remove this line =============================
    # it is needed to check for successful installation of
    # the desired message catalog
    # ********************************************************
    tmp = _('Translate this or i18n into <en_EN> will not work properly !')
    # ********************************************************
    # ********************************************************

#=====================================================================