12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835 |
- # -* coding: utf-8 -*-
- #
- # License: MIT (see LICENSE file provided)
- # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
-
- """
- **polib** allows you to manipulate, create, modify gettext files (pot, po and
- mo files). You can load existing files, iterate through their entries, add,
- modify entries, comments or metadata, etc. or create new po files from scratch.
-
- **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
- :func:`~polib.mofile` convenience functions.
- """
-
- __author__ = 'David Jean Louis <izimobil@gmail.com>'
- __version__ = '1.0.7'
- __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
- 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
-
- import array
- import codecs
- import os
- import re
- import struct
- import sys
- import textwrap
-
try:
    import io
except ImportError:
    class io(object):
        """
        Minimal stand-in for the ``io`` module on interpreters that do not
        provide it (python < 2.6): only ``open()`` is offered, backed by
        ``codecs.open()``.
        """

        def open(fpath, mode='r', encoding=None):
            """Open ``fpath`` through ``codecs.open`` with the given mode."""
            return codecs.open(fpath, mode, encoding)
        open = staticmethod(open)
-
-
- # the default encoding to use when encoding cannot be detected
- default_encoding = 'utf-8'
-
- # python 2/3 compatibility helpers {{{
-
-
# True when running on python 3 or newer, False on python 2.
PY3 = sys.version_info[:2] >= (3, 0)

if PY3:
    # native strings are already text on python 3
    text_type = str

    def b(s):
        """Return the native string ``s`` as bytes (latin-1 encoded)."""
        return s.encode("latin-1")

    def u(s):
        """Return ``s`` unchanged, python 3 strings are already text."""
        return s

else:
    text_type = unicode

    def b(s):
        """Return ``s`` unchanged, python 2 native strings are bytes."""
        return s

    def u(s):
        """Return the native string ``s`` decoded with ``unicode_escape``."""
        return unicode(s, "unicode_escape")
- # }}}
- # _pofile_or_mofile {{{
-
-
def _pofile_or_mofile(f, type, **kwargs):
    """
    Shared implementation behind :func:`polib.pofile` and
    :func:`polib.mofile`.

    Arguments:

    ``f``
        string, path to the file to parse, or its content.

    ``type``
        string, ``'pofile'`` selects the po parser, any other value selects
        the mo parser.

    The ``encoding``, ``check_for_duplicates``, ``klass`` and ``wrapwidth``
    keyword arguments are read from ``kwargs``; when ``encoding`` is missing
    or ``None`` it is obtained from :func:`detect_encoding`.

    Returns the object produced by the parser's ``parse()`` method, with its
    ``wrapwidth`` attribute set (default: ``78``).
    """
    encoding = kwargs.get('encoding')
    if encoding is None:
        # no explicit encoding: look it up in the file itself
        encoding = detect_encoding(f, type == 'mofile')

    if type == 'pofile':
        parser_class = _POFileParser
    else:
        parser_class = _MOFileParser

    parser = parser_class(
        f,
        encoding=encoding,
        check_for_duplicates=kwargs.get('check_for_duplicates', False),
        klass=kwargs.get('klass')
    )
    parsed = parser.parse()
    parsed.wrapwidth = kwargs.get('wrapwidth', 78)
    return parsed
- # }}}
- # _is_file {{{
-
-
- def _is_file(filename_or_contents):
- """
- Safely returns the value of os.path.exists(filename_or_contents).
-
- Arguments:
-
- ``filename_or_contents``
- either a filename, or a string holding the contents of some file.
- In the latter case, this function will always return False.
- """
- try:
- return os.path.exists(filename_or_contents)
- except (ValueError, UnicodeEncodeError):
- return False
- # }}}
- # function pofile() {{{
-
-
def pofile(pofile, **kwargs):
    """
    Parse the po or pot file ``pofile`` and return the resulting
    :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.POFile`
        instance).
    """
    parsed = _pofile_or_mofile(pofile, 'pofile', **kwargs)
    return parsed
- # }}}
- # function mofile() {{{
-
-
def mofile(mofile, **kwargs):
    """
    Parse the mo file ``mofile`` and return the resulting
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo file
        (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``).
    """
    parsed = _pofile_or_mofile(mofile, 'mofile', **kwargs)
    return parsed
- # }}}
- # function detect_encoding() {{{
-
-
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    The encoding is taken from the first ``Content-Type: ... charset=...``
    declaration whose charset is known to the ``codecs`` module.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    rxt = re.compile(u(PATTERN))
    rxb = re.compile(b(PATTERN))

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    def usable_charset(match):
        """
        Return the charset captured by ``match`` as text when a codec exists
        for it, ``None`` otherwise.
        """
        enc = match.group(1).strip()
        if not isinstance(enc, text_type):
            enc = enc.decode('utf-8')
        if charset_exists(enc):
            return enc
        return None

    if not _is_file(file):
        # ``file`` holds the contents themselves; these may be bytes (mo
        # data), in which case the text pattern raises TypeError on python 3
        # and the bytes pattern must be used instead.
        try:
            match = rxt.search(file)
        except TypeError:
            match = rxb.search(file)
        if match:
            enc = usable_charset(match)
            if enc is not None:
                return enc
    else:
        # For PY3, always treat as binary
        if binary_mode or PY3:
            mode, rx = 'rb', rxb
        else:
            mode, rx = 'r', rxt
        f = open(file, mode)
        try:
            for line in f:
                match = rx.search(line)
                if match:
                    enc = usable_charset(match)
                    if enc is not None:
                        return enc
        finally:
            # always release the handle, even if reading or matching fails
            f.close()
    return default_encoding
- # }}}
- # function escape() {{{
-
-
def escape(st):
    """
    Return ``st`` with backslash, tab, carriage return, newline and double
    quote characters replaced by their two-character escape sequences.
    """
    # the backslash must be handled first so that the backslashes introduced
    # by the other replacements are not escaped a second time
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    escaped = st
    for raw, sequence in replacements:
        escaped = escaped.replace(raw, sequence)
    return escaped
- # }}}
- # function unescape() {{{
-
-
def unescape(st):
    """
    Return ``st`` with the two-character escape sequences for backslash, tab,
    carriage return, newline and double quote replaced by the characters they
    stand for.
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def unescape_repl(match):
        escaped_char = match.group(1)
        # an escaped backslash or double quote maps to itself
        return control_chars.get(escaped_char, escaped_char)

    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
- # }}}
- # class _BaseFile {{{
-
-
class _BaseFile(list):
    """
    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
    classes. This class should **not** be instantiated directly.

    It is a ``list`` of entries that also carries the file path, header,
    metadata, encoding and wrap width of the file.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``pofile``
            string, the path to the po or mo file, or its content as a string.

        ``fpath``
            string, the file path to use when ``pofile`` is not given or is
            not an existing path (optional).

        ``wrapwidth``
            integer, the wrap width, only useful when the ``-w`` option was
            passed to xgettext (optional, default: ``78``).

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file, (optional, default: ``False``).
        """
        list.__init__(self)
        # the file path: ``pofile`` when it names an existing file
        pofile = kwargs.get('pofile', None)
        if pofile and _is_file(pofile):
            self.fpath = pofile
        else:
            self.fpath = kwargs.get('fpath')
        # the width at which lines should be wrapped
        self.wrapwidth = kwargs.get('wrapwidth', 78)
        # the file encoding
        self.encoding = kwargs.get('encoding', default_encoding)
        # whether to check for duplicate entries or not
        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __unicode__(self):
        """
        Returns the unicode representation of the file: the metadata entry,
        then the non obsolete entries, then the obsolete ones.
        """
        entries = [self.metadata_as_entry()]
        entries.extend(e for e in self if not e.obsolete)
        entries.extend(self.obsolete_entries())
        ret = u('\n').join(
            [entry.__unicode__(self.wrapwidth) for entry in entries]
        )

        assert isinstance(ret, text_type)
        return ret

    if PY3:
        def __str__(self):
            """
            Returns the string representation of the file.
            """
            return self.__unicode__()
    else:
        def __str__(self):
            """
            Returns the string representation of the file.
            """
            return unicode(self).encode(self.encoding)

    def __contains__(self, entry):
        """
        Overridden ``list`` method to implement the membership test (in and
        not in).
        The method considers that an entry is in the file if it finds an entry
        that has the same msgid (the test is **case sensitive**) and the same
        msgctxt (or none for both entries).

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        found = self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt)
        return found is not None

    def __eq__(self, other):
        """
        Two files are equal when their string representations are equal.
        """
        return str(self) == str(other)

    def append(self, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        if self.check_for_duplicates and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).append(entry)

    def insert(self, index, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Arguments:

        ``index``
            index at which the entry should be inserted.

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        if self.check_for_duplicates and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).insert(index, entry)

    def metadata_as_entry(self):
        """
        Returns the file metadata as a :class:`~polib.POEntry` instance with
        an empty msgid and one ``name: value`` line per metadata item as
        msgstr.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            strs = ['%s: %s' % (name, value) for name, value in mdata]
            e.msgstr = '\n'.join(strs) + '\n'
        if self.metadata_is_fuzzy:
            e.flags.append('fuzzy')
        return e

    def save(self, fpath=None, repr_method='__unicode__'):
        """
        Saves the po file to ``fpath``.
        If it is an existing file and no ``fpath`` is provided, then the
        existing file is rewritten with the modified data.

        Raises ``IOError`` when neither ``fpath`` nor ``self.fpath`` is set.

        Keyword arguments:

        ``fpath``
            string, full or relative path to the file.

        ``repr_method``
            string, the method to use for output. With ``'to_binary'`` the
            file is written in binary mode, otherwise it is written as text
            using the file encoding.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        binary = repr_method == 'to_binary'
        if not binary and not isinstance(contents, text_type):
            # decode before opening so that a decoding error does not leave
            # a truncated file behind
            contents = contents.decode(self.encoding)
        if binary:
            fhandle = open(fpath, 'wb')
        else:
            fhandle = io.open(fpath, 'w', encoding=self.encoding)
        try:
            fhandle.write(contents)
        finally:
            # release the handle even when the write fails
            fhandle.close()
        # set the file path if not set
        if self.fpath is None and fpath:
            self.fpath = fpath

    def find(self, st, by='msgid', include_obsolete_entries=False,
             msgctxt=False):
        """
        Find the entry which msgid (or property identified by the ``by``
        argument) matches the string ``st``. Returns the first matching
        entry, or ``None`` when there is none.

        Keyword arguments:

        ``st``
            string, the string to search for.

        ``by``
            string, the property to use for comparison (default: ``msgid``).

        ``include_obsolete_entries``
            boolean, whether to also search in entries that are obsolete.

        ``msgctxt``
            string, allows specifying a specific message context for the
            search (default: ``False``, the context is not compared).
        """
        for e in self:
            if e.obsolete and not include_obsolete_entries:
                continue
            if getattr(e, by) == st:
                if msgctxt is not False and e.msgctxt != msgctxt:
                    continue
                return e
        return None

    def ordered_metadata(self):
        """
        Convenience method that returns an ordered version of the metadata
        dictionary. The return value is list of tuples (metadata name,
        metadata_value).
        """
        # work on a copy, the known keys are popped from it
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding',
            'Language',
            'Plural-Forms'
        ]
        ordered_data = [
            (name, metadata.pop(name))
            for name in data_order if name in metadata
        ]
        # the rest of the metadata will be alphabetically ordered since there
        # are no specs for this AFAIK
        ordered_data.extend((name, metadata[name]) for name in sorted(metadata))
        return ordered_data

    def to_binary(self):
        """
        Return the binary representation of the file.

        The output is made of the 7 integers header, the table of (size,
        offset) pairs of the keys, the same table for the values, then the
        NUL terminated keys and the NUL terminated values.
        """
        # the keys are sorted in the .mo file, entries are compared with
        # their msgctxt if it exists; sorted() is used so that the list
        # returned by translated_entries() is left untouched
        entries = sorted(
            self.translated_entries(),
            key=lambda o: o.msgctxt or o.msgid
        )
        # add metadata entry
        entries = [self.metadata_as_entry()] + entries
        entries_len = len(entries)

        nul = b('\0')
        offsets = []
        id_chunks, str_chunks = [], []
        ids_size = strs_size = 0
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            msgid = b('')
            if e.msgctxt:
                # Contexts are stored by storing the concatenation of the
                # context, a <EOT> byte, and the original string
                msgid = self._encode(e.msgctxt + '\4')
            if e.msgid_plural:
                plurals = [
                    e.msgstr_plural[index]
                    for index in sorted(e.msgstr_plural.keys())
                ]
                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
                msgstr = self._encode('\0'.join(plurals))
            else:
                msgid += self._encode(e.msgid)
                msgstr = self._encode(e.msgstr)
            offsets.append((ids_size, len(msgid), strs_size, len(msgstr)))
            id_chunks.append(msgid)
            id_chunks.append(nul)
            ids_size += len(msgid) + len(nul)
            str_chunks.append(msgstr)
            str_chunks.append(nul)
            strs_size += len(msgstr) + len(nul)
        ids = b('').join(id_chunks)
        strs = b('').join(str_chunks)

        # The header is 7 32-bit unsigned integers.
        keystart = 7 * 4 + 16 * entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1 + keystart]
            voffsets += [l2, o2 + valuestart]
        offsets = koffsets + voffsets

        output = struct.pack(
            "Iiiiiii",
            # Magic number
            MOFile.MAGIC,
            # Version
            0,
            # number of entries
            entries_len,
            # start of key index
            7 * 4,
            # start of value index
            7 * 4 + entries_len * 8,
            # size and offset of hash table, we don't use hash tables
            0, keystart
        )
        packed_offsets = array.array("i", offsets)
        if hasattr(packed_offsets, 'tobytes'):  # python 3.2 or superior
            output += packed_offsets.tobytes()
        else:
            output += packed_offsets.tostring()
        output += ids
        output += strs
        return output

    def _encode(self, mixed):
        """
        Encodes the given ``mixed`` argument with the file encoding if and
        only if it's an unicode string and returns the encoded string.
        """
        if isinstance(mixed, text_type):
            mixed = mixed.encode(self.encoding)
        return mixed
- # }}}
- # class POFile {{{
-
-
class POFile(_BaseFile):
    """
    Po (or Pot) file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by extension,
    the python ``list`` type.
    """

    def __unicode__(self):
        """
        Returns the unicode representation of the po file: the header lines
        written as ``#`` comments, followed by the entries.
        """
        header_lines = []
        for header in self.header.split('\n'):
            if not len(header):
                header_lines.append("#\n")
            elif header[:1] in [',', ':']:
                header_lines.append('#%s\n' % header)
            else:
                header_lines.append('# %s\n' % header)
        ret = ''.join(header_lines)

        if not isinstance(ret, text_type):
            ret = ret.decode(self.encoding)

        return ret + _BaseFile.__unicode__(self)

    def save_as_mofile(self, fpath):
        """
        Saves the binary representation of the file to given ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the mo file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that returns the percentage of translated
        messages, as an integer. A file without any non obsolete entry is
        reported as ``100``.
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            return 100
        translated = len(self.translated_entries())
        return int(translated * 100 / float(total))

    def translated_entries(self):
        """
        Convenience method that returns the list of translated entries.
        """
        return [e for e in self if e.translated()]

    def untranslated_entries(self):
        """
        Convenience method that returns the list of untranslated entries
        (obsolete and fuzzy entries are left out).
        """
        return [e for e in self if not e.translated() and not e.obsolete
                and 'fuzzy' not in e.flags]

    def fuzzy_entries(self):
        """
        Convenience method that returns the list of fuzzy entries.
        """
        return [e for e in self if 'fuzzy' in e.flags]

    def obsolete_entries(self):
        """
        Convenience method that returns the list of obsolete entries.
        """
        return [e for e in self if e.obsolete]

    def merge(self, refpot):
        """
        Convenience method that merges the current pofile with the pot file
        provided. It behaves exactly as the gettext msgmerge utility:

        * comments of this file will be preserved, but extracted comments and
          occurrences will be discarded;
        * any translations or comments in the file will be discarded, however,
          dot comments and file positions will be preserved;
        * the fuzzy flags are preserved.

        Entries are matched on their message context together with their
        msgid, so two entries sharing a msgid under different contexts are
        merged separately.

        Keyword argument:

        ``refpot``
            object POFile, the reference catalog.
        """
        def entry_key(entry):
            # an empty or missing context is treated as "no context"
            if entry.msgctxt:
                return (entry.msgctxt, entry.msgid)
            return (None, entry.msgid)

        # Store entries in dict/set for faster access
        self_entries = dict((entry_key(entry), entry) for entry in self)
        refpot_keys = set(entry_key(entry) for entry in refpot)
        # Merge entries that are in the refpot
        for entry in refpot:
            e = self_entries.get(entry_key(entry))
            if e is None:
                e = POEntry()
                self.append(e)
            # NOTE(review): assumes POEntry.merge() copies both msgid and
            # msgctxt from ``entry`` onto ``e`` -- confirm against POEntry.
            e.merge(entry)
        # ok, now we must "obsolete" entries that are not in the refpot anymore
        for entry in self:
            if entry_key(entry) not in refpot_keys:
                entry.obsolete = True
- # }}}
- # class MOFile {{{
-
-
class MOFile(_BaseFile):
    """
    Mo file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by
    extension, the python ``list`` type.
    """
    # magic number of mo files, and the same value with its bytes swapped
    MAGIC = 0x950412de
    MAGIC_SWAPPED = 0xde120495

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts all keywords arguments accepted by
        :class:`~polib._BaseFile` class.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number = None
        self.version = 0

    def save(self, fpath=None):
        """
        Writes the binary representation of the mofile to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    def save_as_pofile(self, fpath):
        """
        Writes the mofile to ``fpath`` in the textual po format.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def percent_translated(self):
        """
        Always ``100``; provided to keep the same interface with POFile
        instances.
        """
        return 100

    def translated_entries(self):
        """
        Returns the file itself; provided to keep the same interface with
        POFile instances.
        """
        return self

    def untranslated_entries(self):
        """
        Always an empty list; provided to keep the same interface with POFile
        instances.
        """
        return list()

    def fuzzy_entries(self):
        """
        Always an empty list; provided to keep the same interface with POFile
        instances.
        """
        return list()

    def obsolete_entries(self):
        """
        Always an empty list; provided to keep the same interface with POFile
        instances.
        """
        return list()
- # }}}
- # class _BaseEntry {{{
-
-
class _BaseEntry(object):
    """
    Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
    This class should **not** be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``msgid``
            string, the entry msgid.

        ``msgstr``
            string, the entry msgstr.

        ``msgid_plural``
            string, the entry msgid_plural.

        ``msgstr_plural``
            the entry msgstr_plural values keyed by plural index (default:
            an empty dictionary).

        ``msgctxt``
            string, the entry context (msgctxt).

        ``obsolete``
            bool, whether the entry is "obsolete" or not.

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).
        """
        option = kwargs.get
        self.msgid = option('msgid', '')
        self.msgstr = option('msgstr', '')
        self.msgid_plural = option('msgid_plural', '')
        self.msgstr_plural = option('msgstr_plural', {})
        self.msgctxt = option('msgctxt', None)
        self.obsolete = option('obsolete', False)
        self.encoding = option('encoding', default_encoding)

    def __unicode__(self, wrapwidth=78):
        """
        Returns the unicode representation of the entry: msgctxt (when set),
        msgid, msgid_plural (when set) and either the indexed plural msgstr
        values or the single msgstr. Obsolete entries get every line
        prefixed with ``#~ ``.
        """
        delflag = '#~ ' if self.obsolete else ''
        ret = []
        # write the msgctxt if any
        if self.msgctxt is not None:
            ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
                                   wrapwidth)
        # write the msgid
        ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
        # write the msgid_plural if any
        if self.msgid_plural:
            ret += self._str_field("msgid_plural", delflag, "",
                                   self.msgid_plural, wrapwidth)
        if self.msgstr_plural:
            # write the msgstr_plural values in index order
            plurals = self.msgstr_plural
            for index in sorted(plurals):
                ret += self._str_field("msgstr", delflag, '[%s]' % index,
                                       plurals[index], wrapwidth)
        else:
            # otherwise write the msgstr
            ret += self._str_field("msgstr", delflag, "", self.msgstr,
                                   wrapwidth)
        # trailing empty item so that the result ends with a newline
        ret.append('')
        return u('\n').join(ret)

    if PY3:
        def __str__(self):
            """
            Returns the string representation of the entry.
            """
            return self.__unicode__()
    else:
        def __str__(self):
            """
            Returns the string representation of the entry.
            """
            return unicode(self).encode(self.encoding)

    def __eq__(self, other):
        """
        Two entries are equal when their string representations are equal.
        """
        return str(self) == str(other)

    def _str_field(self, fieldname, delflag, plural_index, field,
                   wrapwidth=78):
        """
        Returns the list of output lines for one field of the entry.

        ``fieldname`` is the keyword to write (a ``previous_`` prefix is
        dropped from the output), ``delflag`` is put in front of every line,
        ``plural_index`` is appended to the keyword, ``field`` is the value
        and ``wrapwidth`` the width used to decide whether the value must be
        wrapped (no wrapping when it is not positive).
        """
        lines = field.splitlines(True)
        if len(lines) > 1:
            lines = [''] + lines  # start with initial empty line
        else:
            escaped_field = escape(field)
            specialchars_count = sum(
                field.count(c) for c in ['\\', '\n', '\r', '\t', '"']
            )
            # comparison must take into account fieldname length + one space
            # + 2 quotes (eg. msgid "<string>")
            flength = len(fieldname) + 3
            if plural_index:
                flength += len(plural_index)
            real_wrapwidth = wrapwidth - flength + specialchars_count
            if wrapwidth > 0 and len(field) > real_wrapwidth:
                # Wrap the line but take field name into account
                wrapped = wrap(
                    escaped_field,
                    wrapwidth - 2,  # 2 for quotes ""
                    drop_whitespace=False,
                    break_long_words=False
                )
                lines = [''] + [unescape(item) for item in wrapped]
            else:
                lines = [field]
        if fieldname.startswith('previous_'):
            # quick and dirty trick to get the real field name
            fieldname = fieldname[9:]

        first_line, continuation = lines[0], lines[1:]
        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                escape(first_line))]
        ret.extend(
            ['%s"%s"' % (delflag, escape(line)) for line in continuation]
        )
        return ret
- # }}}
- # class POEntry {{{
-
-
- class POEntry(_BaseEntry):
- """
- Represents a po file entry.
- """
-
- def __init__(self, *args, **kwargs):
- """
- Constructor, accepts the following keyword arguments:
-
- ``comment``
- string, the entry comment.
-
- ``tcomment``
- string, the entry translator comment.
-
- ``occurrences``
- list, the entry occurrences.
-
- ``flags``
- list, the entry flags.
-
- ``previous_msgctxt``
- string, the entry previous context.
-
- ``previous_msgid``
- string, the entry previous msgid.
-
- ``previous_msgid_plural``
- string, the entry previous msgid_plural.
-
- ``linenum``
- integer, the line number of the entry
- """
- _BaseEntry.__init__(self, *args, **kwargs)
- self.comment = kwargs.get('comment', '')
- self.tcomment = kwargs.get('tcomment', '')
- self.occurrences = kwargs.get('occurrences', [])
- self.flags = kwargs.get('flags', [])
- self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
- self.previous_msgid = kwargs.get('previous_msgid', None)
- self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
- self.linenum = kwargs.get('linenum', None)
-
def __unicode__(self, wrapwidth=78):
    """
    Returns the unicode representation of the entry.

    The output is assembled in this order: comments, occurrences,
    flags, "previous" fields and finally whatever
    ``_BaseEntry.__unicode__`` produces. A ``wrapwidth`` of zero or less
    disables wrapping of comments and occurrences.
    """
    lines = []

    # comments first, if any (with text wrapping as xgettext does);
    # an obsolete entry only keeps its translator comment
    comment_kinds = [('comment', '#. '), ('tcomment', '# ')]
    if self.obsolete:
        comment_kinds = comment_kinds[1:]
    for attr, marker in comment_kinds:
        text = getattr(self, attr)
        if not text:
            continue
        for piece in text.split('\n'):
            too_long = (wrapwidth > 0 and
                        len(piece) + len(marker) > wrapwidth)
            if too_long:
                lines.extend(wrap(
                    piece,
                    wrapwidth,
                    initial_indent=marker,
                    subsequent_indent=marker,
                    break_long_words=False
                ))
            else:
                lines.append('%s%s' % (marker, piece))

    # occurrences (with text wrapping as xgettext does)
    if self.occurrences and not self.obsolete:
        refs = []
        for fpath, lineno in self.occurrences:
            refs.append(('%s:%s' % (fpath, lineno)) if lineno else fpath)
        joined = ' '.join(refs)
        if wrapwidth > 0 and len(joined) + 3 > wrapwidth:
            # textwrap splits words on hyphens, which is wrong for file
            # names: hide the hyphens behind "*" while wrapping and put
            # them back afterwards
            masked = wrap(
                joined.replace('-', '*'),
                wrapwidth,
                initial_indent='#: ',
                subsequent_indent='#: ',
                break_long_words=False
            )
            lines.extend(row.replace('*', '-') for row in masked)
        else:
            lines.append('#: ' + joined)

    # flags (TODO: wrapping ?)
    if self.flags:
        lines.append('#, %s' % ', '.join(self.flags))

    # previous context and previous msgid/msgid_plural
    if self.obsolete:
        marker = "#~| "
    else:
        marker = "#| "
    for name in ('previous_msgctxt', 'previous_msgid',
                 'previous_msgid_plural'):
        previous = getattr(self, name)
        if previous:
            lines.extend(
                self._str_field(name, marker, "", previous, wrapwidth))

    lines.append(_BaseEntry.__unicode__(self, wrapwidth))
    return u('\n').join(lines)
-
def __cmp__(self, other):
    """
    Called by comparison operations if rich comparison is not defined.

    Returns ``-1``, ``0`` or ``1``. Entries are ordered by, in turn:

    1. the obsolete flag (an obsolete entry sorts before a live one);
    2. their sorted occurrences, compared pairwise on file then line,
       the entry with fewer occurrences sorting first on a tie;
    3. ``msgid_plural`` (a missing plural counts as the empty string);
    4. ``msgid``.
    """
    # First: Obsolete test
    if self.obsolete != other.obsolete:
        if self.obsolete:
            return -1
        return 1

    # sorted() already returns new lists, the originals are untouched
    occ1 = sorted(self.occurrences)
    occ2 = sorted(other.occurrences)
    for entry1, entry2 in zip(occ1, occ2):
        for field in (0, 1):  # 0: file path, 1: line number
            if entry1[field] != entry2[field]:
                if entry1[field] > entry2[field]:
                    return 1
                return -1
    # Common prefix is identical: the shorter list sorts first. Handling
    # both directions keeps the ordering antisymmetric.
    if len(occ1) != len(occ2):
        if len(occ1) > len(occ2):
            return 1
        return -1

    # Compare msgid_plural as plain strings; it is text, not a mapping,
    # so it must not be walked key by key.
    plural1 = self.msgid_plural or ''
    plural2 = other.msgid_plural or ''
    if plural1 != plural2:
        if plural1 > plural2:
            return 1
        return -1

    # Finally: Compare message ID
    if self.msgid > other.msgid:
        return 1
    elif self.msgid < other.msgid:
        return -1
    return 0
-
def __gt__(self, other):
    """Tell whether ``__cmp__`` ranks this entry strictly after ``other``."""
    verdict = self.__cmp__(other)
    return verdict > 0
-
def __lt__(self, other):
    """Tell whether ``__cmp__`` ranks this entry strictly before ``other``."""
    verdict = self.__cmp__(other)
    return verdict < 0
-
def __ge__(self, other):
    """Tell whether ``__cmp__`` ranks this entry after or level with ``other``."""
    verdict = self.__cmp__(other)
    return verdict >= 0
-
def __le__(self, other):
    """Tell whether ``__cmp__`` ranks this entry before or level with ``other``."""
    verdict = self.__cmp__(other)
    return verdict <= 0
-
def __eq__(self, other):
    """Tell whether ``__cmp__`` finds no difference with ``other``."""
    verdict = self.__cmp__(other)
    return verdict == 0
-
def __ne__(self, other):
    """Tell whether ``__cmp__`` finds a difference with ``other``."""
    verdict = self.__cmp__(other)
    return verdict != 0
-
def translated(self):
    """
    Returns ``True`` if the entry has been translated or ``False``
    otherwise.

    Obsolete and fuzzy entries never count as translated. Otherwise a
    non-empty ``msgstr`` is enough; failing that, ``msgstr_plural`` must
    be non-empty and must not contain any empty string.
    """
    if self.obsolete:
        return False
    if 'fuzzy' in self.flags:
        return False
    if self.msgstr != '':
        return True
    plurals = self.msgstr_plural
    if not plurals:
        return False
    # a single missing plural form makes the whole entry untranslated
    return not any(plurals[pos] == '' for pos in plurals)
-
def merge(self, other):
    """
    Merge the current entry with the given pot entry.

    Everything describing the source message (msgid, msgctxt,
    occurrences, comment, flags, msgid_plural, obsolete state and the
    ``previous_*`` fields) is taken from ``other``. This entry keeps its
    own ``msgstr``, its existing plural translations and its fuzzy state;
    plural slots that exist in ``other`` but not here are created empty.
    """
    self.msgid = other.msgid
    self.msgctxt = other.msgctxt
    # copy (like flags below) so the two entries never share one list
    self.occurrences = other.occurrences[:]
    self.comment = other.comment
    fuzzy = 'fuzzy' in self.flags
    self.flags = other.flags[:]  # clone flags
    # carry our fuzzy state over, without duplicating the flag when the
    # pot entry is itself marked fuzzy
    if fuzzy and 'fuzzy' not in self.flags:
        self.flags.append('fuzzy')
    self.msgid_plural = other.msgid_plural
    self.obsolete = other.obsolete
    self.previous_msgctxt = other.previous_msgctxt
    self.previous_msgid = other.previous_msgid
    self.previous_msgid_plural = other.previous_msgid_plural
    if other.msgstr_plural:
        for pos in other.msgstr_plural:
            # keep existing translation at pos if any
            self.msgstr_plural.setdefault(pos, '')
-
def __hash__(self):
    """Hash the entry on its ``(msgid, msgstr)`` pair."""
    key = (self.msgid, self.msgstr)
    return hash(key)
- # }}}
- # class MOEntry {{{
-
-
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.
    """
    def __init__(self, *args, **kwargs):
        """
        Constructor.

        For consistency with :class:`~polib.POEntry` the following
        keyword arguments are accepted:

        ``comment``
        ``tcomment``
        ``occurrences``
        ``flags``
        ``previous_msgctxt``
        ``previous_msgid``
        ``previous_msgid_plural``

        Note: they hold no real meaning in the context of MO files; the
        matching attributes are always initialised to their empty value
        here, whatever was passed in.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        # text attributes
        for attr in ('comment', 'tcomment'):
            setattr(self, attr, '')
        # list attributes (one fresh list each)
        for attr in ('occurrences', 'flags'):
            setattr(self, attr, [])
        # "previous" attributes
        for attr in ('previous_msgctxt', 'previous_msgid',
                     'previous_msgid_plural'):
            setattr(self, attr, None)

    def __hash__(self):
        """Hash the entry on its ``(msgid, msgstr)`` pair."""
        key = (self.msgid, self.msgstr)
        return hash(key)
-
- # }}}
- # class _POFileParser {{{
-
-
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """

    def __init__(self, pofile, *args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``pofile``
            string, path to the po file or its content

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to receive the parsed entries, defaults to
            ``POFile`` (optional).
        """
        enc = kwargs.get('encoding', default_encoding)
        if _is_file(pofile):
            try:
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
            except LookupError:
                # unknown codec name: fall back to the module default
                enc = default_encoding
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
        else:
            # not a file: treat the argument as the po content itself
            self.fhandle = pofile.splitlines()

        klass = kwargs.get('klass')
        if klass is None:
            klass = POFile
        self.instance = klass(
            pofile=pofile,
            encoding=enc,
            check_for_duplicates=kwargs.get('check_for_duplicates', False)
        )
        self.transitions = {}
        self.current_line = 0
        self.current_entry = POEntry(linenum=self.current_line)
        self.current_state = 'st'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Signification of symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * CT: a message context
        #     * PC: a previous msgctxt
        #     * PM: a previous msgid
        #     * PP: a previous msgid_plural
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_states = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp',
                      'tc', 'ms', 'mp', 'mx', 'mi']

        self.add('tc', ['st', 'he'], 'he')
        self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
                        'mp', 'mx', 'mi'], 'tc')
        self.add('gc', all_states, 'gc')
        self.add('oc', all_states, 'oc')
        self.add('fl', all_states, 'fl')
        self.add('pc', all_states, 'pc')
        self.add('pm', all_states, 'pm')
        self.add('pp', all_states, 'pp')
        self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
                        'pp', 'ms', 'mx'], 'ct')
        self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
                        'pm', 'pp', 'ms', 'mx'], 'mi')
        self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
        self.add('ms', ['mi', 'mp', 'tc'], 'ms')
        self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
        self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the populated file instance. Raises ``IOError`` on any
        syntax error. When the input is a real file it is closed before
        returning, whether parsing succeeded or not.
        """

        keywords = {
            'msgctxt': 'ct',
            'msgid': 'mi',
            'msgstr': 'ms',
            'msgid_plural': 'mp',
        }
        prev_keywords = {
            'msgid_plural': 'pp',
            'msgid': 'pm',
            'msgctxt': 'pc',
        }
        tokens = []
        try:
            for line in self.fhandle:
                self.current_line += 1
                line = line.strip()
                if line == '':
                    continue

                tokens = line.split(None, 2)
                nb_tokens = len(tokens)

                # previous-translation comments of obsolete entries are
                # skipped entirely
                if tokens[0] == '#~|':
                    continue

                if tokens[0] == '#~' and nb_tokens > 1:
                    # obsolete line: drop the marker and parse the rest
                    # like a regular line
                    line = line[3:].strip()
                    tokens = tokens[1:]
                    nb_tokens -= 1
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0

                # Take care of keywords like
                # msgid, msgid_plural, msgctxt & msgstr.
                if tokens[0] in keywords and nb_tokens > 1:
                    line = line[len(tokens[0]):].lstrip()
                    if re.search(r'([^\\]|^)"', line[1:-1]):
                        raise IOError('Syntax error in po file %s (line %s): '
                                      'unescaped double quote found' %
                                      (self.instance.fpath,
                                       self.current_line))
                    self.current_token = line
                    self.process(keywords[tokens[0]])
                    continue

                self.current_token = line

                if tokens[0] == '#:':
                    if nb_tokens <= 1:
                        continue
                    # we are on a occurrences line
                    self.process('oc')

                elif line[:1] == '"':
                    # we are on a continuation line
                    if re.search(r'([^\\]|^)"', line[1:-1]):
                        raise IOError('Syntax error in po file %s (line %s): '
                                      'unescaped double quote found' %
                                      (self.instance.fpath,
                                       self.current_line))
                    self.process('mc')

                elif line[:7] == 'msgstr[':
                    # we are on a msgstr plural
                    self.process('mx')

                elif tokens[0] == '#,':
                    if nb_tokens <= 1:
                        continue
                    # we are on a flags line
                    self.process('fl')

                elif tokens[0] == '#' or tokens[0].startswith('##'):
                    # NOTE: current_token was captured above, so padding
                    # the local ``line`` does not change what handle_tc
                    # receives; kept as in the original code.
                    if line == '#':
                        line += ' '
                    # we are on a translator comment line
                    self.process('tc')

                elif tokens[0] == '#.':
                    if nb_tokens <= 1:
                        continue
                    # we are on a generated comment line
                    self.process('gc')

                elif tokens[0] == '#|':
                    if nb_tokens <= 1:
                        raise IOError('Syntax error in po file %s (line %s)' %
                                      (self.instance.fpath,
                                       self.current_line))

                    # Remove the marker and any whitespace right after that.
                    line = line[2:].lstrip()
                    self.current_token = line

                    if tokens[1].startswith('"'):
                        # Continuation of previous metadata.
                        self.process('mc')
                        continue

                    if nb_tokens == 2:
                        # Invalid continuation line.
                        raise IOError('Syntax error in po file %s (line %s): '
                                      'invalid continuation line' %
                                      (self.instance.fpath,
                                       self.current_line))

                    # we are on a "previous translation" comment line,
                    if tokens[1] not in prev_keywords:
                        # Unknown keyword in previous translation comment.
                        raise IOError('Syntax error in po file %s (line %s): '
                                      'unknown keyword %s' %
                                      (self.instance.fpath, self.current_line,
                                       tokens[1]))

                    # Remove the keyword and any whitespace
                    # between it and the starting quote.
                    line = line[len(tokens[1]):].lstrip()
                    self.current_token = line
                    self.process(prev_keywords[tokens[1]])

                else:
                    raise IOError('Syntax error in po file %s (line %s)' %
                                  (self.instance.fpath, self.current_line))
        finally:
            # close opened file, even when a syntax error aborts parsing
            if not isinstance(self.fhandle, list):  # must be file
                self.fhandle.close()

        if self.current_entry and len(tokens) > 0 and \
           not tokens[0].startswith('#'):
            # since entries are added when another entry is found, we must add
            # the last entry here (only if there are lines). Trailing comments
            # are ignored
            self.instance.append(self.current_entry)

        # before returning the instance, check if there's metadata and if
        # so extract it in a dict
        metadataentry = self.instance.find('')
        if metadataentry:  # metadata found
            # remove the entry
            self.instance.remove(metadataentry)
            self.instance.metadata_is_fuzzy = metadataentry.flags
            key = None
            for msg in metadataentry.msgstr.splitlines():
                try:
                    key, val = msg.split(':', 1)
                    self.instance.metadata[key] = val.strip()
                except (ValueError, KeyError):
                    # no "key: value" shape: treat the line as the
                    # continuation of the previous key, if there is one
                    if key is not None:
                        self.instance.metadata[key] += '\n' + msg.strip()
        return self.instance

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        ``states``
            list, a list of states (two chars symbols).

        ``next_state``
            the next state the fsm will have after the action.
        """
        if not states:
            return
        # the handler only depends on next_state: resolve it once
        action = getattr(self, 'handle_%s' % next_state)
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        Raises ``IOError`` when no transition exists for the symbol in the
        current state, or when the handler itself fails.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            # handlers return a false value to leave the state unchanged
            if action():
                self.current_state = state
        except Exception:
            raise IOError('Syntax error in po file (line %s)' %
                          self.current_line)

    def _flush_entry(self):
        """
        Store the entry being built and start a new one when the previous
        line belonged to a translation (msgstr / msgstr plural).
        """
        if self.current_state in ('mc', 'ms', 'mx'):
            self.instance.append(self.current_entry)
            self.current_entry = POEntry(linenum=self.current_line)

    # state handlers

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return 1

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        # drop the leading '#' run and at most one following space
        tcomment = self.current_token.lstrip('#')
        if tcomment.startswith(' '):
            tcomment = tcomment[1:]
        self.current_entry.tcomment += tcomment
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """
        Handle a file:num occurrence.

        Each whitespace separated reference is stored as a ``(file, line)``
        tuple; ``line`` is ``''`` when the reference has no numeric
        ``:line`` suffix, in which case the reference is kept verbatim as
        the file part.
        """
        self._flush_entry()
        for occurrence in self.current_token[3:].split():
            try:
                fil, line = occurrence.rsplit(':', 1)
            except (ValueError, AttributeError):
                # no colon at all
                self.current_entry.occurrences.append((occurrence, ''))
                continue
            if not line.isdigit():
                # the colon is part of the name: keep it instead of
                # gluing the two halves together without it
                fil = occurrence
                line = ''
            self.current_entry.occurrences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry()
        self.current_entry.flags += [c.strip() for c in
                                     self.current_token[3:].split(',')]
        return True

    def handle_pp(self):
        """Handle a previous msgid_plural line."""
        self._flush_entry()
        self.current_entry.previous_msgid_plural = \
            unescape(self.current_token[1:-1])
        return True

    def handle_pm(self):
        """Handle a previous msgid line."""
        self._flush_entry()
        self.current_entry.previous_msgid = \
            unescape(self.current_token[1:-1])
        return True

    def handle_pc(self):
        """Handle a previous msgctxt line."""
        self._flush_entry()
        self.current_entry.previous_msgctxt = \
            unescape(self.current_token[1:-1])
        return True

    def handle_ct(self):
        """Handle a msgctxt."""
        self._flush_entry()
        self.current_entry.msgctxt = unescape(self.current_token[1:-1])
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[1:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[1:-1])
        return True

    def handle_mx(self):
        """
        Handle a msgstr plural.

        The token starts with ``msgstr[``; the index is everything up to
        the closing bracket, so indexes of more than one digit are read
        in full.
        """
        token = self.current_token
        # a missing ']' raises ValueError, which process() reports as a
        # syntax error
        index = int(token[7:token.index(']')])
        value = token[token.find('"') + 1:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        token = unescape(self.current_token[1:-1])
        if self.current_state == 'ct':
            self.current_entry.msgctxt += token
        elif self.current_state == 'mi':
            self.current_entry.msgid += token
        elif self.current_state == 'mp':
            self.current_entry.msgid_plural += token
        elif self.current_state == 'ms':
            self.current_entry.msgstr += token
        elif self.current_state == 'mx':
            self.current_entry.msgstr_plural[self.msgstr_index] += token
        elif self.current_state == 'pp':
            self.current_entry.previous_msgid_plural += token
        elif self.current_state == 'pm':
            self.current_entry.previous_msgid += token
        elif self.current_state == 'pc':
            self.current_entry.previous_msgctxt += token
        # don't change the current state
        return False
- # }}}
- # class _MOFileParser {{{
-
-
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """

    def __init__(self, mofile, *args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``mofile``
            string, path to the mo file (it is opened in binary mode)

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to receive the parsed entries, defaults to
            ``MOFile`` (optional).
        """
        self.fhandle = open(mofile, 'rb')

        klass = kwargs.get('klass')
        if klass is None:
            klass = MOFile
        self.instance = klass(
            fpath=mofile,
            encoding=kwargs.get('encoding', default_encoding),
            check_for_duplicates=kwargs.get('check_for_duplicates', False)
        )

    def __del__(self):
        """
        Make sure the file is closed, this prevents warnings on unclosed file
        when running tests with python >= 3.2.
        """
        # the attribute is missing when open() failed in the constructor
        fhandle = getattr(self, 'fhandle', None)
        if fhandle:
            fhandle.close()

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated file instance. Raises ``IOError`` when the
        magic number or the major revision number is not recognised. The
        file handle is closed on the way out, whether parsing succeeded
        or not.
        """
        try:
            return self._parse_stream()
        finally:
            # close opened file
            self.fhandle.close()

    def _parse_stream(self):
        """Read header, index tables and strings from the open handle."""
        # parse magic number
        magic_number = self._readbinary('<I', 4)
        if magic_number == MOFile.MAGIC:
            ii = '<II'
        elif magic_number == MOFile.MAGIC_SWAPPED:
            ii = '>II'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number
        # parse the version number and the number of strings
        version, numofstrings = self._readbinary(ii, 8)
        # from MO file format specs: "A program seeing an unexpected major
        # revision number should stop reading the MO file entirely".
        # The major revision is the upper 16 bits of the field, the lower
        # 16 bits hold the minor revision.
        if version >> 16 not in (0, 1):
            raise IOError('Invalid mo file, unexpected major revision number')
        self.instance.version = version
        # original strings and translation strings hash table offset
        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
        # move to msgid hash table and read length and offset of msgids
        self.fhandle.seek(msgids_hash_offset)
        msgids_index = []
        for i in range(numofstrings):
            msgids_index.append(self._readbinary(ii, 8))
        # move to msgstr hash table and read length and offset of msgstrs
        self.fhandle.seek(msgstrs_hash_offset)
        msgstrs_index = []
        for i in range(numofstrings):
            msgstrs_index.append(self._readbinary(ii, 8))
        # build entries
        encoding = self.instance.encoding
        for i in range(numofstrings):
            # each index item is a (length, offset) pair
            self.fhandle.seek(msgids_index[i][1])
            msgid = self.fhandle.read(msgids_index[i][0])

            self.fhandle.seek(msgstrs_index[i][1])
            msgstr = self.fhandle.read(msgstrs_index[i][0])
            if i == 0 and not msgid:  # metadata
                raw_metadata, metadata = msgstr.split(b('\n')), {}
                for line in raw_metadata:
                    tokens = line.split(b(':'), 1)
                    if tokens[0] != b(''):
                        try:
                            k = tokens[0].decode(encoding)
                            v = tokens[1].decode(encoding)
                            metadata[k] = v.strip()
                        except IndexError:
                            # line without a colon: key with empty value
                            metadata[k] = u('')
                self.instance.metadata = metadata
                continue
            # test if we have a plural entry
            msgid_tokens = msgid.split(b('\0'))
            if len(msgid_tokens) > 1:
                entry = self._build_entry(
                    msgid=msgid_tokens[0],
                    msgid_plural=msgid_tokens[1],
                    msgstr_plural=dict(enumerate(msgstr.split(b('\0'))))
                )
            else:
                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
            self.instance.append(entry)
        return self.instance

    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
                     msgstr_plural=None):
        """
        Decode the raw byte strings with the instance encoding and wrap
        them in a ``MOEntry``. A msgid containing the ``\\x04`` separator
        is split into context and msgid.
        """
        msgctxt_msgid = msgid.split(b('\x04'))
        encoding = self.instance.encoding
        if len(msgctxt_msgid) > 1:
            kwargs = {
                'msgctxt': msgctxt_msgid[0].decode(encoding),
                'msgid': msgctxt_msgid[1].decode(encoding),
            }
        else:
            kwargs = {'msgid': msgid.decode(encoding)}
        if msgstr:
            kwargs['msgstr'] = msgstr.decode(encoding)
        if msgid_plural:
            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
        if msgstr_plural:
            # decoded in place: the dict passed in is the one stored
            for k in msgstr_plural:
                msgstr_plural[k] = msgstr_plural[k].decode(encoding)
            kwargs['msgstr_plural'] = msgstr_plural
        return MOEntry(**kwargs)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        raw = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, raw)
        if len(tup) == 1:
            return tup[0]
        return tup
- # }}}
- # class TextWrapper {{{
-
-
class TextWrapper(textwrap.TextWrapper):
    """
    Subclass of textwrap.TextWrapper that backport the
    drop_whitespace option.
    """
    def __init__(self, *args, **kwargs):
        """
        Same arguments as ``textwrap.TextWrapper``, plus the optional
        ``drop_whitespace`` keyword (default ``True``), which is taken out
        before delegating to the parent constructor and stored afterwards.
        """
        keep_flag = kwargs.pop('drop_whitespace', True)
        textwrap.TextWrapper.__init__(self, *args, **kwargs)
        self.drop_whitespace = keep_flag

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Turn a sequence of text chunks into lines no longer than
        'self.width' (longer lines remain possible when
        'break_long_words' is false). A chunk is either a "word" or a run
        of whitespace and is never split, except by the long-word
        handling; line breaks only happen between chunks. With
        'drop_whitespace' set, whitespace chunks are removed from the
        start and the end of lines; all other whitespace is preserved.
        The ``chunks`` list is consumed in the process.
        """
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        wrapped = []

        # Work on the reversed list so that the next chunk can be taken
        # cheaply from the end, stack fashion.
        chunks.reverse()

        while chunks:
            # pieces: chunks selected for the line being built
            # used: total length of those chunks
            pieces = []
            used = 0

            # Only the very first line gets the initial indent.
            if wrapped:
                prefix = self.subsequent_indent
            else:
                prefix = self.initial_indent

            # Room left for text once the indent is accounted for.
            room = self.width - len(prefix)

            # A line never starts with whitespace, except for the very
            # first line of the text.
            if self.drop_whitespace and chunks[-1].strip() == '' and wrapped:
                del chunks[-1]

            # Take chunks for as long as they fit.
            while chunks:
                size = len(chunks[-1])
                if used + size > room:
                    break
                pieces.append(chunks.pop())
                used += size

            # The next chunk could not fit on *any* line: let the parent
            # class decide how to deal with it.
            if chunks and len(chunks[-1]) > room:
                self._handle_long_word(chunks, pieces, used, room)

            # A line never ends with whitespace.
            if self.drop_whitespace and pieces and not pieces[-1].strip():
                del pieces[-1]

            # Empty lines are not emitted.
            if pieces:
                wrapped.append(prefix + ''.join(pieces))

        return wrapped
- # }}}
- # function wrap() {{{
-
-
def wrap(text, width=70, **kwargs):
    """
    Wrap a single paragraph of text, returning a list of wrapped lines.

    Interpreters older than Python 2.6 go through this module's
    ``TextWrapper``; any other interpreter uses ``textwrap.wrap``
    directly. Extra keyword arguments are passed along unchanged.
    """
    needs_backport = sys.version_info < (2, 6)
    if not needs_backport:
        return textwrap.wrap(text, width=width, **kwargs)
    return TextWrapper(width=width, **kwargs).wrap(text)
-
- # }}}
|