#!/usr/bin/python
"""
This gettext translation file parser is written by Ask Hjorth Larsen and
will be released under the GPL in a couple of days or something

Copyright (C) 2007, Ask Hjorth Larsen
"""

import sys
import traceback


class EntrySet:
    """A collection of Entry objects with simple filtering and statistics."""

    def __init__(self, entries):
        """Store *entries* (a list of Entry objects)."""
        self.entries = entries
        self.__stats__ = None  # Lazily created by stats()

    def get(self, propertyname):
        """Return the attribute *propertyname* of every entry, as a list."""
        return [getattr(entry, propertyname) for entry in self.entries]

    def getfuzzy(self):
        """Return a new EntrySet holding the entries flagged as fuzzy."""
        return EntrySet([entry for entry in self.entries if entry.isfuzzy])

    def gettranslated(self):
        """Return a new EntrySet holding the translated entries."""
        return EntrySet([entry for entry in self.entries
                         if entry.istranslated])

    def getuntranslated(self):
        """Return a new EntrySet of entries neither fuzzy nor translated."""
        return EntrySet([entry for entry in self.entries
                         if not entry.isfuzzy and not entry.istranslated])

    def stats(self):
        """Return the Stats for this set, computing them on first use."""
        if self.__stats__ is None:
            self.__stats__ = Stats(self.entries)
        return self.__stats__


class PoFile(EntrySet):
    """An EntrySet parsed from the lines of a .po file.

    The first parsed entry is taken to be the header. Its msgstr is split
    into 'Key: value' properties, available as ``headerproperties``.
    """

    def __init__(self, lines):
        """Parse *lines* (an iterable of strings, one per file line).

        Raises IndexError if no entry at all could be parsed from *lines*.
        """
        EntrySet.__init__(self, parselines(lines))

        header = self.entries[0]
        self.headercomments = header.translatorcomments

        props = {}
        self.headerproperties = props
        # The header msgstr keeps its escape sequences undecoded, so the
        # individual header fields are separated by a literal backslash + n.
        for line in header.msgstr.split('\\n'):
            # Split on the first colon only: values such as timestamps
            # ("03:42+0000") legitimately contain colons themselves.
            kv = line.split(':', 1)
            if len(kv) == 2:
                props[kv[0].strip()] = kv[1].strip()

        # None when the header does not carry the field.
        self.name = props.get('Project-Id-Version')
        self.lasttranslator = props.get('Last-Translator')


class Stats:
    """Entry, word and character counts for a list of entries."""

    def __init__(self, entries):
        """Count the given entries.

        A leading header entry (one whose msgid is the empty string) is
        left out of the counts; every other entry is counted.
        """
        entries = list(entries)
        # Only drop the first entry when it really is the header. Subsets
        # such as getfuzzy() usually do not start with the header.
        if entries and entries[0].msgid == '':
            entries = entries[1:]

        fuzzy = untranslated = total = translated = pluralentries = 0
        msgid_chars = msgstr_chars = 0
        msgid_words = msgstr_words = 0

        for entry in entries:
            total += 1
            if entry.istranslated:
                translated += 1
            elif entry.isfuzzy:
                fuzzy += 1
            else:
                untranslated += 1

            msgid_chars += len(entry.msgid)
            msgid_words += len(entry.msgid.split())
            if entry.hasplurals:
                msgid_chars += len(entry.msgid_plural)
                msgid_words += len(entry.msgid_plural.split())
                msgstr_chars += sum(len(string) for string in entry.msgstrs)
                msgstr_words += sum(len(string.split())
                                    for string in entry.msgstrs)
                pluralentries += 1
            else:
                msgstr_chars += len(entry.msgstr)
                msgstr_words += len(entry.msgstr.split())

        self.fuzzy = fuzzy
        self.untranslated = untranslated
        self.total = total
        self.pluralentries = pluralentries
        self.translated = translated
        self.msgid_chars = msgid_chars
        self.msgstr_chars = msgstr_chars
        self.msgid_words = msgid_words
        self.msgstr_words = msgstr_words
        # Avoid dividing by zero when there is nothing to average over.
        if total:
            self.avg_msgid_chars = msgid_chars / total
            self.avg_msgstr_chars = msgstr_chars / total
        else:
            self.avg_msgid_chars = 0
            self.avg_msgstr_chars = 0

    def __str__(self):
        """Return one 'name: value' line per statistic."""
        keyvalstrings = [''.join([key, ': ', str(val), '\n'])
                         for key, val in self.__dict__.items()]
        return ''.join(keyvalstrings)


class Entry:
    """A single message entry: its comments, msgid and msgstr(s)."""

    def __init__(self):
        self.translatorcomments = []  # Comments starting with '# '
        self.autocomments = []  # Comments starting with '#. '
        self.referencecomments = []  # Comments starting with '#: '
        self.flagcomments = []  # Comments starting with '#, '
        self.msgid = None
        self.msgid_plural = None
        self.msgstr = None  # This is ONLY the first, if there is more than one
        self.msgstrs = []
        self.hasplurals = False
        self.entryline = None  # Line number of first comment
        self.linenumber = None  # Line number of msgid
        self.rawlines = []  # A list of the actual lines of this entry
        self.istranslated = False  # Translated: not fuzzy, no empty msgstr
        self.isfuzzy = False  # Marked as fuzzy (having possibly empty msgstr)

    def load(self, lines, entryline=None):
        """Fill this entry from *lines*, the raw lines of one entry.

        *entryline* is the position of the first line of the entry within
        the file; it is stored as given and used to derive ``linenumber``.

        Returns False if *lines* contains nothing but comments (or is
        empty), True once the entry has been loaded. Raises an exception
        if the expected msgid/msgstr lines are missing, or if a comment
        starting with '#~ ' is encountered.
        """
        self.entryline = entryline
        self.rawlines = list(lines)

        # Note: comment order has NOT been verified.
        comments = [line for line in lines if line.startswith('#')]
        commentcount = len(comments)
        if commentcount == len(lines):
            return False

        (self.translatorcomments, self.autocomments,
         self.referencecomments, self.flagcomments) = sortcomments(comments)

        # There might be trouble with strings that are not translated,
        # but marked as fuzzy nonetheless.
        self.isfuzzy = any(comment.rfind('fuzzy') > 0
                           for comment in self.flagcomments)

        # Store the actual line number of the msgid. The comments are
        # assumed to precede it, so it sits commentcount lines in.
        if entryline is None:
            self.linenumber = None
        else:
            self.linenumber = entryline + commentcount

        # Start from a clean slate so that load() can be called again.
        self.msgid_plural = None
        self.msgstrs = []

        # Next thing should be the msgid
        self.msgid, index = extract_string('msgid ', lines, commentcount)

        # Check for plural entries
        self.hasplurals = (index < len(lines)
                           and lines[index].startswith('msgid_plural '))
        if self.hasplurals:
            self.msgid_plural, index = extract_string('msgid_plural ',
                                                      lines, index)
            plurcount = 0
            while index < len(lines) and lines[index].startswith('msgstr['):
                string, index = extract_string(
                    'msgstr[' + str(plurcount) + '] ', lines, index)
                plurcount += 1
                self.msgstrs.append(string)
            if not self.msgstrs:
                raise IndexError('Plural entry has no msgstr[N] lines.')
            self.msgstr = self.msgstrs[0]
        else:
            self.msgstr, index = extract_string('msgstr ', lines, index)
            self.msgstrs = [self.msgstr]

        self.istranslated = (not self.isfuzzy
                             and self.msgstrs.count('') == 0)
        return True

    def getcomments(self):
        """Return all raw comment lines of this entry joined together."""
        return ''.join([line for line in self.rawlines
                        if line.startswith('#')])


def readentry(lines, entryline):
    """Build an Entry from *lines*; return None if they hold no message."""
    entry = Entry()
    if entry.load(lines, entryline):
        return entry
    return None


def _unquote(line):
    """Return the text between the quotes of one quoted line.

    Trailing whitespace, including the line terminator if there is one,
    is ignored, so the last line of a file without a final newline and
    lines ending in CRLF are handled as well.
    """
    text = line.rstrip()
    if len(text) >= 2 and text.endswith('"'):
        return text[1:-1]
    return text[1:]


def extract_string(pattern, lines, index=0):
    """Extract the quoted string that starts with *pattern* at *index*.

    lines[index] must start with *pattern*. The quoted text that follows
    it on that line, plus the text of every directly following line that
    starts with a quotation mark, is concatenated. Escape sequences are
    not decoded.

    Returns (string, index of the first line after the string). Raises
    IndexError if *index* is past the end of *lines*, and Exception if
    lines[index] does not start with *pattern*. *lines* is not modified.
    """
    # Work on a copy of the tail, so the caller's list stays untouched.
    remaining = lines[index:]
    if not remaining:
        raise IndexError('Pattern "' + pattern + '" expected, but there '
                         'are no more lines.')
    if not remaining[0].startswith(pattern):
        raise Exception('Pattern "' + pattern +
                        '" not found at start of string "' +
                        remaining[0] + '".')

    remaining[0] = remaining[0][len(pattern):]  # Strip pattern
    msglines = []
    for line in remaining:
        if not line.startswith('"'):
            break
        msglines.append(_unquote(line))

    return ''.join(msglines), index + len(msglines)


def sortcomments(comments):
    """Sort comment lines by kind.

    Returns (translator, automatic, reference, flag) comment lists, the
    kinds being told apart by the prefixes '#. ', '#: ' and '#, ';
    anything else counts as a translator comment. Raises Exception for a
    comment starting with '#~ '.
    """
    transl = []
    auto = []
    ref = []
    flag = []
    for comment in comments:
        if comment.startswith('#. '):
            auto.append(comment)
        elif comment.startswith('#: '):
            ref.append(comment)
        elif comment.startswith('#, '):
            flag.append(comment)
        elif comment.startswith('#~ '):
            raise Exception('Antiquated comment ' + comment)
        else:
            transl.append(comment)
    # Note: comment order has NOT been verified.
    return transl, auto, ref, flag


def grab_sub_string(string, pattern, terminator=None, start=0):
    """Return the text between *pattern* and *terminator* in *string*.

    The search for *pattern* begins at position *start*. *terminator*
    defaults to *pattern*. Returns (substring, startindex, endindex),
    where the substring is string[startindex:endindex]. Raises ValueError
    if *pattern* or *terminator* is not found.
    """
    startindex = string.index(pattern, start) + len(pattern)
    if terminator is None:
        terminator = pattern
    endindex = string.index(terminator, startindex)
    return (string[startindex:endindex], startindex, endindex)


def loadfile(name):
    """Read the file called *name* and return it parsed as a PoFile."""
    with open(name) as pofile:
        lines = pofile.readlines()
    return PoFile(lines)


def parselines(lines):
    """Split *lines* into entries and return them as a list of Entry.

    The plan is to find the empty lines, then make one entry for each
    chunk between two empty lines. The start and the end of the input
    count as empty lines too, so neither the first line nor an entry at
    the very end of the input is lost. Chunks holding only comments
    yield no entry.

    Each entry gets the zero-based index of its first line as entryline.
    If an entry cannot be parsed, the traceback is printed and the
    program exits with status 1.
    """
    lines = list(lines)
    boundaries = [lnum for lnum, line in enumerate(lines)
                  if not line.strip()]
    boundaries.append(len(lines))  # Virtual empty line after the input

    entries = []
    previous = -1  # Virtual empty line before the input
    for boundary in boundaries:
        first = previous + 1
        chunk = lines[first:boundary]
        previous = boundary
        if not chunk:
            # Adjacent empty lines; nothing to parse.
            continue
        try:
            entry = readentry(chunk, first)
        except Exception:
            traceback.print_exc()
            sys.exit(1)
        if entry is not None:
            entries.append(entry)
    return entries


def main(filename='seahorse.gnome-2-18.da.po'):
    """Parse *filename* and return the resulting PoFile."""
    return loadfile(filename)


if __name__ == '__main__':
    # An optional first command line argument overrides the default file.
    main(*sys.argv[1:2])