#!/usr/bin/python
"""
This gettext translation file parser is written by Ask Hjorth Larsen
and will be released under the GPL in a couple of days or something

Copyright (C) 2007, Ask Hjorth Larsen <asklarsen@gmail.com>
"""

import sys, traceback

class EntrySet:
    """A list of entries with simple filtering and lazily built statistics."""

    def __init__(self, entries):
        self.entries = entries
        # Stats object for self.entries; created on the first stats() call
        self.__stats__ = None

    def _select(self, keep):
        # Build a new plain EntrySet holding the entries accepted by keep()
        chosen = []
        for candidate in self.entries:
            if keep(candidate):
                chosen.append(candidate)
        return EntrySet(chosen)

    def get(self, propertyname):
        """Return the attribute named propertyname of every entry, in order."""
        values = []
        for candidate in self.entries:
            values.append(getattr(candidate, propertyname))
        return values

    def getfuzzy(self):
        """Return an EntrySet of the entries whose isfuzzy is true."""
        return self._select(lambda candidate: candidate.isfuzzy)

    def gettranslated(self):
        """Return an EntrySet of the entries whose istranslated is true."""
        return self._select(lambda candidate: candidate.istranslated)

    def getuntranslated(self):
        """Return an EntrySet of the entries neither fuzzy nor translated."""
        return self._select(lambda candidate: not (candidate.isfuzzy
                                                   or candidate.istranslated))

    def stats(self):
        """Return the Stats for the entries, computing them only once."""
        if self.__stats__ is None:
            self.__stats__ = Stats(self.entries)
        return self.__stats__


class PoFile(EntrySet):
    """An EntrySet built from the lines of a PO file.

    The first parsed entry is treated as the header.  Its msgstr is split
    into 'Key: value' lines which are stored in headerproperties.

    Attributes set by the constructor:
      headercomments   -- translator comments of the header entry
      headerproperties -- dict mapping header keys to their values
      name             -- value of the 'Project-Id-Version' header
      lasttranslator   -- value of the 'Last-Translator' header

    Raises IndexError if no entry could be parsed and KeyError if one of
    the two required header fields is missing.
    """

    def __init__(self, lines):
        entries = parselines(lines)
        EntrySet.__init__(self, entries)
        header = self.entries[0]
        self.headercomments = header.translatorcomments
        props = {}
        self.headerproperties = props

        # The msgstr still contains the escaped two-character sequence '\n'
        # between header fields, so split on that rather than on a newline.
        for line in header.msgstr.split('\\n'):
            # Split on the first colon only: values such as timestamps
            # ('12:00+0100') or URLs contain colons themselves.
            key, separator, value = line.partition(':')
            if separator:
                props[key.strip()] = value.strip()

        self.name = props['Project-Id-Version']
        self.lasttranslator = props['Last-Translator']

class Stats:
    """Counts and size statistics computed over a list of entries.

    Every entry except the first one is counted.
    NOTE(review): skipping the first element is only correct when it is
    the header entry, as in PoFile; confirm for filtered entry sets.

    Attributes: fuzzy, untranslated, total, pluralentries, translated,
    msgid_chars, msgstr_chars, msgid_words, msgstr_words,
    avg_msgid_chars and avg_msgstr_chars.  Both averages are 0 when no
    entry was counted.
    """

    def __init__(self, entries):
        fuzzy = untranslated = total = translated = pluralentries = 0
        msgid_chars = msgstr_chars = 0
        msgid_words = msgstr_words = 0

        for entry in entries[1:]:
            total += 1
            if entry.istranslated:
                translated += 1
            elif entry.isfuzzy:
                fuzzy += 1
            else:
                untranslated += 1

            msgid_chars += len(entry.msgid)
            msgid_words += len(entry.msgid.split())
            if entry.hasplurals:
                # Plural entries contribute the plural msgid and every
                # one of their msgstr[N] strings.
                msgid_chars += len(entry.msgid_plural)
                msgid_words += len(entry.msgid_plural.split())
                msgstr_chars += sum(len(string) for string in entry.msgstrs)
                msgstr_words += sum(len(string.split())
                                    for string in entry.msgstrs)
                pluralentries += 1
            else:
                msgstr_chars += len(entry.msgstr)
                msgstr_words += len(entry.msgstr.split())

        self.fuzzy = fuzzy
        self.untranslated = untranslated
        self.total = total
        self.pluralentries = pluralentries
        self.translated = translated

        self.msgid_chars = msgid_chars
        self.msgstr_chars = msgstr_chars
        self.msgid_words = msgid_words
        self.msgstr_words = msgstr_words

        # Guard against division by zero when nothing was counted
        if total:
            self.avg_msgid_chars = msgid_chars / total
            self.avg_msgstr_chars = msgstr_chars / total
        else:
            self.avg_msgid_chars = 0
            self.avg_msgstr_chars = 0

    def __str__(self):
        """Return one 'name: value' line per attribute."""
        keyvalstrings = [''.join([key, ': ', str(val), '\n'])
                         for key, val in self.__dict__.items()]
        return ''.join(keyvalstrings)
        

class Entry:
    """A single message entry of a PO file.

    A fresh Entry is empty; load() fills it from the lines of one chunk.
    """

    def __init__(self):
        self.translatorcomments = [] # Comments starting with '# '
        self.autocomments = [] # Comments starting with '#. '
        self.referencecomments = [] # Comments starting with '#: '
        self.flagcomments = [] # Comments starting with '#, '
        self.msgid = None
        self.msgid_plural = None
        self.msgstr = None # This is ONLY the first, if there is more than one
        self.msgstrs = []
        self.hasplurals = False
        self.entryline = None # Line number of first comment
        self.linenumber = None # Line number of msgid
        self.rawlines = [] # A list of the actual lines of this entry
        self.istranslated = False # Translated: not fuzzy, and no empty msgstr
        self.isfuzzy = False # Marked as fuzzy (having possibly empty msgstr)

    def load(self, lines, entryline=None):
        """Fill this entry from the lines of one chunk.

        lines     -- the lines of the chunk: comments first, then msgid,
                     an optional msgid_plural and the msgstr line(s)
        entryline -- line number of the first line of the chunk, or None
                     if unknown (linenumber is then left as None)

        Returns False if every line is a comment (which includes an empty
        chunk), and True once the entry has been parsed.  Errors raised
        by sortcomments and extract_string are propagated.
        """
        self.entryline = entryline
        self.rawlines = list(lines)

        # Note: comment order has NOT been verified.
        comments = [line for line in lines if line.startswith('#')]
        commentcount = len(comments)

        if commentcount == len(lines):
            return False

        (self.translatorcomments, self.autocomments, self.referencecomments,
         self.flagcomments) = sortcomments(comments)

        # There might be trouble with strings that are not translated,
        # but marked as fuzzy nonetheless.
        self.isfuzzy = any('fuzzy' in comment
                           for comment in self.flagcomments)

        # Store the actual line number of the msgid; this is only possible
        # when the caller told us where the chunk starts.
        if entryline is None:
            self.linenumber = None
        else:
            self.linenumber = entryline + commentcount

        # Next thing should be the msgid
        self.msgid, index = extract_string('msgid ', lines, commentcount)

        # Check for plural entries
        self.hasplurals = lines[index].startswith('msgid_plural ')
        if self.hasplurals:
            self.msgid_plural, index = extract_string('msgid_plural ',
                                                      lines, index)

            # Collect into a fresh list so that loading the same Entry
            # twice does not accumulate strings from the previous load.
            plurals = []
            while index < len(lines) and lines[index].startswith('msgstr['):
                string, index = extract_string('msgstr['+str(len(plurals))+'] ',
                                               lines, index)
                plurals.append(string)

            self.msgstrs = plurals
            self.msgstr = self.msgstrs[0]

        else:
            self.msgstr, index = extract_string('msgstr ', lines, index)
            self.msgstrs = [self.msgstr]

        self.istranslated = (not self.isfuzzy) and ('' not in self.msgstrs)

        return True

    def getcomments(self):
        """Return all comment lines of the raw entry joined into one string."""
        return ''.join([line for line in self.rawlines
                        if line.startswith('#')])

def readentry(lines, entryline):
    """Create an Entry from lines; return None if it could not be loaded.

    lines and entryline are handed unchanged to Entry.load.
    """
    candidate = Entry()
    if not candidate.load(lines, entryline):
        return None
    return candidate
    
def extract_string(pattern, lines, index=0):
    """Extract the quoted string that starts at lines[index].

    lines[index] must start with pattern (for instance 'msgid ').  The
    quoted text following the pattern and the quoted text of all directly
    following lines that start with a quotation mark are concatenated.

    Returns a tuple (string, nextindex) where nextindex is the index of
    the first line that does not belong to the string.

    Raises Exception if lines[index] does not start with pattern, and
    IndexError if there is no line at index.
    """
    # Work on a copy of the tail so the caller's list is left untouched
    tail = list(lines[index:])

    if not tail[0].startswith(pattern):
        raise Exception('Pattern "'+pattern+'" not found at start of string "'
                        + tail[0] + '".')

    tail[0] = tail[0][len(pattern):] # Strip pattern
    msglines = []
    for line in tail:
        if not line.startswith('"'):
            break
        # Drop the line ending first ('\n', '\r\n' or none at all on the
        # last line of a file), then the surrounding quotation marks.
        text = line.rstrip()
        if len(text) > 1 and text.endswith('"'):
            msglines.append(text[1:-1])
        else:
            msglines.append(text[1:])

    return ''.join(msglines), index + len(msglines)

def sortcomments(comments):
    """Sort comment lines into four lists according to their prefix.

    Returns the tuple (transl, auto, ref, flag):
      transl -- translator comments: everything without a known prefix
      auto   -- comments starting with '#. '
      ref    -- comments starting with '#: '
      flag   -- comments starting with '#, '

    Raises Exception for a comment starting with '#~ '.
    """
    transl = []
    auto = []
    ref = []
    flag = []
    targets = {'#. ': auto, '#: ': ref, '#, ': flag}
    for comment in comments:
        prefix = comment[:3]
        if prefix == '#~ ':
            raise Exception('Antiquated comment '+comment)
        # Anything without a recognised prefix is a translator comment
        targets.get(prefix, transl).append(comment)
    # Note: comment order has NOT been verified.
    return transl, auto, ref, flag

def grab_sub_string(string, pattern, terminator=None, start=0):
    """Return the text enclosed between pattern and terminator.

    string     -- the string to search
    pattern    -- marker that precedes the wanted text
    terminator -- marker that follows the wanted text; defaults to pattern
    start      -- index at which the search for pattern begins

    Returns the tuple (substring, startindex, endindex), where
    string[startindex:endindex] is the substring.

    Raises ValueError if pattern or terminator is not found.
    """
    # Honour 'start' so callers can continue a search after a previous match
    startindex = string.index(pattern, start) + len(pattern)
    if terminator is None:
        terminator = pattern
    endindex = string.index(terminator, startindex)

    return (string[startindex:endindex], startindex, endindex)

def loadfile(name):
    """Read the file called name and return it parsed as a PoFile.

    Errors from opening the file and from the PoFile constructor are
    propagated to the caller.
    """
    # PoFile takes the lines of the file in its constructor; the file is
    # closed as soon as the lines have been read.
    with open(name) as pofile_input:
        lines = pofile_input.readlines()
    return PoFile(lines)

def parselines(lines):
    """Split lines into blank-line separated chunks and parse each one.

    Every run of consecutive non-blank lines forms one chunk, including
    a run that starts on the very first line or ends on the very last
    line.  A line is blank if it holds nothing but whitespace, so files
    with '\\r\\n' line endings are split as well.  Any number of adjacent
    blank lines acts as a single separator.

    Each chunk is passed to readentry together with the zero-based index
    of its first line.  Chunks for which readentry returns None are left
    out of the result.

    Returns the list of parsed entries in file order.  If readentry
    raises an exception, the traceback is printed and the program exits.
    """
    # Collect (index of first line, lines of the chunk) pairs
    chunks = []
    current = []
    for lnum, line in enumerate(lines):
        if line.strip():
            if not current:
                # First line of a new chunk: register the list now and
                # keep appending to it while the chunk lasts.
                chunks.append((lnum, current))
            current.append(line)
        else:
            current = []

    entries = []
    for firstline, chunk in chunks:
        try:
            entry = readentry(chunk, firstline)
        except Exception:
            traceback.print_exc()
            sys.exit()
        if entry is not None:
            entries.append(entry)

    return entries

def main(filename='seahorse.gnome-2-18.da.po'):
    """Parse the PO file called filename and return the resulting PoFile.

    filename defaults to the sample file this script was written against.
    """
    # Close the file as soon as its lines have been read
    with open(filename) as pofile_input:
        lines = pofile_input.readlines()

    return PoFile(lines)
    
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()