#!/usr/local/bin/python
## My comments are marked with double "numbersign"
"""Structural checker for gettext .po files.

The file is split into "entries" (everything up to and including a msgstr
line plus its quoted continuation lines).  Every entry is then checked for a
leading blank line, a msgid, a msgstr, unknown lines and non-string lines
between msgid and msgstr.  Optionally duplicates of msgid are reported and
the parsed messages can be listed interactively.

The code deliberately sticks to constructs that parse on Python 2.6+ as well
as Python 3 (no print statement, no raw_input dependency).
"""

import re
import sys
from optparse import OptionParser

## Patterns are compiled once; they are used for every line of the file.
_MSGID_RE = re.compile(r'^msgid\s+".+|^msgid_plural\s+".+')
_SIMILAR_MSGID_RE = re.compile(r'^\s*[mM][sS][gG][iI][dD].*')
_MSGSTR_RE = re.compile(r'^msgstr\s+".+|^msgstr\[\d\]\s+".+')
_SIMILAR_MSGSTR_RE = re.compile(r'^\s*[mM][sS][gG][sS][tT][rR].*')
_NEWLINE_RE = re.compile(r'^[\s]*\n')
_COMMENT_SEP_RE = re.compile(r'^#$')
_COMMENT_RE = re.compile(r'^\s*#')
_DEFINED_RE = re.compile(r'^\s*"|^#|^msgid|^msgstr|^\s*$')
_STRING_RE = re.compile(r'^\s*"')
_QUOTED_RE = re.compile(r'".*"')
_PAIR_RE = re.compile(r'^(\d*):*(\d*)$')
_EXIT_RE = re.compile(r'.*[Ee]xit.*$')

_RULER = "------------------------------"

## Keys of the error counter dictionary ("General" is the grand total,
## "Enteries" the number of entries with at least one error).
_ERROR_KINDS = ("General", "NotGNU", "NotUniforum", "UnDef",
                "IdStrEr", "MisID", "MisStr", "Enteries")

## (counter key, explanation) pairs in the order used by the summary.
_SUMMARY_CAUSES = (
    ("NotGNU", "Missing empty line at the beggining."),
    ("NotUniforum", "Missing numbersign(#) separator at the first line."),
    ("UnDef", "Invalid strings in the file."),
    ("IdStrEr", "Comentaries or empty lines between msgid and msgstr."),
    ("MisID", "Missing msgids, or additional spurious msgstr lines."),
    ("MisStr", "Corrupted msgstr enteries."),
)

try:
    _read_input = raw_input  # Python 2
except NameError:
    _read_input = input  # Python 3


def isMsgId(s):
    """Return 1 if the line starts with msgid/msgid_plural and a quote, else 0."""
    return 1 if _MSGID_RE.match(s) is not None else 0


def similar2MsgId(s):
    """Return 1 if the line looks like msgid (any case, may be indented), else 0."""
    return 1 if _SIMILAR_MSGID_RE.match(s) is not None else 0


def isMsgStr(s):
    """Return 1 if the line starts with msgstr or msgstr[N] and a quote, else 0."""
    return 1 if _MSGSTR_RE.match(s) is not None else 0


def similar2MsgStr(s):
    """Return 1 if the line looks like msgstr (any case, may be indented), else 0."""
    return 1 if _SIMILAR_MSGSTR_RE.match(s) is not None else 0


def isNewLine(s):
    """Return True if the line is only whitespace terminated by a newline."""
    return _NEWLINE_RE.match(s) is not None


def isCommentSep(s):
    """Return True if the line is a lone "#" (comment separator)."""
    return _COMMENT_SEP_RE.match(s) is not None


def isComment(s):
    """Return True if the first non-blank character of the line is "#"."""
    return _COMMENT_RE.match(s) is not None


def isUndefined(s):
    """Return 1 if the line is none of: string, comment, msgid*, msgstr*, blank.

    Note that only comments starting in the first column count as defined.
    """
    return 0 if _DEFINED_RE.match(s) is not None else 1


def isString(s):
    """Return 1 if the first non-blank character of the line is a double quote."""
    return 1 if _STRING_RE.match(s) is not None else 0


def getString(s):
    """Return the quoted text of the line, quotes included.

    The text runs from the first to the last double quote on the line, so it
    works for "msgid ..."/"msgstr ..." lines as well as for bare continuation
    lines, and it tolerates trailing whitespace or CR/LF line ends.

    Returns None when the line holds no complete quoted string (the previous
    implementation raised AttributeError there and returned None for every
    continuation line).
    """
    match = _QUOTED_RE.search(s)
    if match is None:
        return None
    return match.group(0)


def parseEntries(lines):
    """Split the lines of a .po file into entries.

    An entry is closed by a msgstr-like line; quoted lines directly after it
    are treated as continuation of that msgstr.  Lines following the last
    msgstr (trailing blank lines, comments) belong to no entry and are
    dropped.

    lines -- iterable of lines, each still carrying its line end
    Returns (entries, count_msgstr): a list of lists of lines and the number
    of msgstr-like lines seen.
    """
    entries = []
    current = []            # lines collected for the entry being built
    last_entry = []         # closed entry that may still get continuations
    finish_entry = False    # True while last_entry waits for continuations
    count_msgstr = 0
    for line in lines:
        if finish_entry and isString(line):
            last_entry.append(line)
            continue
        if finish_entry:
            entries.append(last_entry)
            finish_entry = False
        current.append(line)
        ## "Similar" lines are used on purpose: a damaged msgstr still
        ## terminates the entry so one typo does not merge two entries.
        ## NOTE(review): every msgstr[N] line of a plural entry closes an
        ## entry too; the following plural forms are reported as entries
        ## without msgid.
        if similar2MsgStr(line):
            finish_entry = True
            count_msgstr += 1
            last_entry = current
            current = []
    ## The file may end right after the last msgstr; without this the final
    ## entry was silently lost.
    if finish_entry:
        entries.append(last_entry)
    return entries, count_msgstr


def _emit(text, out=None):
    """Write text followed by a newline to out (default: sys.stdout)."""
    stream = sys.stdout if out is None else out
    stream.write(text + "\n")


def _flag(error_count, messages, kind, message):
    """Count one error of the given kind and remember its message once."""
    error_count["General"] += 1
    error_count[kind] += 1
    if message not in messages:
        messages.append(message)


def checkEntries(entries, strict=False, out=None):
    """Check every entry and write the report of the faulty ones.

    entries -- list of entries as returned by parseEntries
    strict  -- also run the Uniforum check
    out     -- stream for the report (default: sys.stdout)
    Returns (msgids, msgstrs, error_count).  msgids/msgstrs hold, per entry,
    the list of quoted strings making up the message; missing ones are
    replaced by ["###CORRUPTED###"] / ["###UNTRANSLATED###"].  error_count
    maps the error kind to the number of occurrences.
    """
    error_count = dict.fromkeys(_ERROR_KINDS, 0)
    msgids = []
    msgstrs = []
    _emit("\n\tCHECK REPORT\n", out)
    for index, entry in enumerate(entries):
        errors_before = error_count["General"]
        messages = []

        ## Check 1. Pootle/Gnu compatibility (the header entry is exempt)
        if index != 0:
            first_line = entry[0]
            if not isNewLine(first_line):
                _flag(error_count, messages, "NotGNU",
                      "Not GNU compatible: missing freeline at the beggining of entry")
            ## NOTE(review): a blank first line can never be a "#" line, so
            ## with --strict this reports every entry that passed the GNU
            ## check.  Kept as it was; the intended rule needs confirming.
            elif strict and not isCommentSep(first_line):
                _flag(error_count, messages, "NotUniforum",
                      "Not Uniforum compatible: does not starts with \"#\" line")

        ## Walk through the lines and collect the message parts
        msgid_seen = False
        msgstr_seen = False
        msgstr_like = 0     # number of msgstr-like lines seen so far
        undefined = 0
        cs_msgid = []       # complete multiline msgid
        cs_msgstr = []      # complete multiline msgstr
        for line in entry:
            undefined += isUndefined(line)
            msgstr_like += similar2MsgStr(line)
            if isMsgId(line):
                msgid_seen = True
                cs_msgid = [getString(line)]
            elif isMsgStr(line):
                msgstr_seen = True
                cs_msgstr = [getString(line)]
            ## Check 5. Not a string between msgid and msgstr
            elif msgid_seen and msgstr_like != 1:
                if isString(line):
                    cs_msgid.append(getString(line))
                else:
                    _flag(error_count, messages, "IdStrEr",
                          "Between msgid and msgstr is non string line")
            elif msgstr_seen and isString(line):
                cs_msgstr.append(getString(line))

        ## Check 2. MsgId check
        if not msgid_seen:
            _flag(error_count, messages, "MisID",
                  "Missing msgid or it is corrupted")
            cs_msgid = ["###CORRUPTED###"]
        ## Check 3. MsgStr check
        if not msgstr_seen:
            _flag(error_count, messages, "MisStr",
                  "Missing msgstr or it is corrupted")
            cs_msgstr = ["###UNTRANSLATED###"]
        ## Check 4. Undefined lines
        if undefined != 0:
            _flag(error_count, messages, "UnDef", "Contains undefined lines")

        msgids.append(cs_msgid)
        msgstrs.append(cs_msgstr)

        ## List the errors of this entry together with its text
        found = error_count["General"] - errors_before
        if found:
            error_count["Enteries"] += 1
            for message in messages:
                _emit(message, out)
            _emit(str(found) + " error found in the message number "
                  + str(index) + ".", out)
            _emit(_RULER, out)
            for entry_line in entry:
                ## lines carry their own newline, except possibly the last
                ## line of the file
                _emit(entry_line.rstrip("\n"), out)
            _emit(_RULER, out)
    return msgids, msgstrs, error_count


def findDuplicates(msgids):
    """Return report lines for msgids occurring more than once.

    msgids -- list of messages, each a list of quoted strings
    For every repeated message one line "Corresponding ids FIRST:LATER" is
    produced per later occurrence, grouped by the first occurrence.
    """
    positions = {}      # message -> indices where it occurs
    order = []          # messages in order of first occurrence
    for index, msgid in enumerate(msgids):
        key = tuple(msgid)      # lists are not hashable
        if key not in positions:
            positions[key] = []
            order.append(key)
        positions[key].append(index)
    dup_list = []
    for key in order:
        first = positions[key][0]
        for later in positions[key][1:]:
            dup_list.append("Corresponding ids " + str(first) + ":" + str(later))
    return dup_list


def _prompt(text):
    """Ask the user for a line; end of input is answered with "exit"."""
    try:
        return _read_input(text)
    except EOFError:
        return "exit"


def _report_duplicates(dup_list):
    """Print the duplicates, or save a long list to duplicates.txt on request."""
    if len(dup_list) > 20:
        _emit("\n There is too many duplicates, write it to file duplicates.txt?")
        resp = _prompt("Type Yes to confirm it:")
        if resp in ("yes", "Yes", "Y", "y"):
            try:
                dup_file = open("duplicates.txt", 'w')
            except IOError:
                _emit("\nI/O Error: Cannot open file for writing?\n")
                sys.exit(1)
            _emit("\nPrinting to file duplicates.txt\n")
            with dup_file:
                dup_file.writelines(line + "\n" for line in dup_list)
            return
    ## Short list, or the user declined the file: show it on the terminal
    _emit("\nPrinting to terminal:")
    for line in dup_list:
        _emit("\t" + line)
    _emit("\n")


def _no_such_entry(number, total):
    """Tell the user that the requested entry number does not exist."""
    _emit("There is no entry number " + str(number)
          + "; the file has " + str(total) + " entries (numbered from 0).")


def _list_messages(msgids, msgstrs):
    """Interactively show one message (N) or compare two msgids (N:M)."""
    while True:
        resp = _prompt("Set the number of entery for MSG listing: ")
        match = _PAIR_RE.match(resp)
        if match is not None and match.group(1) != "":
            numbers = [int(match.group(1))]
            if match.group(2) != "":
                numbers.append(int(match.group(2)))
            missing = [n for n in numbers if n >= len(msgids)]
            if missing:
                _no_such_entry(missing[0], len(msgids))
            elif len(numbers) == 2:
                first, second = numbers
                if msgids[first] == msgids[second]:
                    _emit("Strings are the same")
                else:
                    _emit("Different")
                for msg_line in msgids[first] + msgids[second]:
                    _emit(str(msg_line))
            else:
                index = numbers[0]
                _emit(str(index))
                _emit(_RULER)
                _emit("ID:")
                for msg_line in msgids[index]:
                    _emit(str(msg_line))
                _emit("STR:")
                for str_line in msgstrs[index]:
                    _emit(str(str_line))
                _emit(_RULER)
        elif _EXIT_RE.match(resp) is None:
            _emit("Please type a number!")
        else:
            _emit("Exiting from enteries listing")
            break


def _list_entries(entries):
    """Interactively print the raw lines of the entry with the given number."""
    while True:
        resp = _prompt("Set the number of entery to be listed: ")
        if resp.isdigit():
            index = int(resp)
            if index >= len(entries):
                _no_such_entry(index, len(entries))
                continue
            _emit(_RULER)
            for entry_line in entries[index]:
                _emit(entry_line.rstrip("\n"))
            _emit(_RULER)
        elif _EXIT_RE.match(resp) is None:
            _emit("Please type a number!")
        else:
            _emit("Exiting from enteries listing")
            break


def _print_summary(entry_total, count_msgstr, error_count, dup_total):
    """Print the final statistics of the run."""
    # Entry list
    _emit("The PO file contains %d translation entries." % entry_total)
    _emit("The PO file contains %d msgstr lines.\n" % count_msgstr)
    # Error list
    _emit("The PO file contains %d errors in %d enteries:"
          % (error_count["General"], error_count["Enteries"]))
    for kind, cause in _SUMMARY_CAUSES:
        _emit("\t%d errors, caused by:\n\t %s" % (error_count[kind], cause))
    _emit("By correcting one line you can get rid of several errors.")
    # Duplicates listing
    _emit("There was found %d duplicate msgid strings.\n" % dup_total)


def _build_parser():
    """Create the command line parser."""
    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename",
                      help="target po file to check", metavar="FILE")
    parser.add_option("-o", "--output", dest="output",
                      help="output file with corrupted etries")
    parser.add_option("-s", "--strict", dest="strict",
                      action="store_true", default=False,
                      help="make checking Uniforum .po file format checking")
    parser.add_option("-d", "--duplicate", dest="duplicate",
                      action="store_true", default=False,
                      help="check the file for duplicates")
    parser.add_option("-i", "--interactive", dest="imsglist",
                      action="store_true", default=False,
                      help="run interactive message listing")
    ## The value is not used anywhere; kept so existing command lines work.
    parser.add_option("-D", "--debug", dest="debug",
                      action="store_true", default=True,
                      help="debug messages precission level")
    return parser


def main(argv=None):
    """Run the checker.

    argv -- list of command line arguments (default: sys.argv[1:])
    Exits with status 1 when no input file is given or a file cannot be
    opened.
    """
    parser = _build_parser()
    options, _unused_args = parser.parse_args(argv)

    ## Open specified file
    if options.filename is None:
        _emit("\nError: You did not specify file name\n")
        parser.print_help()
        sys.exit(1)
    try:
        po_file = open(options.filename, 'r')
    except IOError:
        _emit("\nI/O Error: Correct file name?\n")
        parser.print_help()
        sys.exit(1)
    _emit("\nUsing file " + options.filename)

    ## Parsing .po file in "entries"
    with po_file:
        entries, count_msgstr = parseEntries(po_file)

    ## Check all entries; the report goes to the output file when given
    if options.output is not None:
        _emit("\nUsing file " + options.output + " to output report...\n\n")
        try:
            report = open(options.output, 'w')
        except IOError:
            _emit("I/O Error: Correct file name?")
            sys.exit(1)
        with report:
            msgids, msgstrs, error_count = checkEntries(
                entries, options.strict, report)
    else:
        msgids, msgstrs, error_count = checkEntries(entries, options.strict)

    ## Check for duplicates
    dup_list = []
    if options.duplicate:
        dup_list = findDuplicates(msgids)
        _report_duplicates(dup_list)

    ## Listing 'msgids' and 'msgstrs', then whole entries
    if options.imsglist:
        _list_messages(msgids, msgstrs)
        _list_entries(entries)

    ## Results summary
    _print_summary(len(entries), count_msgstr, error_count, len(dup_list))


if __name__ == "__main__":
    main()