#! /usr/bin/python -O
# -*- coding: iso-8859-1 -*-
# This script iterates through the 'html' and 'lib' directories
# looking for PHP scripts that contain an include_once("xxx_po.inc")
# line and __() calls.  It creates/appends to the corresponding
# "xxx_po.inc" file in the 'lang' subdirectory and places the
# i18n strings into the file in the proper format.
#
# usage: genpopo [-v] [-f]
#   -v: verbose, print duplicate terms that could be moved to common_po
#   -f: force, overwrite existing translated files, otherwise append
#
# The code is written to run unchanged on Python 2.6+ and Python 3.
#

import os
import re
import sys

INC_HEADER = """\
<?php
# INSTRUCTIONS TO TRANSLATORS
#
# This file contains the i18n translations for a subset of the
# Arch Linux User-community Repository (AUR). This is a PHP
# script, and as such, you MUST pay great attention to the syntax.
# If your text contains any double-quotes ("), you MUST escape
# them with the backslash character (\\).
#

include_once("translator.inc");
global $_t;
"""

# Candidate directories, relative to the working directory, tried in order.
LANG_DIRS = ['../lang', 'lang']
SOURCE_DIRS = ['../html', '../lib', 'html', 'lib']

COMMON_LIST = 'common_po.list'
COMMON_PO = 'common_po.inc'

# Opening of a __("...") call; group 1 is the string body, in which a double
# quote is accepted only when it directly follows a backslash.
TERM_RE = re.compile(r'_\(\s*"(([^"]|(?<=\\)["])+)"')

# include("xxx_po.inc"); / include_once('xxx_po.inc'); group 1 is the file name.
INCLUDE_RE = re.compile(
    r"""include(?:_once)?\s*\(\s*["']([A-Za-z_]+_po\.inc)["']\s*\);""")

# include_once("en/...") line inside a lang/xxx_po.inc wrapper file.
EN_INCLUDE_RE = re.compile(r'^include_once\("en/(.*)"\);')

# $_t["en"]["term"] = ...; line inside a lang/en/xxx_po.inc catalog file.
ENTRY_RE = re.compile(r'^\$_t\["en"\]\["(.*)"\].*$')

# Lines that may be dropped from the end of an existing PHP file so that new
# text can be appended before a fresh closing tag.
_TRAILER_LINES = ('', "\n", "?>", "?>\n", "\n?>")


def _open_text(path, mode='r'):
    """Open *path* as text without ever failing on undecodable bytes.

    On Python 3 the file is opened as ISO-8859-1, which maps every byte to
    exactly one character and back, so file content round-trips unchanged
    whatever the locale is.  On Python 2 plain byte strings already behave
    that way, so the builtin open() is used as is.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, encoding='iso-8859-1')
    return open(path, mode)


def _read_lines(path):
    """Return the lines of *path*, or None when the file cannot be opened."""
    try:
        handle = _open_text(path)
    except IOError:
        return None
    with handle:
        return handle.readlines()


def _strip_trailer(lines):
    """Remove trailing blank lines and the closing '?>' from *lines* in place.

    The emptiness guard matters: a file that is empty or contains nothing but
    the closing tag leaves an empty list instead of raising IndexError.
    Leading lines are never touched (the first-line test against '' that the
    script used to perform could not match anything readlines() returns).
    """
    while lines and lines[-1] in _TRAILER_LINES:
        del lines[-1]
    return lines


def _php_escape(term):
    """Escape double quotes so *term* is valid inside a PHP "..." string."""
    return term.replace('"', '\\"')


def _php_unescape(text):
    """Inverse of _php_escape(): turn \\" back into a plain double quote."""
    return text.replace('\\"', '"')


def _is_php_source(name):
    """Tell whether file *name* should be scanned for __() calls.

    Generated *_po.inc catalogs are excluded; other .inc files are scanned.
    """
    if name.endswith(('.class', '.php', '.phtml')):
        return True
    return name.endswith('.inc') and not name.endswith('_po.inc')


def _find_po_include(lines):
    """Return the first non-common xxx_po.inc included by *lines*, or None."""
    for line in lines:
        match = INCLUDE_RE.search(line)
        if match and match.group(1) != COMMON_PO:
            return match.group(1)
    return None


def load_common_terms():
    """Read common_po.list from the first lang directory that has one.

    Returns a dict mapping each common term to a usage counter starting at 0,
    or None when no common_po.list can be found.  Comment lines (starting
    with '#') and blank lines are ignored; only the line terminator is
    removed, so a final line without a newline keeps its last character.
    """
    for directory in LANG_DIRS:
        path = os.path.join(directory, COMMON_LIST)
        if not os.path.exists(path):
            continue
        terms = {}
        with _open_text(path) as handle:
            for line in handle:
                entry = line.rstrip('\r\n')
                if entry and not entry.startswith('#'):
                    terms[entry] = 0
        return terms
    return None


def scan_sources(lang, common, print_dupes):
    """Collect i18n terms from every PHP source file into *lang*.

    lang        -- dict: po file name -> dict of terms; updated in place.
    common      -- dict: common term -> usage count; counts are incremented
                   instead of storing the term in a per-file catalog.
    print_dupes -- when true, report terms already present in another catalog.
    """
    for directory in SOURCE_DIRS:
        if not os.path.exists(directory):
            continue
        for name in sorted(os.listdir(directory)):
            if not _is_php_source(name):
                continue
            with _open_text(os.path.join(directory, name)) as handle:
                lines = handle.readlines()

            # Only files that include their own xxx_po.inc are parsed.
            po = _find_po_include(lines)
            if po is None:
                continue
            catalog = lang.setdefault(po, {})

            print("Parsing %s..." % name)
            for line in lines:
                for match in TERM_RE.finditer(line):
                    term = _php_unescape(match.group(1))
                    if term in common:
                        common[term] += 1
                        continue
                    if print_dupes:
                        for other in sorted(lang):
                            if other != po and term in lang[other]:
                                print('...Duplicate term: "%s" is also in %s.'
                                      % (term, other))
                    catalog[term] = 1


def _write_wrapper(path, po, preamble=INC_HEADER):
    """Write lang/<po>: *preamble*, the include of en/<po>, closing tag."""
    with _open_text(path, 'w') as out:
        out.write(preamble)
        out.write('\ninclude_once("en/%s");\n' % po)
        out.write('\n?>')


def _write_catalog(path, preamble, terms):
    """Write lang/en/<po>: *preamble*, one entry per term, closing tag.

    Terms are re-escaped on the way out; they were unescaped when extracted,
    and an unescaped double quote would terminate the PHP string early.
    """
    with _open_text(path, 'w') as out:
        out.write(preamble)
        for term in terms:
            escaped = _php_escape(term)
            out.write('\n')
            out.write('$_t["en"]["%s"] = "%s";\n' % (escaped, escaped))
        out.write('\n?>')


def generate_forced(lang_dir, lang):
    """Overwrite every wrapper and English catalog from scratch (-f)."""
    for po in sorted(lang):
        print("Generating %s..." % po)
        _write_wrapper(os.path.join(lang_dir, po), po)
        _write_catalog(os.path.join(lang_dir, 'en', po),
                       INC_HEADER, sorted(lang[po]))


def generate_incremental(lang_dir, lang):
    """Keep existing files intact and append only what is missing."""
    for po in sorted(lang):
        print("Updating %s..." % po)

        # Wrapper file: create it, or add the en/ include if it lacks one.
        wrapper_path = os.path.join(lang_dir, po)
        existing = _read_lines(wrapper_path)
        if existing is not None:
            _strip_trailer(existing)
        if not existing:
            # Missing, empty, or nothing but a closing tag: start afresh so
            # the file gets its '<?php' header.
            _write_wrapper(wrapper_path, po)
        elif not any(EN_INCLUDE_RE.search(line) for line in existing):
            _write_wrapper(wrapper_path, po, ''.join(existing))

        # English catalog: remember the terms already present, append the rest.
        catalog_path = os.path.join(lang_dir, 'en', po)
        existing = _read_lines(catalog_path)
        if existing is not None:
            _strip_trailer(existing)
        known = set()
        for line in existing or ():
            match = ENTRY_RE.search(line)
            if match:
                known.add(_php_unescape(match.group(1)))
        preamble = ''.join(existing) if existing else INC_HEADER
        fresh = [term for term in sorted(lang[po]) if term not in known]
        _write_catalog(catalog_path, preamble, fresh)


def main(argv=None):
    """Run genpopo; *argv* defaults to sys.argv.

    Raises SystemExit(1) after printing a message when common_po.list or the
    lang directory cannot be found.
    """
    if argv is None:
        argv = sys.argv
    print_dupes = '-v' in argv
    force = '-f' in argv

    common = load_common_terms()
    if common is None:
        print("Can't find common_po.list file.")
        raise SystemExit(1)

    # Every common term belongs to the common_po.inc catalog.
    lang = {COMMON_PO: dict.fromkeys(common, 1)}

    lang_dir = None
    for directory in LANG_DIRS:
        if os.path.exists(directory):
            lang_dir = directory
            break
    if lang_dir is None:
        print("Can't find the lang directory.")
        raise SystemExit(1)

    scan_sources(lang, common, print_dupes)

    # Generate the .inc files.  Existing files only get new material appended
    # unless the 'force' option was passed.
    en_dir = os.path.join(lang_dir, 'en')
    if not os.path.exists(en_dir):
        os.mkdir(en_dir)
    if force:
        generate_forced(lang_dir, lang)
    else:
        generate_incremental(lang_dir, lang)

    # Print out warnings for unused and little-used common entries.
    for key in sorted(common):
        if common[key] == 1:
            print("Warning: common entry '%s' is only used once." % key)
    for key in sorted(common):
        if common[key] == 0:
            print("Warning: unused common entry '%s'." % key)


if __name__ == '__main__':
    main()

# vim: ts=4 sw=4 et ft=python