# Source: Wikipedia, the free encyclopedia (维基百科)
# Page: User:PhiLiP/字词转换/转换代码 (word conversion / conversion code)

import re

# Every variant code the script recognises; the output section walks this
# list in order when it emits the collected rules.
variants = [
    'zh-hans',
    'zh-hant',
    'zh-cn',
    'zh-hk',
    'zh-mo',
    'zh-my',
    'zh-sg',
    'zh-tw',
]

# For each variant, the other variants that also receive its bidirectional
# conversions whenever they have no entry of their own for the same source
# text.
fallback = dict([
    ('zh-hans', ('zh-cn', 'zh-sg', 'zh-my')),
    ('zh-cn', ('zh-sg', 'zh-my')),
    ('zh-sg', ('zh-cn', 'zh-my')),
    ('zh-my', ('zh-sg', 'zh-cn')),
    ('zh-hant', ('zh-tw', 'zh-hk', 'zh-mo')),
    ('zh-tw', ('zh-hk', 'zh-mo')),
    ('zh-hk', ('zh-mo', 'zh-tw')),
    ('zh-mo', ('zh-hk', 'zh-tw')),
])

# Regex that splits the body of one rule into its individual choices.
#
# A ';' (plus any whitespace after it) is treated as a separator only when it
# is followed by the start of another choice or by the end of the rule, so a
# ';' that appears inside the converted text itself does not split the rule.
#
# Raw strings keep the regex escapes (\s) out of Python's own string-escape
# processing; the resulting pattern text is unchanged.
_choice_starts = []
for variant in variants:
    _choice_starts.append(r'%s\s*:' % variant)  # zh-hans:xxx;zh-hant:yyy
    _choice_starts.append(r'[^;]*?=>\s*%s\s*:' % variant)  # xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
_choice_starts.append(r'\s*$')  # trailing ';' at the very end of the rule

varsep_pattern = r';\s*(?=' + '|'.join(_choice_starts) + ')'
varsep = re.compile(varsep_pattern)

# Load the wikitext to scan. The context manager closes the file even if the
# read fails, and the encoding is pinned so that the Chinese text is decoded
# the same way on every platform instead of using the locale default.
with open('test.txt', 'r', encoding='utf-8') as fi:
    data = fi.read()

# Capture the body of every -{A|...}- and -{H|...}- block. [\s\S]+? lets a
# body span several lines while still stopping at the first closing "}-".
# The pattern is a raw string so the regex escapes reach `re` untouched.
allrules = re.findall(r'-\{\s*[AH]\s*\|\s*([\s\S]+?)\s*\}-', data)

# One conversion table per variant, each mapping a source text to the text
# it should become in that variant. Every variant gets its own empty dict.
convertrules = {
    code: {}
    for code in (
        'zh-hans',
        'zh-hant',
        'zh-cn',
        'zh-hk',
        'zh-sg',
        'zh-mo',
        'zh-my',
        'zh-tw',
    )
}

# Fold every collected rule into convertrules.
#
# A rule body is a list of choices separated by ';'. Each choice is either
#   variant:text        bidirectional - every other text listed in the same
#                       rule converts to this text for that variant, or
#   from=>variant:to    unidirectional - only `from` converts to `to`.
# Choices without a ':', with an empty target, or with a variant code that
# is not listed in `variants` are ignored.
for rule in allrules:
    bidtable = {}   # variant -> text, from "variant:text" choices
    unidtable = {}  # variant -> {from: to}, from "from=>variant:to" choices

    for choice in varsep.split(rule):
        head, colon, to = choice.partition(':')
        if not colon:
            continue  # syntax error, skip
        to = to.strip()
        if not to:
            # if to is empty, strtr() could return a wrong result
            continue

        unid = head.split('=>', 1)
        variant = unid[-1].strip()
        if variant not in variants:
            # Unknown variant code. Nothing is recorded for it, so the merge
            # step below never looks up a table that does not exist in
            # convertrules (previously this raised KeyError).
            continue

        if len(unid) == 1:
            bidtable[variant] = to
        else:
            frm = unid[0].strip()
            unidtable.setdefault(variant, {})[frm] = to

    # Bidirectional choices: within one rule, each variant's text replaces
    # the texts given for all the other variants.
    bidfroms = bidtable.values()
    for variant, to in bidtable.items():
        for frm in bidfroms:
            if frm == to:
                continue  # converting a text to itself is pointless
            convertrules[variant][frm] = to
            # Fallback variants take the same conversion, but only when they
            # do not already have one for this source text. Stored targets
            # are never empty, so a key-presence test is sufficient.
            for fbv in fallback[variant]:
                convertrules[fbv].setdefault(frm, to)

    # Unidirectional choices are applied last, so they override anything the
    # bidirectional expansion produced for the same source text.
    for variant, mapping in unidtable.items():
        convertrules[variant].update(mapping)

# Emit all collected conversions as one -{H|...}- block of unidirectional
# rules, grouped by variant (in `variants` order) and sorted by source text
# within each variant.
#
# The text is assembled first and joined once, then written inside a context
# manager so the file is closed even if the write fails. The encoding is
# pinned so the Chinese text is written the same way on every platform.
lines = ['-{H|\n']
for variant in variants:
    rules = convertrules[variant]
    for frm in sorted(rules):
        lines.append('  %s=>%s:%s;\n' % (frm, variant, rules[frm]))
lines.append('}-')
data = ''.join(lines)

with open('output.txt', 'w', encoding='utf-8') as fo:
    fo.write(data)