User:PhiLiP/字词转换/转换代码
< User:PhiLiP | 字词转换
import re
# Variant codes that may appear in a conversion rule.  The output section
# walks this list in order, so the order here is the order of the output.
variants = [
    'zh-hans', 'zh-hant',
    'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
]

# For each variant, the other variants that also receive its bidirectional
# mappings when they do not yet have a mapping for the same source text.
fallback = {
    # zh-hans and the variants it falls back to
    'zh-hans': ('zh-cn', 'zh-sg', 'zh-my'),
    'zh-cn': ('zh-sg', 'zh-my'),
    'zh-sg': ('zh-cn', 'zh-my'),
    'zh-my': ('zh-sg', 'zh-cn'),
    # zh-hant and the variants it falls back to
    'zh-hant': ('zh-tw', 'zh-hk', 'zh-mo'),
    'zh-tw': ('zh-hk', 'zh-mo'),
    'zh-hk': ('zh-mo', 'zh-tw'),
    'zh-mo': ('zh-hk', 'zh-tw'),
}
# Build the regex that splits one rule body into its individual choices.
# A ';' is treated as a separator only when, after optional whitespace, it
# is followed by the start of another choice or by the end of the rule:
#   zh-hans:xxx;zh-hant:yyy             (variant:text)
#   xxx=>zh-hans:yyy; xxx=>zh-hant:zzz  (source=>variant:text)
# The lookahead keeps the following choice intact; any other ';' stays part
# of the surrounding text.
# Raw strings are used so that '\s' reaches the regex engine as written
# instead of being an invalid Python string escape.
varsep_alternatives = []
for variant in variants:
    varsep_alternatives.append(r'%s\s*:' % variant)
    varsep_alternatives.append(r'[^;]*?=>\s*%s\s*:' % variant)
varsep_alternatives.append(r'\s*$')
varsep_pattern = r';\s*(?=' + '|'.join(varsep_alternatives) + ')'
varsep = re.compile(varsep_pattern)
# Read the whole input text.  The context manager closes the file even if
# the read fails.
# NOTE(review): no encoding is passed, so the platform default applies here,
# just as it does where output.txt is written — confirm that matches the
# encoding of test.txt.
with open('test.txt', 'r') as fi:
    data = fi.read()

# Capture the body of every -{A|...}- and -{H|...}- block, without the
# whitespace surrounding the body.  [\s\S] lets a body span several lines;
# the non-greedy match stops at the first closing '}-'.
allrules = re.findall(r'-\{\s*[AH]\s*\|\s*([\s\S]+?)\s*\}-', data)
# Accumulated conversion tables, one per variant:
#   convertrules[variant][source_text] = replacement_text
# Every variant starts with its own separate, empty table.
convertrules = {
    code: {}
    for code in ('zh-hans', 'zh-hant', 'zh-cn', 'zh-hk',
                 'zh-sg', 'zh-mo', 'zh-my', 'zh-tw')
}
# Fold every extracted rule into convertrules.  A rule body is a list of
# choices separated by varsep; each choice is either
#   variant:text           (bidirectional), or
#   source=>variant:text   (unidirectional).
for rule in allrules:
    bidtable = {}   # variant -> text, from the bidirectional choices
    unidtable = {}  # variant -> {source: text}, from the unidirectional ones
    for choice in varsep.split(rule):
        variant = choice.split(':', 1)
        if len(variant) != 2:
            continue  # syntax error, skip
        to = variant[1].strip()
        variant = variant[0].strip()
        unid = variant.split('=>', 1)
        # if to is empty, strtr() could return a wrong result
        if len(unid) == 1:
            if to and variant in variants:
                bidtable[variant] = to
        else:
            frm = unid[0].strip()
            variant = unid[1].strip()
            # Record the choice only for a known variant.  Creating a table
            # for an unknown one would raise KeyError when the tables are
            # merged into convertrules at the end of this rule.
            if to and variant in variants:
                unidtable.setdefault(variant, {})[frm] = to

    # Within one rule, the text of every variant converts to the text of
    # each other variant; identical texts need no mapping.
    bidfroms = bidtable.values()
    for variant, to in bidtable.items():
        for frm in bidfroms:
            if frm == to:
                continue
            convertrules[variant][frm] = to
            # Fallback variants take the same mapping unless they already
            # have one for this source text.
            for fbv in fallback[variant]:
                if frm not in convertrules[fbv]:
                    convertrules[fbv][frm] = to

    # Unidirectional choices are merged last, so within this rule they
    # replace any bidirectional mapping for the same source text.
    for variant, table in unidtable.items():
        convertrules[variant].update(table)
# Serialise every accumulated mapping as a single -{H|...}- block of
# "source=>variant:replacement;" lines: variants in the order of the
# `variants` list, sources sorted within each variant.
lines = ['-{H|\n']
for variant in variants:
    table = convertrules[variant]
    for frm in sorted(table):
        lines.append(' %s=>%s:%s;\n' % (frm, variant, table[frm]))
lines.append('}-')
data = ''.join(lines)

# The file is opened only once the text is complete, and the context manager
# closes it even if the write fails.
# NOTE(review): no encoding is passed, so the platform default applies here,
# just as it does where test.txt is read.
with open('output.txt', 'w') as fo:
    fo.write(data)