User:Abacn/detectinvisible.py

维基百科,自由的百科全书
# Inclusive (first, last) code point bounds of the three Unicode Private Use
# Areas: the BMP block, Supplementary PUA-A (plane 15) and PUA-B (plane 16).
pua_ranges = (
    (0xE000, 0xF8FF),
    (0xF0000, 0xFFFFD),
    (0x100000, 0x10FFFD),
)


def is_pua_codepoint(c):
    """Return True if the integer code point *c* lies in a Private Use Area.

    *c* is compared against every inclusive range in ``pua_ranges``; the
    result is False when it falls inside none of them.
    """
    for low, high in pua_ranges:
        if low <= c <= high:
            return True
    return False
    
# Copy in.txt to out.txt, replacing every Private Use Area character with an
# HTML comment marker and printing a line to stdout for each replacement.
#
# Both files are handled as raw bytes ('rb'/'wb') with explicit UTF-8
# decoding/encoding, so the script runs the same way under Python 2 and
# Python 3. The `with` blocks guarantee that the handles are closed (and the
# output flushed) even if decoding or writing fails part-way through.
#
# The input is read and decoded in full before out.txt is opened, so a
# missing or undecodable in.txt does not leave a truncated out.txt behind.
with open("in.txt", 'rb') as fin:
    text = fin.read().decode('utf-8')

with open("out.txt", 'wb') as fout:
    for ch in text:
        if is_pua_codepoint(ord(ch)):
            fout.write(b'<!--Private Area Character-->')
            print("HeiHei")
            continue
        # Only the encode step can raise UnicodeEncodeError, so the try is
        # limited to it instead of wrapping the whole loop body.
        try:
            encoded = ch.encode('utf-8')
        except UnicodeEncodeError:
            # Defensive branch kept from the original script.
            # NOTE(review): text produced by a strict UTF-8 decode should
            # always re-encode cleanly; confirm whether this can still fire.
            fout.write(b'<!--Invisible Character-->')
            print("Haha")
        else:
            fout.write(encoded)