处理各种编码的字符串,处理编码字符串,# file: Unicode2.py


# file: Unicode2.py
# -*- coding: utf-8 -*-
#
# Demonstrates chilkat.CkString: appending text in a named encoding, reading
# it back in other encodings, comparing character counts with byte counts,
# and saving the same string to files in several encodings.
#
# print is always called with one parenthesized argument so that the script
# parses under both Python 2 and Python 3 and prints the same text in each.
# NOTE(review): the expected values quoted in the comments ("7", "11") come
# from the original example and presumably assume Python 2 byte strings --
# confirm before relying on them under Python 3.

import chilkat

# The CkString object can handle any character encoding.
s1 = chilkat.CkString()

# The appendEnc method allows us to append a string in any encoding.
s1.appendEnc('èéêëabc', 'utf-8')

# If you're working with different encodings, you may wish
# to name your string variables to reflect the encoding.
strAnsi = s1.getAnsi()
strUtf8 = s1.getUtf8()

# Prints "7"
print(len(strAnsi))

# Prints "11"
print(len(strUtf8))

# getNumChars returns the number of characters
print('Num Chars: ' + str(s1.getNumChars()))

# utf-8 chars do not have a constant number of bytes/char.
# A single utf-8 char is represented in 1 to 4 bytes (RFC 3629; the
# original utf-8 design allowed sequences of up to 6 bytes).
print('utf-8: ' + str(s1.getSizeUtf8()))

# ANSI is typically 1 byte per char, but for some languages
# such as Japanese, ANSI equates to a character encoding that may
# not be 1 byte/char.  (Shift_JIS is the ANSI encoding for Japanese)
print('ANSI: ' + str(s1.getSizeAnsi()))

# Let's create an English/Japanese string.
s2 = chilkat.CkString()
s2.appendEnc('abc愛知県新城市の', 'utf-8')

# We can get the string in any multibyte encoding.
print('s2 num chars = ' + str(s2.getNumChars()))

strShiftJIS = s2.getEnc('shift_JIS')
print('Shift-JIS num bytes = ' + str(len(strShiftJIS)))

strIso2022JP = s2.getEnc('iso-2022-jp')
print('iso-2022-jp num bytes = ' + str(len(strIso2022JP)))

strEucJp = s2.getEnc('euc-jp')
print('euc-jp num bytes = ' + str(len(strEucJp)))

# We can save the string in any encoding
s2.saveToFile('out_shift_jis.txt', 'shift_JIS')
s2.saveToFile('out_iso_2022_jp.txt', 'iso-2022-jp')
s2.saveToFile('out_utf8.txt', 'utf-8')
s2.saveToFile('out_euc_jp.txt', 'euc-jp')

# You may mix any number of languages in a utf-8 string
# because utf-8 can encode characters in all languages.
# (utf-8 is the multi-byte encoding of Unicode)
#
# An ANSI string can generally hold us-ascii + the native language.
# For example, Shift_JIS can represent us-ascii characters
# in addition to Japanese characters.
# For example, this is OK
strShiftJis = 'abc123' + s2.getEnc('shift_JIS')

# This is not OK: the Greek literal is written in this utf-8 source file,
# so it is presumably not Shift_JIS text and the concatenation would mix
# two encodings in one string.  The line is kept on purpose as the
# counter-example.
strShiftJis2 = 'ςστυφ' + s2.getEnc('shift_JIS')

print("Done!")

# This snippet comes from http://byrx.net

评论关闭