# -*- coding:gbk -*-
# The declaration above only tells the interpreter how to decode the bytes of
# this source file (PEP 263). Even with gbk declared, the str literals defined
# below are unicode strings at runtime. The literals are written with \u
# escapes so this file stays pure ASCII and parses identically whether it is
# saved as GBK or as UTF-8.

# Report the interpreter's default string encoding.
import sys

print(sys.getdefaultencoding())
# >>> utf-8

# Encoding conversion
# -------------- flow inherited from Python 2 ----------------
# utf-8 bytes --> decode --> unicode
# unicode     --> encode --> gbk bytes

s = "\u4f60\u597d"  # "ni hao" (hello)
# A Python 3 str is already unicode and has no decode() method, so decode only
# when the value really is bytes; "utf-8" names the encoding those bytes use.
s_unicode = s.decode("utf-8") if isinstance(s, bytes) else s
# "gbk" is the target encoding handed to encode().
s_gbk = s_unicode.encode("gbk")

# Example
s1 = "\u5927\u5bb6\u597d"  # "da jia hao" (hello everyone)
s1_to_utf8 = s1.encode("utf-8")
print(s1_to_utf8)  # >>> b'\xe5\xa4\xa7\xe5\xae\xb6\xe5\xa5\xbd'

# Re-encoding bytes always goes through unicode: decode first, then encode.
s1_as_text = s1_to_utf8.decode("utf-8")
s1_to_gbk = s1_as_text.encode("gbk")
s1_to_gb2312 = s1_as_text.encode("gb2312")
s1_to_gb2312_chinese = s1_to_gb2312.decode("gb2312")

# Decoding yields a unicode str again, which prints as readable Chinese text;
# this shows that str is unicode by default.
print(s1_to_gb2312_chinese)  # >>> the original three characters of s1
print(s1_to_gb2312)  # >>> b'\xb4\xf3\xbc\xd2\xba\xc3'
print(s1_to_gbk)  # >>> b'\xb4\xf3\xbc\xd2\xba\xc3'
print(s1)  # >>> the original three characters of s1