Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pymysql/charset.py: 94%

182 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:28 +0000

# Internal use only. Do not use directly.

2 

# Keys are charset ids from the table registered in this module (8, 33, 88
# and 91 all appear there).
# NOTE(review): values are presumably the maximum number of bytes per
# character for that charset -- confirm against the code that reads this.
MBLENGTH = {8: 1, 33: 3, 88: 2, 91: 2}

4 

5 

class Charset:
    """One charset/collation entry, identified by its numeric id.

    Instances carry four plain attributes set by ``__init__``: ``id``,
    ``name``, ``collation`` and ``is_default``.
    """

    # Charset names for which ``encoding`` returns a different string.
    # Any name not listed here is returned unchanged.
    _ENCODING_ALIASES = {
        "utf8mb4": "utf8",
        "utf8mb3": "utf8",
        "latin1": "cp1252",
        "koi8r": "koi8_r",
        "koi8u": "koi8_u",
    }

    def __init__(self, id, name, collation, is_default=False):
        """Store the entry's fields.

        Args:
            id: Numeric identifier of the entry.
            name: Charset name (e.g. ``"utf8mb4"``).
            collation: Collation name (e.g. ``"utf8mb4_general_ci"``).
            is_default: Flag stored as given; defaults to ``False``.
        """
        self.id, self.name, self.collation = id, name, collation
        self.is_default = is_default

    def __repr__(self):
        """Return a debug representation showing id, name and collation."""
        return (
            f"Charset(id={self.id}, name={self.name!r}, collation={self.collation!r})"
        )

    @property
    def encoding(self):
        """Return the encoding name for this charset.

        Names listed in ``_ENCODING_ALIASES`` are translated; every other
        name is returned as-is.
        """
        name = self.name
        return self._ENCODING_ALIASES.get(name, name)

    @property
    def is_binary(self):
        """Return ``True`` when this entry's id is 63.

        63 is the id registered for the ``"binary"`` charset in this
        module's table.
        """
        return self.id == 63

32 

33 

class Charsets:
    """Registry of charset entries, indexed by id and by charset name."""

    def __init__(self):
        """Create an empty registry."""
        self._by_id = {}
        self._by_name = {}

    def add(self, c):
        """Register entry ``c``.

        Every entry is indexed by ``c.id``.  Only entries whose
        ``is_default`` is truthy are indexed by ``c.name``, so a name
        lookup resolves to the default entry for that charset.
        """
        self._by_id[c.id] = c
        if c.is_default:
            self._by_name[c.name] = c

    def by_id(self, id):
        """Return the entry registered under ``id``.

        Raises:
            KeyError: If no entry with that id was added.
        """
        return self._by_id[id]

    def by_name(self, name):
        """Return the default entry for charset ``name``, or ``None``.

        The lookup is case-insensitive, and ``"utf8"`` (in any letter
        case) is treated as an alias for ``"utf8mb4"``.
        """
        # Normalise case *before* applying the alias so that "UTF8" is
        # aliased as well, instead of being looked up as a bare "utf8" key.
        name = name.lower()
        if name == "utf8":
            name = "utf8mb4"
        return self._by_name.get(name)

51 

52 

# Module-level registry; it is filled by the ``_charsets.add(...)`` calls
# further down in this file.
_charsets = Charsets()
# Public lookup helpers are the registry's bound methods.
charset_by_name = _charsets.by_name
charset_by_id = _charsets.by_id

56 

"""
TODO: update this script.

Generated with:

mysql -N -s -e "select id, character_set_name, collation_name, is_default
from information_schema.collations order by id;" | python -c "import sys
for l in sys.stdin.readlines():
    id, name, collation, is_default = l.split(chr(9))
    if is_default.strip() == "Yes":
        print('_charsets.add(Charset(%s, \'%s\', \'%s\', True))' \
              % (id, name, collation))
    else:
        print('_charsets.add(Charset(%s, \'%s\', \'%s\'))' \
              % (id, name, collation))"
"""

73 

# Charset table.  Each call registers one (id, charset name, collation)
# entry; a trailing ``True`` marks the entry that a lookup by charset name
# resolves to.
_charsets.add(Charset(1, "big5", "big5_chinese_ci", True))
_charsets.add(Charset(2, "latin2", "latin2_czech_cs"))
_charsets.add(Charset(3, "dec8", "dec8_swedish_ci", True))
_charsets.add(Charset(4, "cp850", "cp850_general_ci", True))
_charsets.add(Charset(5, "latin1", "latin1_german1_ci"))
_charsets.add(Charset(6, "hp8", "hp8_english_ci", True))
_charsets.add(Charset(7, "koi8r", "koi8r_general_ci", True))
_charsets.add(Charset(8, "latin1", "latin1_swedish_ci", True))
_charsets.add(Charset(9, "latin2", "latin2_general_ci", True))
_charsets.add(Charset(10, "swe7", "swe7_swedish_ci", True))
_charsets.add(Charset(11, "ascii", "ascii_general_ci", True))
_charsets.add(Charset(12, "ujis", "ujis_japanese_ci", True))
_charsets.add(Charset(13, "sjis", "sjis_japanese_ci", True))
_charsets.add(Charset(14, "cp1251", "cp1251_bulgarian_ci"))
_charsets.add(Charset(15, "latin1", "latin1_danish_ci"))
_charsets.add(Charset(16, "hebrew", "hebrew_general_ci", True))
_charsets.add(Charset(18, "tis620", "tis620_thai_ci", True))
_charsets.add(Charset(19, "euckr", "euckr_korean_ci", True))
_charsets.add(Charset(20, "latin7", "latin7_estonian_cs"))
_charsets.add(Charset(21, "latin2", "latin2_hungarian_ci"))
_charsets.add(Charset(22, "koi8u", "koi8u_general_ci", True))
_charsets.add(Charset(23, "cp1251", "cp1251_ukrainian_ci"))
_charsets.add(Charset(24, "gb2312", "gb2312_chinese_ci", True))
_charsets.add(Charset(25, "greek", "greek_general_ci", True))
_charsets.add(Charset(26, "cp1250", "cp1250_general_ci", True))
_charsets.add(Charset(27, "latin2", "latin2_croatian_ci"))
_charsets.add(Charset(28, "gbk", "gbk_chinese_ci", True))
_charsets.add(Charset(29, "cp1257", "cp1257_lithuanian_ci"))
_charsets.add(Charset(30, "latin5", "latin5_turkish_ci", True))
_charsets.add(Charset(31, "latin1", "latin1_german2_ci"))
_charsets.add(Charset(32, "armscii8", "armscii8_general_ci", True))
_charsets.add(Charset(33, "utf8mb3", "utf8mb3_general_ci", True))
_charsets.add(Charset(34, "cp1250", "cp1250_czech_cs"))
_charsets.add(Charset(36, "cp866", "cp866_general_ci", True))
_charsets.add(Charset(37, "keybcs2", "keybcs2_general_ci", True))
_charsets.add(Charset(38, "macce", "macce_general_ci", True))
_charsets.add(Charset(39, "macroman", "macroman_general_ci", True))
_charsets.add(Charset(40, "cp852", "cp852_general_ci", True))
_charsets.add(Charset(41, "latin7", "latin7_general_ci", True))
_charsets.add(Charset(42, "latin7", "latin7_general_cs"))
_charsets.add(Charset(43, "macce", "macce_bin"))
_charsets.add(Charset(44, "cp1250", "cp1250_croatian_ci"))
_charsets.add(Charset(45, "utf8mb4", "utf8mb4_general_ci", True))
_charsets.add(Charset(46, "utf8mb4", "utf8mb4_bin"))
_charsets.add(Charset(47, "latin1", "latin1_bin"))
_charsets.add(Charset(48, "latin1", "latin1_general_ci"))
_charsets.add(Charset(49, "latin1", "latin1_general_cs"))
_charsets.add(Charset(50, "cp1251", "cp1251_bin"))
_charsets.add(Charset(51, "cp1251", "cp1251_general_ci", True))
_charsets.add(Charset(52, "cp1251", "cp1251_general_cs"))
_charsets.add(Charset(53, "macroman", "macroman_bin"))
_charsets.add(Charset(57, "cp1256", "cp1256_general_ci", True))
_charsets.add(Charset(58, "cp1257", "cp1257_bin"))
_charsets.add(Charset(59, "cp1257", "cp1257_general_ci", True))
_charsets.add(Charset(63, "binary", "binary", True))
_charsets.add(Charset(64, "armscii8", "armscii8_bin"))
_charsets.add(Charset(65, "ascii", "ascii_bin"))
_charsets.add(Charset(66, "cp1250", "cp1250_bin"))
_charsets.add(Charset(67, "cp1256", "cp1256_bin"))
_charsets.add(Charset(68, "cp866", "cp866_bin"))
_charsets.add(Charset(69, "dec8", "dec8_bin"))
_charsets.add(Charset(70, "greek", "greek_bin"))
_charsets.add(Charset(71, "hebrew", "hebrew_bin"))
_charsets.add(Charset(72, "hp8", "hp8_bin"))
_charsets.add(Charset(73, "keybcs2", "keybcs2_bin"))
_charsets.add(Charset(74, "koi8r", "koi8r_bin"))
_charsets.add(Charset(75, "koi8u", "koi8u_bin"))
_charsets.add(Charset(76, "utf8mb3", "utf8mb3_tolower_ci"))
_charsets.add(Charset(77, "latin2", "latin2_bin"))
_charsets.add(Charset(78, "latin5", "latin5_bin"))
_charsets.add(Charset(79, "latin7", "latin7_bin"))
_charsets.add(Charset(80, "cp850", "cp850_bin"))
_charsets.add(Charset(81, "cp852", "cp852_bin"))
_charsets.add(Charset(82, "swe7", "swe7_bin"))
_charsets.add(Charset(83, "utf8mb3", "utf8mb3_bin"))
_charsets.add(Charset(84, "big5", "big5_bin"))
_charsets.add(Charset(85, "euckr", "euckr_bin"))
_charsets.add(Charset(86, "gb2312", "gb2312_bin"))
_charsets.add(Charset(87, "gbk", "gbk_bin"))
_charsets.add(Charset(88, "sjis", "sjis_bin"))
_charsets.add(Charset(89, "tis620", "tis620_bin"))
_charsets.add(Charset(91, "ujis", "ujis_bin"))
_charsets.add(Charset(92, "geostd8", "geostd8_general_ci", True))
_charsets.add(Charset(93, "geostd8", "geostd8_bin"))
_charsets.add(Charset(94, "latin1", "latin1_spanish_ci"))
_charsets.add(Charset(95, "cp932", "cp932_japanese_ci", True))
_charsets.add(Charset(96, "cp932", "cp932_bin"))
_charsets.add(Charset(97, "eucjpms", "eucjpms_japanese_ci", True))
_charsets.add(Charset(98, "eucjpms", "eucjpms_bin"))
_charsets.add(Charset(99, "cp1250", "cp1250_polish_ci"))
_charsets.add(Charset(192, "utf8mb3", "utf8mb3_unicode_ci"))
_charsets.add(Charset(193, "utf8mb3", "utf8mb3_icelandic_ci"))
_charsets.add(Charset(194, "utf8mb3", "utf8mb3_latvian_ci"))
_charsets.add(Charset(195, "utf8mb3", "utf8mb3_romanian_ci"))
_charsets.add(Charset(196, "utf8mb3", "utf8mb3_slovenian_ci"))
_charsets.add(Charset(197, "utf8mb3", "utf8mb3_polish_ci"))
_charsets.add(Charset(198, "utf8mb3", "utf8mb3_estonian_ci"))
_charsets.add(Charset(199, "utf8mb3", "utf8mb3_spanish_ci"))
_charsets.add(Charset(200, "utf8mb3", "utf8mb3_swedish_ci"))
_charsets.add(Charset(201, "utf8mb3", "utf8mb3_turkish_ci"))
_charsets.add(Charset(202, "utf8mb3", "utf8mb3_czech_ci"))
_charsets.add(Charset(203, "utf8mb3", "utf8mb3_danish_ci"))
_charsets.add(Charset(204, "utf8mb3", "utf8mb3_lithuanian_ci"))
_charsets.add(Charset(205, "utf8mb3", "utf8mb3_slovak_ci"))
_charsets.add(Charset(206, "utf8mb3", "utf8mb3_spanish2_ci"))
_charsets.add(Charset(207, "utf8mb3", "utf8mb3_roman_ci"))
_charsets.add(Charset(208, "utf8mb3", "utf8mb3_persian_ci"))
_charsets.add(Charset(209, "utf8mb3", "utf8mb3_esperanto_ci"))
_charsets.add(Charset(210, "utf8mb3", "utf8mb3_hungarian_ci"))
_charsets.add(Charset(211, "utf8mb3", "utf8mb3_sinhala_ci"))
_charsets.add(Charset(212, "utf8mb3", "utf8mb3_german2_ci"))
_charsets.add(Charset(213, "utf8mb3", "utf8mb3_croatian_ci"))
_charsets.add(Charset(214, "utf8mb3", "utf8mb3_unicode_520_ci"))
_charsets.add(Charset(215, "utf8mb3", "utf8mb3_vietnamese_ci"))
_charsets.add(Charset(223, "utf8mb3", "utf8mb3_general_mysql500_ci"))
_charsets.add(Charset(224, "utf8mb4", "utf8mb4_unicode_ci"))
_charsets.add(Charset(225, "utf8mb4", "utf8mb4_icelandic_ci"))
_charsets.add(Charset(226, "utf8mb4", "utf8mb4_latvian_ci"))
_charsets.add(Charset(227, "utf8mb4", "utf8mb4_romanian_ci"))
_charsets.add(Charset(228, "utf8mb4", "utf8mb4_slovenian_ci"))
_charsets.add(Charset(229, "utf8mb4", "utf8mb4_polish_ci"))
_charsets.add(Charset(230, "utf8mb4", "utf8mb4_estonian_ci"))
_charsets.add(Charset(231, "utf8mb4", "utf8mb4_spanish_ci"))
_charsets.add(Charset(232, "utf8mb4", "utf8mb4_swedish_ci"))
_charsets.add(Charset(233, "utf8mb4", "utf8mb4_turkish_ci"))
_charsets.add(Charset(234, "utf8mb4", "utf8mb4_czech_ci"))
_charsets.add(Charset(235, "utf8mb4", "utf8mb4_danish_ci"))
_charsets.add(Charset(236, "utf8mb4", "utf8mb4_lithuanian_ci"))
_charsets.add(Charset(237, "utf8mb4", "utf8mb4_slovak_ci"))
_charsets.add(Charset(238, "utf8mb4", "utf8mb4_spanish2_ci"))
_charsets.add(Charset(239, "utf8mb4", "utf8mb4_roman_ci"))
_charsets.add(Charset(240, "utf8mb4", "utf8mb4_persian_ci"))
_charsets.add(Charset(241, "utf8mb4", "utf8mb4_esperanto_ci"))
_charsets.add(Charset(242, "utf8mb4", "utf8mb4_hungarian_ci"))
_charsets.add(Charset(243, "utf8mb4", "utf8mb4_sinhala_ci"))
_charsets.add(Charset(244, "utf8mb4", "utf8mb4_german2_ci"))
_charsets.add(Charset(245, "utf8mb4", "utf8mb4_croatian_ci"))
_charsets.add(Charset(246, "utf8mb4", "utf8mb4_unicode_520_ci"))
_charsets.add(Charset(247, "utf8mb4", "utf8mb4_vietnamese_ci"))
_charsets.add(Charset(248, "gb18030", "gb18030_chinese_ci", True))
_charsets.add(Charset(249, "gb18030", "gb18030_bin"))
_charsets.add(Charset(250, "gb18030", "gb18030_unicode_520_ci"))
_charsets.add(Charset(255, "utf8mb4", "utf8mb4_0900_ai_ci"))