Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

koi8_t.py 13 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
  1. """ Python Character Mapping Codec koi8_t
  2. """
  3. # http://ru.wikipedia.org/wiki/КОИ-8
  4. # http://www.opensource.apple.com/source/libiconv/libiconv-4/libiconv/tests/KOI8-T.TXT
  5. import codecs
  6. ### Codec APIs
  7. class Codec(codecs.Codec):
  8. def encode(self,input,errors='strict'):
  9. return codecs.charmap_encode(input,errors,encoding_table)
  10. def decode(self,input,errors='strict'):
  11. return codecs.charmap_decode(input,errors,decoding_table)
  12. class IncrementalEncoder(codecs.IncrementalEncoder):
  13. def encode(self, input, final=False):
  14. return codecs.charmap_encode(input,self.errors,encoding_table)[0]
  15. class IncrementalDecoder(codecs.IncrementalDecoder):
  16. def decode(self, input, final=False):
  17. return codecs.charmap_decode(input,self.errors,decoding_table)[0]
  18. class StreamWriter(Codec,codecs.StreamWriter):
  19. pass
  20. class StreamReader(Codec,codecs.StreamReader):
  21. pass
  22. ### encodings module API
  23. def getregentry():
  24. return codecs.CodecInfo(
  25. name='koi8-t',
  26. encode=Codec().encode,
  27. decode=Codec().decode,
  28. incrementalencoder=IncrementalEncoder,
  29. incrementaldecoder=IncrementalDecoder,
  30. streamreader=StreamReader,
  31. streamwriter=StreamWriter,
  32. )
  33. ### Decoding Table
  34. decoding_table = (
  35. '\x00' # 0x00 -> NULL
  36. '\x01' # 0x01 -> START OF HEADING
  37. '\x02' # 0x02 -> START OF TEXT
  38. '\x03' # 0x03 -> END OF TEXT
  39. '\x04' # 0x04 -> END OF TRANSMISSION
  40. '\x05' # 0x05 -> ENQUIRY
  41. '\x06' # 0x06 -> ACKNOWLEDGE
  42. '\x07' # 0x07 -> BELL
  43. '\x08' # 0x08 -> BACKSPACE
  44. '\t' # 0x09 -> HORIZONTAL TABULATION
  45. '\n' # 0x0A -> LINE FEED
  46. '\x0b' # 0x0B -> VERTICAL TABULATION
  47. '\x0c' # 0x0C -> FORM FEED
  48. '\r' # 0x0D -> CARRIAGE RETURN
  49. '\x0e' # 0x0E -> SHIFT OUT
  50. '\x0f' # 0x0F -> SHIFT IN
  51. '\x10' # 0x10 -> DATA LINK ESCAPE
  52. '\x11' # 0x11 -> DEVICE CONTROL ONE
  53. '\x12' # 0x12 -> DEVICE CONTROL TWO
  54. '\x13' # 0x13 -> DEVICE CONTROL THREE
  55. '\x14' # 0x14 -> DEVICE CONTROL FOUR
  56. '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
  57. '\x16' # 0x16 -> SYNCHRONOUS IDLE
  58. '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
  59. '\x18' # 0x18 -> CANCEL
  60. '\x19' # 0x19 -> END OF MEDIUM
  61. '\x1a' # 0x1A -> SUBSTITUTE
  62. '\x1b' # 0x1B -> ESCAPE
  63. '\x1c' # 0x1C -> FILE SEPARATOR
  64. '\x1d' # 0x1D -> GROUP SEPARATOR
  65. '\x1e' # 0x1E -> RECORD SEPARATOR
  66. '\x1f' # 0x1F -> UNIT SEPARATOR
  67. ' ' # 0x20 -> SPACE
  68. '!' # 0x21 -> EXCLAMATION MARK
  69. '"' # 0x22 -> QUOTATION MARK
  70. '#' # 0x23 -> NUMBER SIGN
  71. '$' # 0x24 -> DOLLAR SIGN
  72. '%' # 0x25 -> PERCENT SIGN
  73. '&' # 0x26 -> AMPERSAND
  74. "'" # 0x27 -> APOSTROPHE
  75. '(' # 0x28 -> LEFT PARENTHESIS
  76. ')' # 0x29 -> RIGHT PARENTHESIS
  77. '*' # 0x2A -> ASTERISK
  78. '+' # 0x2B -> PLUS SIGN
  79. ',' # 0x2C -> COMMA
  80. '-' # 0x2D -> HYPHEN-MINUS
  81. '.' # 0x2E -> FULL STOP
  82. '/' # 0x2F -> SOLIDUS
  83. '0' # 0x30 -> DIGIT ZERO
  84. '1' # 0x31 -> DIGIT ONE
  85. '2' # 0x32 -> DIGIT TWO
  86. '3' # 0x33 -> DIGIT THREE
  87. '4' # 0x34 -> DIGIT FOUR
  88. '5' # 0x35 -> DIGIT FIVE
  89. '6' # 0x36 -> DIGIT SIX
  90. '7' # 0x37 -> DIGIT SEVEN
  91. '8' # 0x38 -> DIGIT EIGHT
  92. '9' # 0x39 -> DIGIT NINE
  93. ':' # 0x3A -> COLON
  94. ';' # 0x3B -> SEMICOLON
  95. '<' # 0x3C -> LESS-THAN SIGN
  96. '=' # 0x3D -> EQUALS SIGN
  97. '>' # 0x3E -> GREATER-THAN SIGN
  98. '?' # 0x3F -> QUESTION MARK
  99. '@' # 0x40 -> COMMERCIAL AT
  100. 'A' # 0x41 -> LATIN CAPITAL LETTER A
  101. 'B' # 0x42 -> LATIN CAPITAL LETTER B
  102. 'C' # 0x43 -> LATIN CAPITAL LETTER C
  103. 'D' # 0x44 -> LATIN CAPITAL LETTER D
  104. 'E' # 0x45 -> LATIN CAPITAL LETTER E
  105. 'F' # 0x46 -> LATIN CAPITAL LETTER F
  106. 'G' # 0x47 -> LATIN CAPITAL LETTER G
  107. 'H' # 0x48 -> LATIN CAPITAL LETTER H
  108. 'I' # 0x49 -> LATIN CAPITAL LETTER I
  109. 'J' # 0x4A -> LATIN CAPITAL LETTER J
  110. 'K' # 0x4B -> LATIN CAPITAL LETTER K
  111. 'L' # 0x4C -> LATIN CAPITAL LETTER L
  112. 'M' # 0x4D -> LATIN CAPITAL LETTER M
  113. 'N' # 0x4E -> LATIN CAPITAL LETTER N
  114. 'O' # 0x4F -> LATIN CAPITAL LETTER O
  115. 'P' # 0x50 -> LATIN CAPITAL LETTER P
  116. 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
  117. 'R' # 0x52 -> LATIN CAPITAL LETTER R
  118. 'S' # 0x53 -> LATIN CAPITAL LETTER S
  119. 'T' # 0x54 -> LATIN CAPITAL LETTER T
  120. 'U' # 0x55 -> LATIN CAPITAL LETTER U
  121. 'V' # 0x56 -> LATIN CAPITAL LETTER V
  122. 'W' # 0x57 -> LATIN CAPITAL LETTER W
  123. 'X' # 0x58 -> LATIN CAPITAL LETTER X
  124. 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
  125. 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
  126. '[' # 0x5B -> LEFT SQUARE BRACKET
  127. '\\' # 0x5C -> REVERSE SOLIDUS
  128. ']' # 0x5D -> RIGHT SQUARE BRACKET
  129. '^' # 0x5E -> CIRCUMFLEX ACCENT
  130. '_' # 0x5F -> LOW LINE
  131. '`' # 0x60 -> GRAVE ACCENT
  132. 'a' # 0x61 -> LATIN SMALL LETTER A
  133. 'b' # 0x62 -> LATIN SMALL LETTER B
  134. 'c' # 0x63 -> LATIN SMALL LETTER C
  135. 'd' # 0x64 -> LATIN SMALL LETTER D
  136. 'e' # 0x65 -> LATIN SMALL LETTER E
  137. 'f' # 0x66 -> LATIN SMALL LETTER F
  138. 'g' # 0x67 -> LATIN SMALL LETTER G
  139. 'h' # 0x68 -> LATIN SMALL LETTER H
  140. 'i' # 0x69 -> LATIN SMALL LETTER I
  141. 'j' # 0x6A -> LATIN SMALL LETTER J
  142. 'k' # 0x6B -> LATIN SMALL LETTER K
  143. 'l' # 0x6C -> LATIN SMALL LETTER L
  144. 'm' # 0x6D -> LATIN SMALL LETTER M
  145. 'n' # 0x6E -> LATIN SMALL LETTER N
  146. 'o' # 0x6F -> LATIN SMALL LETTER O
  147. 'p' # 0x70 -> LATIN SMALL LETTER P
  148. 'q' # 0x71 -> LATIN SMALL LETTER Q
  149. 'r' # 0x72 -> LATIN SMALL LETTER R
  150. 's' # 0x73 -> LATIN SMALL LETTER S
  151. 't' # 0x74 -> LATIN SMALL LETTER T
  152. 'u' # 0x75 -> LATIN SMALL LETTER U
  153. 'v' # 0x76 -> LATIN SMALL LETTER V
  154. 'w' # 0x77 -> LATIN SMALL LETTER W
  155. 'x' # 0x78 -> LATIN SMALL LETTER X
  156. 'y' # 0x79 -> LATIN SMALL LETTER Y
  157. 'z' # 0x7A -> LATIN SMALL LETTER Z
  158. '{' # 0x7B -> LEFT CURLY BRACKET
  159. '|' # 0x7C -> VERTICAL LINE
  160. '}' # 0x7D -> RIGHT CURLY BRACKET
  161. '~' # 0x7E -> TILDE
  162. '\x7f' # 0x7F -> DELETE
  163. '\u049b' # 0x80 -> CYRILLIC SMALL LETTER KA WITH DESCENDER
  164. '\u0493' # 0x81 -> CYRILLIC SMALL LETTER GHE WITH STROKE
  165. '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
  166. '\u0492' # 0x83 -> CYRILLIC CAPITAL LETTER GHE WITH STROKE
  167. '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
  168. '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
  169. '\u2020' # 0x86 -> DAGGER
  170. '\u2021' # 0x87 -> DOUBLE DAGGER
  171. '\ufffe' # 0x88 -> UNDEFINED
  172. '\u2030' # 0x89 -> PER MILLE SIGN
  173. '\u04b3' # 0x8A -> CYRILLIC SMALL LETTER HA WITH DESCENDER
  174. '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  175. '\u04b2' # 0x8C -> CYRILLIC CAPITAL LETTER HA WITH DESCENDER
  176. '\u04b7' # 0x8D -> CYRILLIC SMALL LETTER CHE WITH DESCENDER
  177. '\u04b6' # 0x8E -> CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
  178. '\ufffe' # 0x8F -> UNDEFINED
  179. '\u049a' # 0x90 -> CYRILLIC CAPITAL LETTER KA WITH DESCENDER
  180. '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
  181. '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
  182. '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
  183. '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
  184. '\u2022' # 0x95 -> BULLET
  185. '\u2013' # 0x96 -> EN DASH
  186. '\u2014' # 0x97 -> EM DASH
  187. '\ufffe' # 0x98 -> UNDEFINED
  188. '\u2122' # 0x99 -> TRADE MARK SIGN
  189. '\ufffe' # 0x9A -> UNDEFINED
  190. '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  191. '\ufffe' # 0x9C -> UNDEFINED
  192. '\ufffe' # 0x9D -> UNDEFINED
  193. '\ufffe' # 0x9E -> UNDEFINED
  194. '\ufffe' # 0x9F -> UNDEFINED
  195. '\ufffe' # 0xA0 -> UNDEFINED
  196. '\u04ef' # 0xA1 -> CYRILLIC SMALL LETTER U WITH MACRON
  197. '\u04ee' # 0xA2 -> CYRILLIC CAPITAL LETTER U WITH MACRON
  198. '\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO
  199. '\xa4' # 0xA4 -> CURRENCY SIGN
  200. '\u04e3' # 0xA5 -> CYRILLIC SMALL LETTER I WITH MACRON
  201. '\xa6' # 0xA6 -> BROKEN BAR
  202. '\xa7' # 0xA7 -> SECTION SIGN
  203. '\ufffe' # 0xA8 -> UNDEFINED
  204. '\ufffe' # 0xA9 -> UNDEFINED
  205. '\ufffe' # 0xAA -> UNDEFINED
  206. '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
  207. '\xac' # 0xAC -> NOT SIGN
  208. '\xad' # 0xAD -> SOFT HYPHEN
  209. '\xae' # 0xAE -> REGISTERED SIGN
  210. '\ufffe' # 0xAF -> UNDEFINED
  211. '\xb0' # 0xB0 -> DEGREE SIGN
  212. '\xb1' # 0xB1 -> PLUS-MINUS SIGN
  213. '\xb2' # 0xB2 -> SUPERSCRIPT TWO
  214. '\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO
  215. '\ufffe' # 0xB4 -> UNDEFINED
  216. '\u04e2' # 0xB5 -> CYRILLIC CAPITAL LETTER I WITH MACRON
  217. '\xb6' # 0xB6 -> PILCROW SIGN
  218. '\xb7' # 0xB7 -> MIDDLE DOT
  219. '\ufffe' # 0xB8 -> UNDEFINED
  220. '\u2116' # 0xB9 -> NUMERO SIGN
  221. '\ufffe' # 0xBA -> UNDEFINED
  222. '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
  223. '\ufffe' # 0xBC -> UNDEFINED
  224. '\ufffe' # 0xBD -> UNDEFINED
  225. '\ufffe' # 0xBE -> UNDEFINED
  226. '\xa9' # 0xBF -> COPYRIGHT SIGN
  227. '\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU
  228. '\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A
  229. '\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE
  230. '\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE
  231. '\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE
  232. '\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE
  233. '\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF
  234. '\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE
  235. '\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA
  236. '\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I
  237. '\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I
  238. '\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA
  239. '\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL
  240. '\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM
  241. '\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN
  242. '\u043e' # 0xCF -> CYRILLIC SMALL LETTER O
  243. '\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE
  244. '\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA
  245. '\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER
  246. '\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES
  247. '\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE
  248. '\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U
  249. '\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE
  250. '\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE
  251. '\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN
  252. '\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU
  253. '\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE
  254. '\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA
  255. '\u044d' # 0xDC -> CYRILLIC SMALL LETTER E
  256. '\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA
  257. '\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE
  258. '\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN
  259. '\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU
  260. '\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A
  261. '\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE
  262. '\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE
  263. '\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE
  264. '\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE
  265. '\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF
  266. '\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE
  267. '\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA
  268. '\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I
  269. '\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I
  270. '\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA
  271. '\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL
  272. '\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM
  273. '\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN
  274. '\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O
  275. '\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE
  276. '\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA
  277. '\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER
  278. '\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES
  279. '\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE
  280. '\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U
  281. '\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE
  282. '\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE
  283. '\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
  284. '\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU
  285. '\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE
  286. '\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA
  287. '\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E
  288. '\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA
  289. '\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE
  290. '\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
  291. )
  292. ### Encoding table
  293. encoding_table=codecs.charmap_build(decoding_table)
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...