4 * Native language support--charsets and unicode translations.
5 * By Gordon Chaffee 1996, 1997
7 * Unicode based case conversion 1999 by Wolfram Pienkoss
11 #include <linux/module.h>
12 #include <linux/string.h>
13 #include <linux/nls.h>
14 #include <linux/kernel.h>
15 #include <linux/errno.h>
17 #include <linux/kmod.h>
19 #include <linux/spinlock.h>
/*
 * Registry state for NLS tables.  "tables" is the head of the list that
 * find_nls() walks through each entry's ->next pointer; it initially points
 * at the built-in default_table, which is defined further down (hence the
 * forward declaration).  nls_lock is the spinlock taken by register_nls(),
 * unregister_nls() and find_nls(); find_nls() holds it for its list walk.
 */
21 static struct nls_table default_table;
22 static struct nls_table *tables = &default_table;
23 static DEFINE_SPINLOCK(nls_lock);
26 * Sample implementation from Unicode home page.
27 * http://www.stonehand.com/unicode/standard/fss-utf.html
/*
 * UTF-8 sequence-format table: one row per encoded length (1 to 6 bytes, as
 * the row comments say), closed by a row whose first field is 0.  The
 * converters in this file walk the rows in order and stop at the row whose
 * cmask field is 0.  The decoder accepts a row when
 * (lead byte & cmask) == cval; by position these appear to be the first two
 * columns.
 * NOTE(review): the struct utf8_table declaration is not visible in this
 * listing.  The remaining columns look like a bit shift (a multiple of 6),
 * the largest value encodable at that length, and the smallest value that
 * needs that length -- confirm against the struct declaration.
 */
37 static struct utf8_table utf8_table[] =
39 {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
40 {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
41 {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */},
42 {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */},
43 {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */},
44 {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */},
45 {0, /* end of table */}
/*
 * Convert a single UTF-8 encoded character at s into a wide character
 * written through p.
 * NOTE(review): n is presumably the number of input bytes available, and
 * the return value is used as a size by utf8_mbstowcs() -- most of the body
 * (bounds checks, accumulation, return statements) is not visible in this
 * listing, so confirm both against the full function.
 */
49 utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
/* Try each sequence format in order; the row with cmask == 0 ends the table. */
58 for (t = utf8_table; t->cmask; t++) {
/* The lead byte c0 carries this format's prefix bits. */
60 if ((c0 & t->cmask) == t->cval) {
/*
 * Flipping bit 7 maps a well-formed continuation byte (0x80-0xBF) onto its
 * six payload bits (0x00-0x3F); any other byte keeps a bit set in 0xC0.
 */
70 c = (*s ^ 0x80) & 0xFF;
/*
 * Convert a UTF-8 byte string s into wide characters stored at pwcs, one
 * character at a time through utf8_mbtowc().
 * NOTE(review): ip and op are presumably cursors over s and pwcs; how they
 * and n advance, and what is returned, is not visible in this listing.
 */
79 utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
/* Stop at a zero input byte or once the count n is no longer positive. */
87 while (*ip && n > 0) {
/* Decode one character at ip into *op; the result is kept in size. */
89 size = utf8_mbtowc(op, ip, n);
91 /* Ignore character and move on */
/*
 * Encode the wide character wc as a UTF-8 sequence written at s, using at
 * most maxlen bytes.
 * NOTE(review): the range test that selects a table row, the inner loop over
 * the shift c, and the return statements are not visible in this listing.
 */
108 utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
112 struct utf8_table *t;
/*
 * Rows are ordered by sequence length and maxlen drops by one per row tried,
 * so a format needing k bytes is only reached while k bytes of room remain.
 */
119 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
/* Lead byte: the format's prefix bits combined with the value shifted by c. */
123 *s = t->cval | (l >> c);
/* Continuation byte: 10xxxxxx carrying six bits of the value. */
127 *s = 0x80 | ((l >> c) & 0x3F);
/*
 * Convert a wide-character string pwcs into UTF-8 bytes stored at s, one
 * character at a time through utf8_wctomb().
 * NOTE(review): ip and op are presumably cursors over pwcs and s; how they
 * and maxlen advance, and what is returned, is not visible in this listing.
 */
136 utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
/* Stop at a zero wide character or once maxlen is no longer positive. */
144 while (*ip && maxlen > 0) {
/* Encode *ip at op with the remaining room; the result is kept in size. */
146 size = utf8_wctomb(op, *ip, maxlen);
148 /* Ignore character and move on */
/*
 * Register the NLS table nls with the global registry.
 * Returns an int status.
 * NOTE(review): the list manipulation and the returned values are not
 * visible in this listing; presumably 0 on success and a negative errno on
 * failure -- confirm against the full function.
 */
162 int register_nls(struct nls_table * nls)
/* Cursor over the link pointers, starting at the list head. */
164 struct nls_table ** tmp = &tables;
171 spin_lock(&nls_lock);
/*
 * Two unlock sites follow, so the locked region has two exit paths; the
 * condition that chooses between them is not visible here.
 */
174 spin_unlock(&nls_lock);
181 spin_unlock(&nls_lock);
/*
 * Remove the NLS table nls from the global registry.
 * Returns an int status.
 * NOTE(review): the search/unlink logic and the returned values are not
 * visible in this listing -- confirm against the full function.
 */
185 int unregister_nls(struct nls_table * nls)
/* Cursor over the link pointers, starting at the list head. */
187 struct nls_table ** tmp = &tables;
189 spin_lock(&nls_lock);
/*
 * Two unlock sites follow, so the locked region has two exit paths; the
 * condition that chooses between them is not visible here.
 */
193 spin_unlock(&nls_lock);
198 spin_unlock(&nls_lock);
/*
 * Look up a registered NLS table whose charset name, or alias if it has one,
 * equals charset.  The list walk and the module reference attempt both
 * happen with nls_lock held.
 * NOTE(review): the loop exits, the handling of a failed try_module_get()
 * and the return statement are not visible in this listing; presumably the
 * match is returned, or NULL when nothing usable was found.
 */
202 static struct nls_table *find_nls(char *charset)
204 struct nls_table *nls;
205 spin_lock(&nls_lock);
206 for (nls = tables; nls; nls = nls->next) {
/* Match on the primary charset name ... */
207 if (!strcmp(nls->charset, charset))
/* ... or on the alias, which may be NULL and is checked first. */
209 if (nls->alias && !strcmp(nls->alias, charset))
/* Try to take a reference on the owning module of the entry found. */
212 if (nls && !try_module_get(nls->owner))
214 spin_unlock(&nls_lock);
/*
 * Obtain the NLS table for charset.  The registry is searched with
 * find_nls(); the visible lines also request a module named "nls_<charset>"
 * and then search the registry a second time.
 * NOTE(review): the conditions between these steps and the return
 * statements are not visible in this listing; presumably the module request
 * only happens when the first lookup fails, and the message is printed when
 * the request fails.
 */
218 struct nls_table *load_nls(char *charset)
220 struct nls_table *nls;
225 nls = find_nls(charset);
/* Ask for the module that provides this charset to be loaded. */
230 ret = request_module("nls_%s", charset);
232 printk("Unable to load NLS charset %s\n", charset);
/* Search again, now that the module may have registered its table. */
235 nls = find_nls(charset);
/*
 * Release an NLS table: drops a reference on the module that owns nls,
 * the counterpart of the try_module_get() call made in find_nls().
 */
240 void unload_nls(struct nls_table *nls)
242 module_put(nls->owner);
/*
 * Byte -> Unicode map of the built-in default charset, indexed by the raw
 * byte value in char2uni().  Every entry equals its own index
 * (0x0000 through 0x00ff), i.e. the mapping is the identity.
 */
245 static wchar_t charset2uni[256] = {
247 0x0000, 0x0001, 0x0002, 0x0003,
248 0x0004, 0x0005, 0x0006, 0x0007,
249 0x0008, 0x0009, 0x000a, 0x000b,
250 0x000c, 0x000d, 0x000e, 0x000f,
252 0x0010, 0x0011, 0x0012, 0x0013,
253 0x0014, 0x0015, 0x0016, 0x0017,
254 0x0018, 0x0019, 0x001a, 0x001b,
255 0x001c, 0x001d, 0x001e, 0x001f,
257 0x0020, 0x0021, 0x0022, 0x0023,
258 0x0024, 0x0025, 0x0026, 0x0027,
259 0x0028, 0x0029, 0x002a, 0x002b,
260 0x002c, 0x002d, 0x002e, 0x002f,
262 0x0030, 0x0031, 0x0032, 0x0033,
263 0x0034, 0x0035, 0x0036, 0x0037,
264 0x0038, 0x0039, 0x003a, 0x003b,
265 0x003c, 0x003d, 0x003e, 0x003f,
267 0x0040, 0x0041, 0x0042, 0x0043,
268 0x0044, 0x0045, 0x0046, 0x0047,
269 0x0048, 0x0049, 0x004a, 0x004b,
270 0x004c, 0x004d, 0x004e, 0x004f,
272 0x0050, 0x0051, 0x0052, 0x0053,
273 0x0054, 0x0055, 0x0056, 0x0057,
274 0x0058, 0x0059, 0x005a, 0x005b,
275 0x005c, 0x005d, 0x005e, 0x005f,
277 0x0060, 0x0061, 0x0062, 0x0063,
278 0x0064, 0x0065, 0x0066, 0x0067,
279 0x0068, 0x0069, 0x006a, 0x006b,
280 0x006c, 0x006d, 0x006e, 0x006f,
282 0x0070, 0x0071, 0x0072, 0x0073,
283 0x0074, 0x0075, 0x0076, 0x0077,
284 0x0078, 0x0079, 0x007a, 0x007b,
285 0x007c, 0x007d, 0x007e, 0x007f,
287 0x0080, 0x0081, 0x0082, 0x0083,
288 0x0084, 0x0085, 0x0086, 0x0087,
289 0x0088, 0x0089, 0x008a, 0x008b,
290 0x008c, 0x008d, 0x008e, 0x008f,
292 0x0090, 0x0091, 0x0092, 0x0093,
293 0x0094, 0x0095, 0x0096, 0x0097,
294 0x0098, 0x0099, 0x009a, 0x009b,
295 0x009c, 0x009d, 0x009e, 0x009f,
297 0x00a0, 0x00a1, 0x00a2, 0x00a3,
298 0x00a4, 0x00a5, 0x00a6, 0x00a7,
299 0x00a8, 0x00a9, 0x00aa, 0x00ab,
300 0x00ac, 0x00ad, 0x00ae, 0x00af,
302 0x00b0, 0x00b1, 0x00b2, 0x00b3,
303 0x00b4, 0x00b5, 0x00b6, 0x00b7,
304 0x00b8, 0x00b9, 0x00ba, 0x00bb,
305 0x00bc, 0x00bd, 0x00be, 0x00bf,
307 0x00c0, 0x00c1, 0x00c2, 0x00c3,
308 0x00c4, 0x00c5, 0x00c6, 0x00c7,
309 0x00c8, 0x00c9, 0x00ca, 0x00cb,
310 0x00cc, 0x00cd, 0x00ce, 0x00cf,
312 0x00d0, 0x00d1, 0x00d2, 0x00d3,
313 0x00d4, 0x00d5, 0x00d6, 0x00d7,
314 0x00d8, 0x00d9, 0x00da, 0x00db,
315 0x00dc, 0x00dd, 0x00de, 0x00df,
317 0x00e0, 0x00e1, 0x00e2, 0x00e3,
318 0x00e4, 0x00e5, 0x00e6, 0x00e7,
319 0x00e8, 0x00e9, 0x00ea, 0x00eb,
320 0x00ec, 0x00ed, 0x00ee, 0x00ef,
322 0x00f0, 0x00f1, 0x00f2, 0x00f3,
323 0x00f4, 0x00f5, 0x00f6, 0x00f7,
324 0x00f8, 0x00f9, 0x00fa, 0x00fb,
325 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * 256-entry byte table in which every entry equals its own index (identity).
 * NOTE(review): presumably installed as page 0 of page_uni2charset, mapping
 * Unicode values 0x0000-0x00ff to the same byte value; the initializer that
 * would confirm this is not visible in this listing.
 */
328 static unsigned char page00[256] = {
329 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
330 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
331 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
332 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
333 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
334 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
335 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
336 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
337 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
338 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
339 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
340 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
341 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
342 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
343 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
344 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
346 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
347 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
348 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
349 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
350 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
351 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
352 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
353 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
354 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
355 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
356 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
357 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
358 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
359 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
360 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
361 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Page directory for Unicode -> byte conversion.  uni2char() indexes it by
 * the high byte of the Unicode value, treats a NULL entry as "no page", and
 * indexes the page it finds by the low byte.
 * NOTE(review): the initializer contents are not visible in this listing.
 */
364 static unsigned char *page_uni2charset[256] = {
/*
 * Lower-casing table of the default charset, indexed by byte value.
 * Entries 0x41-0x5a ('A'-'Z') hold 0x61-0x7a ('a'-'z'); every other entry,
 * including the whole 0x80-0xff half, equals its own index.
 */
368 static unsigned char charset2lower[256] = {
369 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
370 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
371 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
372 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
373 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
374 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
375 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
376 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
377 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */
378 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */
379 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */
380 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
381 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
382 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
383 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
384 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
386 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
387 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
388 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
389 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
390 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
391 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
392 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
393 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
394 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
395 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
396 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
397 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
398 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
399 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
400 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
401 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Upper-casing table of the default charset, indexed by byte value.
 * Entries 0x61-0x7a ('a'-'z') hold 0x41-0x5a ('A'-'Z'); every other entry,
 * including the whole 0x80-0xff half, equals its own index.
 */
404 static unsigned char charset2upper[256] = {
405 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
406 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
407 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
408 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
409 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
410 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
411 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
412 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
413 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
414 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
415 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
416 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
417 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */
418 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */
419 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */
420 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
422 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
423 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
424 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
425 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
426 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
427 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
428 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
429 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
430 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
431 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
432 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
433 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
434 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
435 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
436 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
437 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Convert the Unicode value uni to a single byte of the default charset,
 * stored in out[0].  The high byte of uni selects a page in
 * page_uni2charset and the low byte selects the entry within that page.
 * NOTE(review): the condition guarding the -ENAMETOOLONG return (presumably
 * a check on boundlen), the handling of an unmapped value and the success
 * return are not visible in this listing.
 */
441 static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
443 unsigned char *uni2charset;
/* cl: low byte of uni (index within a page); ch: high byte (page number). */
444 unsigned char cl = uni & 0x00ff;
445 unsigned char ch = (uni & 0xff00) >> 8;
448 return -ENAMETOOLONG;
450 uni2charset = page_uni2charset[ch];
/* Only store a byte when the page exists and its entry is non-zero. */
451 if (uni2charset && uni2charset[cl])
452 out[0] = uni2charset[cl];
/*
 * Convert the first byte of rawstring to its Unicode value via the
 * charset2uni table and store it in *uni.
 * NOTE(review): any use of boundlen and the return value are not visible in
 * this listing.
 */
458 static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
460 *uni = charset2uni[*rawstring];
/*
 * The built-in NLS table, named "default".  It wires up the converters and
 * case tables defined in this file, is the initial head of the "tables"
 * list, and is what load_nls_default() returns through its final return.
 */
466 static struct nls_table default_table = {
467 .charset = "default",
468 .uni2char = uni2char,
469 .char2uni = char2uni,
470 .charset2lower = charset2lower,
471 .charset2upper = charset2upper,
/*
 * Tries to load the table named by CONFIG_NLS_DEFAULT through load_nls();
 * the final return hands back the built-in default_table.
 * NOTE(review): the statement under the NULL check is not visible in this
 * listing; presumably it returns the table that was loaded.
 */
474 /* Returns a simple default translation table */
475 struct nls_table *load_nls_default(void)
477 struct nls_table *default_nls;
479 default_nls = load_nls(CONFIG_NLS_DEFAULT);
480 if (default_nls != NULL)
483 return &default_table;
/*
 * Symbols exported to other kernel code: table registration and removal,
 * table loading and release, and the four UTF-8 conversion routines.
 * find_nls(), uni2char() and char2uni() are static and are not exported.
 */
486 EXPORT_SYMBOL(register_nls);
487 EXPORT_SYMBOL(unregister_nls);
488 EXPORT_SYMBOL(unload_nls);
489 EXPORT_SYMBOL(load_nls);
490 EXPORT_SYMBOL(load_nls_default);
491 EXPORT_SYMBOL(utf8_mbtowc);
492 EXPORT_SYMBOL(utf8_mbstowcs);
493 EXPORT_SYMBOL(utf8_wctomb);
494 EXPORT_SYMBOL(utf8_wcstombs);
496 MODULE_LICENSE("Dual BSD/GPL");