fs/cifs/cifs_unicode.c

   1 /*
   2  *   fs/cifs/cifs_unicode.c
   3  *
   4  *   Copyright (c) International Business Machines  Corp., 2000,2009
   5  *   Modified by Steve French (sfrench@us.ibm.com)
   6  *
   7  *   This program is free software;  you can redistribute it and/or modify
   8  *   it under the terms of the GNU General Public License as published by
   9  *   the Free Software Foundation; either version 2 of the License, or
  10  *   (at your option) any later version.
  11  *
  12  *   This program is distributed in the hope that it will be useful,
  13  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  15  *   the GNU General Public License for more details.
  16  *
  17  *   You should have received a copy of the GNU General Public License
  18  *   along with this program;  if not, write to the Free Software
  19  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20  */
  21 #include <linux/fs.h>
  22 #include <linux/slab.h>
  23 #include "cifs_unicode.h"
  24 #include "cifs_uniupr.h"
  25 #include "cifspdu.h"
  26 #include "cifsglob.h"
  27 #include "cifs_debug.h"
  28
  29 /*
  30  * cifs_utf16_bytes - how long will a string be after conversion?
  31  * @utf16 - pointer to input string
  32  * @maxbytes - don't go past this many bytes of input string
  33  * @codepage - destination codepage
  34  *
  35  * Walk a utf16le string and return the number of bytes that the string will
  36  * be after being converted to the given charset, not including any null
  37  * termination required. Don't walk past maxbytes in the source buffer.
  38  */
  39 int
  40 cifs_utf16_bytes(const __le16 *from, int maxbytes,
  41                 const struct nls_table *codepage)
  42 {
  43         int i;
  44         int charlen, outlen = 0;
  45         int maxwords = maxbytes / 2;
  46         char tmp[NLS_MAX_CHARSET_SIZE];
  47         __u16 ftmp;
  48
  49         for (i = 0; i < maxwords; i++) {
  50                 ftmp = get_unaligned_le16(&from[i]);
  51                 if (ftmp == 0)
  52                         break;
  53
  54                 charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
  55                 if (charlen > 0)
  56                         outlen += charlen;
  57                 else
  58                         outlen++;
  59         }
  60
  61         return outlen;
  62 }
  63
  64 /* Convert character using the SFU - "Services for Unix" remapping range */
  65 static bool
  66 convert_sfu_char(const __u16 src_char, char *target)
  67 {
  68         /*
  69          * BB: Cannot handle remapping UNI_SLASH until all the calls to
  70          *     build_path_from_dentry are modified, as they use slash as
  71          *     separator.
  72          */
  73         switch (src_char) {
  74         case UNI_COLON:
  75                 *target = ':';
  76                 break;
  77         case UNI_ASTERISK:
  78                 *target = '*';
  79                 break;
  80         case UNI_QUESTION:
  81                 *target = '?';
  82                 break;
  83         case UNI_PIPE:
  84                 *target = '|';
  85                 break;
  86         case UNI_GRTRTHAN:
  87                 *target = '>';
  88                 break;
  89         case UNI_LESSTHAN:
  90                 *target = '<';
  91                 break;
  92         default:
  93                 return false;
  94         }
  95         return true;
  96 }
  97
  98 /* Convert character using the SFM - "Services for Mac" remapping range */
  99 static bool
 100 convert_sfm_char(const __u16 src_char, char *target)
 101 {
 102         switch (src_char) {
 103         case SFM_COLON:
 104                 *target = ':';
 105                 break;
 106         case SFM_ASTERISK:
 107                 *target = '*';
 108                 break;
 109         case SFM_QUESTION:
 110                 *target = '?';
 111                 break;
 112         case SFM_PIPE:
 113                 *target = '|';
 114                 break;
 115         case SFM_GRTRTHAN:
 116                 *target = '>';
 117                 break;
 118         case SFM_LESSTHAN:
 119                 *target = '<';
 120                 break;
 121         case SFM_SLASH:
 122                 *target = '\\';
 123                 break;
 124         default:
 125                 return false;
 126         }
 127         return true;
 128 }
 129
 130
 131 /*
 132  * cifs_mapchar - convert a host-endian char to proper char in codepage
 133  * @target - where converted character should be copied
 134  * @src_char - 2 byte host-endian source character
 135  * @cp - codepage to which character should be converted
 136  * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2?
 137  *
 138  * This function handles the conversion of a single character. It is the
 139  * responsibility of the caller to ensure that the target buffer is large
 140  * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
 141  */
 142 static int
 143 cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 144              int maptype)
 145 {
 146         int len = 1;
 147
 148         if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
 149                 return len;
 150         else if ((maptype == SFU_MAP_UNI_RSVD) &&
 151                   convert_sfu_char(src_char, target))
 152                 return len;
 153
 154         /* if character not one of seven in special remap set */
 155         len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
 156         if (len <= 0) {
 157                 *target = '?';
 158                 len = 1;
 159         }
 160         return len;
 161 }
 162
 163 /*
 164  * cifs_from_utf16 - convert utf16le string to local charset
 165  * @to - destination buffer
 166  * @from - source buffer
 167  * @tolen - destination buffer size (in bytes)
 168  * @fromlen - source buffer size (in bytes)
 169  * @codepage - codepage to which characters should be converted
 170  * @mapchar - should characters be remapped according to the mapchars option?
 171  *
 172  * Convert a little-endian utf16le string (as sent by the server) to a string
 173  * in the provided codepage. The tolen and fromlen parameters are to ensure
 174  * that the code doesn't walk off of the end of the buffer (which is always
 175  * a danger if the alignment of the source buffer is off). The destination
 176  * string is always properly null terminated and fits in the destination
 177  * buffer. Returns the length of the destination string in bytes (including
 178  * null terminator).
 179  *
 180  * Note that some windows versions actually send multiword UTF-16 characters
 181  * instead of straight UTF16-2. The linux nls routines however aren't able to
 182  * deal with those characters properly. In the event that we get some of
 183  * those characters, they won't be translated properly.
 184  */
 185 int
 186 cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
 187                 const struct nls_table *codepage, int map_type)
 188 {
 189         int i, charlen, safelen;
 190         int outlen = 0;
 191         int nullsize = nls_nullsize(codepage);
 192         int fromwords = fromlen / 2;
 193         char tmp[NLS_MAX_CHARSET_SIZE];
 194         __u16 ftmp;
 195
 196         /*
 197          * because the chars can be of varying widths, we need to take care
 198          * not to overflow the destination buffer when we get close to the
 199          * end of it. Until we get to this offset, we don't need to check
 200          * for overflow however.
 201          */
 202         safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
 203
 204         for (i = 0; i < fromwords; i++) {
 205                 ftmp = get_unaligned_le16(&from[i]);
 206                 if (ftmp == 0)
 207                         break;
 208
 209                 /*
 210                  * check to see if converting this character might make the
 211                  * conversion bleed into the null terminator
 212                  */
 213                 if (outlen >= safelen) {
 214                         charlen = cifs_mapchar(tmp, ftmp, codepage, map_type);
 215                         if ((outlen + charlen) > (tolen - nullsize))
 216                                 break;
 217                 }
 218
 219                 /* put converted char into 'to' buffer */
 220                 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
 221                 outlen += charlen;
 222         }
 223
 224         /* properly null-terminate string */
 225         for (i = 0; i < nullsize; i++)
 226                 to[outlen++] = 0;
 227
 228         return outlen;
 229 }
 230
 231 /*
 232  * NAME:        cifs_strtoUTF16()
 233  *
 234  * FUNCTION:    Convert character string to unicode string
 235  *
 236  */
 237 int
 238 cifs_strtoUTF16(__le16 *to, const char *from, int len,
 239               const struct nls_table *codepage)
 240 {
 241         int charlen;
 242         int i;
 243         wchar_t wchar_to; /* needed to quiet sparse */
 244
 245         /* special case for utf8 to handle no plane0 chars */
 246         if (!strcmp(codepage->charset, "utf8")) {
 247                 /*
 248                  * convert utf8 -> utf16, we assume we have enough space
 249                  * as caller should have assumed conversion does not overflow
 250                  * in destination len is length in wchar_t units (16bits)
 251                  */
 252                 i  = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
 253                                        (wchar_t *) to, len);
 254
 255                 /* if success terminate and exit */
 256                 if (i >= 0)
 257                         goto success;
 258                 /*
 259                  * if fails fall back to UCS encoding as this
 260                  * function should not return negative values
 261                  * currently can fail only if source contains
 262                  * invalid encoded characters
 263                  */
 264         }
 265
 266         for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
 267                 charlen = codepage->char2uni(from, len, &wchar_to);
 268                 if (charlen < 1) {
 269                         cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
 270                                  *from, charlen);
 271                         /* A question mark */
 272                         wchar_to = 0x003f;
 273                         charlen = 1;
 274                 }
 275                 put_unaligned_le16(wchar_to, &to[i]);
 276         }
 277
 278 success:
 279         put_unaligned_le16(0, &to[i]);
 280         return i;
 281 }
 282
 283 /*
 284  * cifs_strndup_from_utf16 - copy a string from wire format to the local
 285  * codepage
 286  * @src - source string
 287  * @maxlen - don't walk past this many bytes in the source string
 288  * @is_unicode - is this a unicode string?
 289  * @codepage - destination codepage
 290  *
 291  * Take a string given by the server, convert it to the local codepage and
 292  * put it in a new buffer. Returns a pointer to the new string or NULL on
 293  * error.
 294  */
 295 char *
 296 cifs_strndup_from_utf16(const char *src, const int maxlen,
 297                         const bool is_unicode, const struct nls_table *codepage)
 298 {
 299         int len;
 300         char *dst;
 301
 302         if (is_unicode) {
 303                 len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
 304                 len += nls_nullsize(codepage);
 305                 dst = kmalloc(len, GFP_KERNEL);
 306                 if (!dst)
 307                         return NULL;
 308                 cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
 309                                NO_MAP_UNI_RSVD);
 310         } else {
 311                 len = strnlen(src, maxlen);
 312                 len++;
 313                 dst = kmalloc(len, GFP_KERNEL);
 314                 if (!dst)
 315                         return NULL;
 316                 strlcpy(dst, src, len);
 317         }
 318
 319         return dst;
 320 }
 321
 322 /*
 323  * Convert 16 bit Unicode pathname to wire format from string in current code
 324  * page. Conversion may involve remapping up the six characters that are
 325  * only legal in POSIX-like OS (if they are present in the string). Path
 326  * names are little endian 16 bit Unicode on the wire
 327  */
 328 int
 329 cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 330                  const struct nls_table *cp, int mapChars)
 331 {
 332         int i, charlen;
 333         int j = 0;
 334         char src_char;
 335         __le16 dst_char;
 336         wchar_t tmp;
 337
 338         if (!mapChars)
 339                 return cifs_strtoUTF16(target, source, PATH_MAX, cp);
 340
 341         for (i = 0; i < srclen; j++) {
 342                 src_char = source[i];
 343                 charlen = 1;
 344                 switch (src_char) {
 345                 case 0:
 346                         goto ctoUTF16_out;
 347                 case ':':
 348                         dst_char = cpu_to_le16(UNI_COLON);
 349                         break;
 350                 case '*':
 351                         dst_char = cpu_to_le16(UNI_ASTERISK);
 352                         break;
 353                 case '?':
 354                         dst_char = cpu_to_le16(UNI_QUESTION);
 355                         break;
 356                 case '<':
 357                         dst_char = cpu_to_le16(UNI_LESSTHAN);
 358                         break;
 359                 case '>':
 360                         dst_char = cpu_to_le16(UNI_GRTRTHAN);
 361                         break;
 362                 case '|':
 363                         dst_char = cpu_to_le16(UNI_PIPE);
 364                         break;
 365                 /*
 366                  * FIXME: We can not handle remapping backslash (UNI_SLASH)
 367                  * until all the calls to build_path_from_dentry are modified,
 368                  * as they use backslash as separator.
 369                  */
 370                 default:
 371                         charlen = cp->char2uni(source + i, srclen - i, &tmp);
 372                         dst_char = cpu_to_le16(tmp);
 373
 374                         /*
 375                          * if no match, use question mark, which at least in
 376                          * some cases serves as wild card
 377                          */
 378                         if (charlen < 1) {
 379                                 dst_char = cpu_to_le16(0x003f);
 380                                 charlen = 1;
 381                         }
 382                 }
 383                 /*
 384                  * character may take more than one byte in the source string,
 385                  * but will take exactly two bytes in the target string
 386                  */
 387                 i += charlen;
 388                 put_unaligned(dst_char, &target[j]);
 389         }
 390
 391 ctoUTF16_out:
 392         put_unaligned(0, &target[j]); /* Null terminate target unicode string */
 393         return j;
 394 }
 395
 396 #ifdef CONFIG_CIFS_SMB2
 397 /*
 398  * cifs_local_to_utf16_bytes - how long will a string be after conversion?
 399  * @from - pointer to input string
 400  * @maxbytes - don't go past this many bytes of input string
 401  * @codepage - source codepage
 402  *
 403  * Walk a string and return the number of bytes that the string will
 404  * be after being converted to the given charset, not including any null
 405  * termination required. Don't walk past maxbytes in the source buffer.
 406  */
 407
 408 static int
 409 cifs_local_to_utf16_bytes(const char *from, int len,
 410                           const struct nls_table *codepage)
 411 {
 412         int charlen;
 413         int i;
 414         wchar_t wchar_to;
 415
 416         for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
 417                 charlen = codepage->char2uni(from, len, &wchar_to);
 418                 /* Failed conversion defaults to a question mark */
 419                 if (charlen < 1)
 420                         charlen = 1;
 421         }
 422         return 2 * i; /* UTF16 characters are two bytes */
 423 }
 424
 425 /*
 426  * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage
 427  * @src - source string
 428  * @maxlen - don't walk past this many bytes in the source string
 429  * @utf16_len - the length of the allocated string in bytes (including null)
 430  * @cp - source codepage
 431  * @remap - map special chars
 432  *
 433  * Take a string convert it from the local codepage to UTF16 and
 434  * put it in a new buffer. Returns a pointer to the new string or NULL on
 435  * error.
 436  */
 437 __le16 *
 438 cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
 439                       const struct nls_table *cp, int remap)
 440 {
 441         int len;
 442         __le16 *dst;
 443
 444         len = cifs_local_to_utf16_bytes(src, maxlen, cp);
 445         len += 2; /* NULL */
 446         dst = kmalloc(len, GFP_KERNEL);
 447         if (!dst) {
 448                 *utf16_len = 0;
 449                 return NULL;
 450         }
 451         cifsConvertToUTF16(dst, src, strlen(src), cp, remap);
 452         *utf16_len = len;
 453         return dst;
 454 }
 455 #endif /* CONFIG_CIFS_SMB2 */