fs/cifs/cifs_unicode.c

   1 /*
   2  *   fs/cifs/cifs_unicode.c
   3  *
   4  *   Copyright (c) International Business Machines  Corp., 2000,2005
   5  *   Modified by Steve French (sfrench@us.ibm.com)
   6  *
   7  *   This program is free software;  you can redistribute it and/or modify
   8  *   it under the terms of the GNU General Public License as published by
   9  *   the Free Software Foundation; either version 2 of the License, or
  10  *   (at your option) any later version.
  11  *
  12  *   This program is distributed in the hope that it will be useful,
  13  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  15  *   the GNU General Public License for more details.
  16  *
  17  *   You should have received a copy of the GNU General Public License
  18  *   along with this program;  if not, write to the Free Software
  19  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20  */
  21 #include <linux/fs.h>
  22 #include "cifs_unicode.h"
  23 #include "cifs_uniupr.h"
  24 #include "cifspdu.h"
  25 #include "cifsglob.h"
  26 #include "cifs_debug.h"
  27
  28 /*
  29  * cifs_mapchar - convert a little-endian char to proper char in codepage
  30  * @target - where converted character should be copied
  31  * @src_char - 2 byte little-endian source character
  32  * @cp - codepage to which character should be converted
  33  * @mapchar - should character be mapped according to mapchars mount option?
  34  *
  35  * This function handles the conversion of a single character. It is the
  36  * responsibility of the caller to ensure that the target buffer is large
  37  * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
  38  */
  39 static int
  40 cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp,
  41              bool mapchar)
  42 {
  43         int len = 1;
  44
  45         if (!mapchar)
  46                 goto cp_convert;
  47
  48         /*
  49          * BB: Cannot handle remapping UNI_SLASH until all the calls to
  50          *     build_path_from_dentry are modified, as they use slash as
  51          *     separator.
  52          */
  53         switch (le16_to_cpu(src_char)) {
  54         case UNI_COLON:
  55                 *target = ':';
  56                 break;
  57         case UNI_ASTERIK:
  58                 *target = '*';
  59                 break;
  60         case UNI_QUESTION:
  61                 *target = '?';
  62                 break;
  63         case UNI_PIPE:
  64                 *target = '|';
  65                 break;
  66         case UNI_GRTRTHAN:
  67                 *target = '>';
  68                 break;
  69         case UNI_LESSTHAN:
  70                 *target = '<';
  71                 break;
  72         default:
  73                 goto cp_convert;
  74         }
  75
  76 out:
  77         return len;
  78
  79 cp_convert:
  80         len = cp->uni2char(le16_to_cpu(src_char), target,
  81                            NLS_MAX_CHARSET_SIZE);
  82         if (len <= 0) {
  83                 *target = '?';
  84                 len = 1;
  85         }
  86         goto out;
  87 }
  88
  89 /*
  90  * cifs_from_ucs2 - convert utf16le string to local charset
  91  * @to - destination buffer
  92  * @from - source buffer
  93  * @tolen - destination buffer size (in bytes)
  94  * @fromlen - source buffer size (in bytes)
  95  * @codepage - codepage to which characters should be converted
  96  * @mapchar - should characters be remapped according to the mapchars option?
  97  *
  98  * Convert a little-endian ucs2le string (as sent by the server) to a string
  99  * in the provided codepage. The tolen and fromlen parameters are to ensure
 100  * that the code doesn't walk off of the end of the buffer (which is always
 101  * a danger if the alignment of the source buffer is off). The destination
 102  * string is always properly null terminated and fits in the destination
 103  * buffer. Returns the length of the destination string in bytes (including
 104  * null terminator).
 105  *
 106  * Note that some windows versions actually send multiword UTF-16 characters
 107  * instead of straight UCS-2. The linux nls routines however aren't able to
 108  * deal with those characters properly. In the event that we get some of
 109  * those characters, they won't be translated properly.
 110  */
 111 int
 112 cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
 113                  const struct nls_table *codepage, bool mapchar)
 114 {
 115         int i, charlen, safelen;
 116         int outlen = 0;
 117         int nullsize = nls_nullsize(codepage);
 118         int fromwords = fromlen / 2;
 119         char tmp[NLS_MAX_CHARSET_SIZE];
 120
 121         /*
 122          * because the chars can be of varying widths, we need to take care
 123          * not to overflow the destination buffer when we get close to the
 124          * end of it. Until we get to this offset, we don't need to check
 125          * for overflow however.
 126          */
 127         safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
 128
 129         for (i = 0; i < fromwords && from[i]; i++) {
 130                 /*
 131                  * check to see if converting this character might make the
 132                  * conversion bleed into the null terminator
 133                  */
 134                 if (outlen >= safelen) {
 135                         charlen = cifs_mapchar(tmp, from[i], codepage, mapchar);
 136                         if ((outlen + charlen) > (tolen - nullsize))
 137                                 break;
 138                 }
 139
 140                 /* put converted char into 'to' buffer */
 141                 charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar);
 142                 outlen += charlen;
 143         }
 144
 145         /* properly null-terminate string */
 146         for (i = 0; i < nullsize; i++)
 147                 to[outlen++] = 0;
 148
 149         return outlen;
 150 }
 151
 152 /*
 153  * NAME:        cifs_strfromUCS()
 154  *
 155  * FUNCTION:    Convert little-endian unicode string to character string
 156  *
 157  */
 158 int
 159 cifs_strfromUCS_le(char *to, const __le16 *from,
 160                    int len, const struct nls_table *codepage)
 161 {
 162         int i;
 163         int outlen = 0;
 164
 165         for (i = 0; (i < len) && from[i]; i++) {
 166                 int charlen;
 167                 /* 2.4.0 kernel or greater */
 168                 charlen =
 169                     codepage->uni2char(le16_to_cpu(from[i]), &to[outlen],
 170                                        NLS_MAX_CHARSET_SIZE);
 171                 if (charlen > 0) {
 172                         outlen += charlen;
 173                 } else {
 174                         to[outlen++] = '?';
 175                 }
 176         }
 177         to[outlen] = 0;
 178         return outlen;
 179 }
 180
 181 /*
 182  * NAME:        cifs_strtoUCS()
 183  *
 184  * FUNCTION:    Convert character string to unicode string
 185  *
 186  */
 187 int
 188 cifs_strtoUCS(__le16 *to, const char *from, int len,
 189               const struct nls_table *codepage)
 190 {
 191         int charlen;
 192         int i;
 193         wchar_t *wchar_to = (wchar_t *)to; /* needed to quiet sparse */
 194
 195         for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
 196
 197                 /* works for 2.4.0 kernel or later */
 198                 charlen = codepage->char2uni(from, len, &wchar_to[i]);
 199                 if (charlen < 1) {
 200                         cERROR(1,
 201                                ("strtoUCS: char2uni of %d returned %d",
 202                                 (int)*from, charlen));
 203                         /* A question mark */
 204                         to[i] = cpu_to_le16(0x003f);
 205                         charlen = 1;
 206                 } else
 207                         to[i] = cpu_to_le16(wchar_to[i]);
 208
 209         }
 210
 211         to[i] = 0;
 212         return i;
 213 }
 214