udf: Check path length when reading symlink
[pandora-kernel.git] / fs / udf / unicode.c
1 /*
2  * unicode.c
3  *
4  * PURPOSE
5  *      Routines for converting between UTF-8 and OSTA Compressed Unicode.
6  *      Also handles filename mangling
7  *
8  * DESCRIPTION
9  *      OSTA Compressed Unicode is explained in the OSTA UDF specification.
10  *              http://www.osta.org/
 *      UTF-8 is explained in IETF RFC 3629.
 *              https://www.rfc-editor.org/rfc/rfc3629
13  *
14  * COPYRIGHT
15  *      This file is distributed under the terms of the GNU General Public
16  *      License (GPL). Copies of the GPL can be obtained from:
17  *              ftp://prep.ai.mit.edu/pub/gnu/GPL
18  *      Each contributing author retains all rights to their own work.
19  */
20
21 #include "udfdecl.h"
22
23 #include <linux/kernel.h>
24 #include <linux/string.h>       /* for memset */
25 #include <linux/nls.h>
26 #include <linux/crc-itu-t.h>
27 #include <linux/slab.h>
28
29 #include "udf_sb.h"
30
/* Defined near the end of this file; declared here for udf_get_filename(). */
static int udf_translate_to_linux(uint8_t *newName, int newLen,
                                  uint8_t *udfName, int udfLen,
                                  uint8_t *fidName, int fidNameLen);
33
34 static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
35 {
36         if ((!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN - 2))
37                 return 0;
38
39         memset(dest, 0, sizeof(struct ustr));
40         memcpy(dest->u_name, src, strlen);
41         dest->u_cmpID = 0x08;
42         dest->u_len = strlen;
43
44         return strlen;
45 }
46
47 /*
48  * udf_build_ustr
49  */
50 int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
51 {
52         int usesize;
53
54         if (!dest || !ptr || !size)
55                 return -1;
56         BUG_ON(size < 2);
57
58         usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name));
59         usesize = min(usesize, size - 2);
60         dest->u_cmpID = ptr[0];
61         dest->u_len = usesize;
62         memcpy(dest->u_name, ptr + 1, usesize);
63         memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize);
64
65         return 0;
66 }
67
68 /*
69  * udf_build_ustr_exact
70  */
71 static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
72 {
73         if ((!dest) || (!ptr) || (!exactsize))
74                 return -1;
75
76         memset(dest, 0, sizeof(struct ustr));
77         dest->u_cmpID = ptr[0];
78         dest->u_len = exactsize - 1;
79         memcpy(dest->u_name, ptr + 1, exactsize - 1);
80
81         return 0;
82 }
83
84 /*
85  * udf_ocu_to_utf8
86  *
87  * PURPOSE
88  *      Convert OSTA Compressed Unicode to the UTF-8 equivalent.
89  *
90  * PRE-CONDITIONS
91  *      utf                     Pointer to UTF-8 output buffer.
92  *      ocu                     Pointer to OSTA Compressed Unicode input buffer
93  *                              of size UDF_NAME_LEN bytes.
94  *                              both of type "struct ustr *"
95  *
96  * POST-CONDITIONS
97  *      <return>                Zero on success.
98  *
99  * HISTORY
100  *      November 12, 1997 - Andrew E. Mileski
101  *      Written, tested, and released.
102  */
103 int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
104 {
105         const uint8_t *ocu;
106         uint8_t cmp_id, ocu_len;
107         int i;
108
109         ocu_len = ocu_i->u_len;
110         if (ocu_len == 0) {
111                 memset(utf_o, 0, sizeof(struct ustr));
112                 return 0;
113         }
114
115         cmp_id = ocu_i->u_cmpID;
116         if (cmp_id != 8 && cmp_id != 16) {
117                 memset(utf_o, 0, sizeof(struct ustr));
118                 pr_err("unknown compression code (%d) stri=%s\n",
119                        cmp_id, ocu_i->u_name);
120                 return 0;
121         }
122
123         ocu = ocu_i->u_name;
124         utf_o->u_len = 0;
125         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
126
127                 /* Expand OSTA compressed Unicode to Unicode */
128                 uint32_t c = ocu[i++];
129                 if (cmp_id == 16)
130                         c = (c << 8) | ocu[i++];
131
132                 /* Compress Unicode to UTF-8 */
133                 if (c < 0x80U)
134                         utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
135                 else if (c < 0x800U) {
136                         utf_o->u_name[utf_o->u_len++] =
137                                                 (uint8_t)(0xc0 | (c >> 6));
138                         utf_o->u_name[utf_o->u_len++] =
139                                                 (uint8_t)(0x80 | (c & 0x3f));
140                 } else {
141                         utf_o->u_name[utf_o->u_len++] =
142                                                 (uint8_t)(0xe0 | (c >> 12));
143                         utf_o->u_name[utf_o->u_len++] =
144                                                 (uint8_t)(0x80 |
145                                                           ((c >> 6) & 0x3f));
146                         utf_o->u_name[utf_o->u_len++] =
147                                                 (uint8_t)(0x80 | (c & 0x3f));
148                 }
149         }
150         utf_o->u_cmpID = 8;
151
152         return utf_o->u_len;
153 }
154
155 /*
156  *
157  * udf_utf8_to_ocu
158  *
159  * PURPOSE
160  *      Convert UTF-8 to the OSTA Compressed Unicode equivalent.
161  *
162  * DESCRIPTION
163  *      This routine is only called by udf_lookup().
164  *
165  * PRE-CONDITIONS
166  *      ocu                     Pointer to OSTA Compressed Unicode output
167  *                              buffer of size UDF_NAME_LEN bytes.
168  *      utf                     Pointer to UTF-8 input buffer.
169  *      utf_len                 Length of UTF-8 input buffer in bytes.
170  *
171  * POST-CONDITIONS
172  *      <return>                Zero on success.
173  *
174  * HISTORY
175  *      November 12, 1997 - Andrew E. Mileski
176  *      Written, tested, and released.
177  */
178 static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
179 {
180         unsigned c, i, max_val, utf_char;
181         int utf_cnt, u_len;
182
183         memset(ocu, 0, sizeof(dstring) * length);
184         ocu[0] = 8;
185         max_val = 0xffU;
186
187 try_again:
188         u_len = 0U;
189         utf_char = 0U;
190         utf_cnt = 0U;
191         for (i = 0U; i < utf->u_len; i++) {
192                 c = (uint8_t)utf->u_name[i];
193
194                 /* Complete a multi-byte UTF-8 character */
195                 if (utf_cnt) {
196                         utf_char = (utf_char << 6) | (c & 0x3fU);
197                         if (--utf_cnt)
198                                 continue;
199                 } else {
200                         /* Check for a multi-byte UTF-8 character */
201                         if (c & 0x80U) {
202                                 /* Start a multi-byte UTF-8 character */
203                                 if ((c & 0xe0U) == 0xc0U) {
204                                         utf_char = c & 0x1fU;
205                                         utf_cnt = 1;
206                                 } else if ((c & 0xf0U) == 0xe0U) {
207                                         utf_char = c & 0x0fU;
208                                         utf_cnt = 2;
209                                 } else if ((c & 0xf8U) == 0xf0U) {
210                                         utf_char = c & 0x07U;
211                                         utf_cnt = 3;
212                                 } else if ((c & 0xfcU) == 0xf8U) {
213                                         utf_char = c & 0x03U;
214                                         utf_cnt = 4;
215                                 } else if ((c & 0xfeU) == 0xfcU) {
216                                         utf_char = c & 0x01U;
217                                         utf_cnt = 5;
218                                 } else {
219                                         goto error_out;
220                                 }
221                                 continue;
222                         } else {
223                                 /* Single byte UTF-8 character (most common) */
224                                 utf_char = c;
225                         }
226                 }
227
228                 /* Choose no compression if necessary */
229                 if (utf_char > max_val) {
230                         if (max_val == 0xffU) {
231                                 max_val = 0xffffU;
232                                 ocu[0] = (uint8_t)0x10U;
233                                 goto try_again;
234                         }
235                         goto error_out;
236                 }
237
238                 if (max_val == 0xffffU)
239                         ocu[++u_len] = (uint8_t)(utf_char >> 8);
240                 ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
241         }
242
243         if (utf_cnt) {
244 error_out:
245                 ocu[++u_len] = '?';
246                 printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
247         }
248
249         ocu[length - 1] = (uint8_t)u_len + 1;
250
251         return u_len + 1;
252 }
253
254 static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
255                         const struct ustr *ocu_i)
256 {
257         const uint8_t *ocu;
258         uint8_t cmp_id, ocu_len;
259         int i, len;
260
261
262         ocu_len = ocu_i->u_len;
263         if (ocu_len == 0) {
264                 memset(utf_o, 0, sizeof(struct ustr));
265                 return 0;
266         }
267
268         cmp_id = ocu_i->u_cmpID;
269         if (cmp_id != 8 && cmp_id != 16) {
270                 memset(utf_o, 0, sizeof(struct ustr));
271                 pr_err("unknown compression code (%d) stri=%s\n",
272                        cmp_id, ocu_i->u_name);
273                 return 0;
274         }
275
276         ocu = ocu_i->u_name;
277         utf_o->u_len = 0;
278         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
279                 /* Expand OSTA compressed Unicode to Unicode */
280                 uint32_t c = ocu[i++];
281                 if (cmp_id == 16)
282                         c = (c << 8) | ocu[i++];
283
284                 len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
285                                     UDF_NAME_LEN - utf_o->u_len);
286                 /* Valid character? */
287                 if (len >= 0)
288                         utf_o->u_len += len;
289                 else
290                         utf_o->u_name[utf_o->u_len++] = '?';
291         }
292         utf_o->u_cmpID = 8;
293
294         return utf_o->u_len;
295 }
296
297 static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
298                         int length)
299 {
300         int len;
301         unsigned i, max_val;
302         uint16_t uni_char;
303         int u_len;
304
305         memset(ocu, 0, sizeof(dstring) * length);
306         ocu[0] = 8;
307         max_val = 0xffU;
308
309 try_again:
310         u_len = 0U;
311         for (i = 0U; i < uni->u_len; i++) {
312                 len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
313                 if (!len)
314                         continue;
315                 /* Invalid character, deal with it */
316                 if (len < 0) {
317                         len = 1;
318                         uni_char = '?';
319                 }
320
321                 if (uni_char > max_val) {
322                         max_val = 0xffffU;
323                         ocu[0] = (uint8_t)0x10U;
324                         goto try_again;
325                 }
326
327                 if (max_val == 0xffffU)
328                         ocu[++u_len] = (uint8_t)(uni_char >> 8);
329                 ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
330                 i += len - 1;
331         }
332
333         ocu[length - 1] = (uint8_t)u_len + 1;
334         return u_len + 1;
335 }
336
337 int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
338                      uint8_t *dname, int dlen)
339 {
340         struct ustr *filename, *unifilename;
341         int len = 0;
342
343         filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
344         if (!filename)
345                 return 0;
346
347         unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
348         if (!unifilename)
349                 goto out1;
350
351         if (udf_build_ustr_exact(unifilename, sname, slen))
352                 goto out2;
353
354         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
355                 if (!udf_CS0toUTF8(filename, unifilename)) {
356                         udf_debug("Failed in udf_get_filename: sname = %s\n",
357                                   sname);
358                         goto out2;
359                 }
360         } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
361                 if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
362                                   unifilename)) {
363                         udf_debug("Failed in udf_get_filename: sname = %s\n",
364                                   sname);
365                         goto out2;
366                 }
367         } else
368                 goto out2;
369
370         len = udf_translate_to_linux(dname, dlen,
371                                      filename->u_name, filename->u_len,
372                                      unifilename->u_name, unifilename->u_len);
373 out2:
374         kfree(unifilename);
375 out1:
376         kfree(filename);
377         return len;
378 }
379
380 int udf_put_filename(struct super_block *sb, const uint8_t *sname,
381                      uint8_t *dname, int flen)
382 {
383         struct ustr unifilename;
384         int namelen;
385
386         if (!udf_char_to_ustr(&unifilename, sname, flen))
387                 return 0;
388
389         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
390                 namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN);
391                 if (!namelen)
392                         return 0;
393         } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
394                 namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
395                                         &unifilename, UDF_NAME_LEN);
396                 if (!namelen)
397                         return 0;
398         } else
399                 return 0;
400
401         return namelen;
402 }
403
/* Replacement for a run of '/' or NUL characters in a name */
#define ILLEGAL_CHAR_MARK       '_'
/* Separator between the base name and its extension */
#define EXT_MARK                '.'
/* Introduces the CRC digits in a mangled name */
#define CRC_MARK                '#'
/* Longest extension (in chars, without EXT_MARK) that is recognised */
#define EXT_SIZE                5
/* Number of chars we need to store generated CRC to make filename unique */
#define CRC_LEN                 5

/*
 * udf_translate_to_linux
 *
 * Copy the decoded name @udfName (@udfLen bytes) into @newName (at most
 * @newLen bytes), replacing each run of '/' or NUL characters by a single
 * ILLEGAL_CHAR_MARK.
 *
 * When the name is "." or "..", had characters replaced, or did not fit,
 * it is mangled to "<base>#XXXX[.ext]": XXXX is the CRC of the raw
 * identifier @fidName (@fidNameLen bytes) as four hex digits, and the
 * extension (at most EXT_SIZE chars) is kept when there is room for it.
 * The base is shortened as needed so that the result never exceeds
 * @newLen bytes.
 *
 * Returns the number of bytes stored in @newName; 0 when @udfLen is zero
 * or when @newLen is too small to hold even the CRC part of a mangled
 * name.
 */
static int udf_translate_to_linux(uint8_t *newName, int newLen,
                                  uint8_t *udfName, int udfLen,
                                  uint8_t *fidName, int fidNameLen)
{
        int index, newIndex = 0, needsCRC = 0;
        int extIndex = 0, newExtIndex = 0, hasExt = 0;
        unsigned short valueCRC;
        uint8_t curr;
        const uint8_t hexChar[] = "0123456789ABCDEF";

        if (udfName[0] == '.' &&
            (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
                /* "." and ".." are always mangled; make sure that can fit */
                if (newLen < CRC_LEN)
                        return 0;
                needsCRC = 1;
                newIndex = udfLen;
                memcpy(newName, udfName, udfLen);
        } else {
                for (index = 0; index < udfLen; index++) {
                        curr = udfName[index];
                        if (curr == '/' || curr == 0) {
                                needsCRC = 1;
                                curr = ILLEGAL_CHAR_MARK;
                                /* Collapse a run of illegal chars into one */
                                while (index + 1 < udfLen &&
                                                (udfName[index + 1] == '/' ||
                                                 udfName[index + 1] == 0))
                                        index++;
                        }
                        /* Remember the last '.' that starts a short suffix */
                        if (curr == EXT_MARK &&
                                        (udfLen - index - 1) <= EXT_SIZE) {
                                if (udfLen == index + 1)
                                        hasExt = 0;
                                else {
                                        hasExt = 1;
                                        extIndex = index;
                                        newExtIndex = newIndex;
                                }
                        }
                        if (newIndex < newLen)
                                newName[newIndex++] = curr;
                        else
                                needsCRC = 1;
                }
        }
        if (needsCRC) {
                uint8_t ext[EXT_SIZE];
                int localExtIndex = 0;
                int maxFilenameLen;

                /* Not even "#XXXX" fits: report that nothing was produced */
                if (newLen < CRC_LEN)
                        return 0;

                if (hasExt) {
                        /* Re-read the extension from the source name */
                        for (index = 0;
                             index < EXT_SIZE && extIndex + index + 1 < udfLen;
                             index++) {
                                curr = udfName[extIndex + index + 1];

                                if (curr == '/' || curr == 0) {
                                        curr = ILLEGAL_CHAR_MARK;
                                        while (extIndex + index + 2 < udfLen &&
                                              (index + 1 < EXT_SIZE &&
                                                (udfName[extIndex + index + 2] == '/' ||
                                                 udfName[extIndex + index + 2] == 0)))
                                                index++;
                                }
                                ext[localExtIndex++] = curr;
                        }
                        /* Drop the extension if "#XXXX" + ".ext" cannot fit */
                        if (CRC_LEN + 1 + localExtIndex > newLen)
                                hasExt = 0;
                }

                if (hasExt) {
                        /*
                         * Reserve room for the CRC, EXT_MARK and the
                         * extension, and end the base before the extension
                         * so it is not emitted twice.
                         */
                        maxFilenameLen = newLen - CRC_LEN - 1 - localExtIndex;
                        newIndex = newExtIndex;
                } else {
                        maxFilenameLen = newLen - CRC_LEN;
                }
                if (newIndex > maxFilenameLen)
                        newIndex = maxFilenameLen;

                newName[newIndex++] = CRC_MARK;
                valueCRC = crc_itu_t(0, fidName, fidNameLen);
                newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
                newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
                newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
                newName[newIndex++] = hexChar[(valueCRC & 0x000f)];

                if (hasExt) {
                        newName[newIndex++] = EXT_MARK;
                        for (index = 0; index < localExtIndex; index++)
                                newName[newIndex++] = ext[index];
                }
        }

        return newIndex;
}