lib/string_helpers: introduce generic string_unescape
author Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tue, 30 Apr 2013 22:27:30 +0000 (15:27 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 1 May 2013 00:04:03 +0000 (17:04 -0700)
There are several places in the kernel where modules unescape input to
convert C-style escape sequences into byte codes.

The patch provides a generic implementation of this approach. Test cases are
also included in the patch.

[akpm@linux-foundation.org: clarify comment]
[akpm@linux-foundation.org: export get_random_int() to modules]
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Samuel Thibault <samuel.thibault@ens-lyon.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: William Hubbs <w.d.hubbs@gmail.com>
Cc: Chris Brannon <chris@the-brannons.com>
Cc: Kirk Reiser <kirk@braille.uwo.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
drivers/char/random.c
include/linux/string_helpers.h
lib/Kconfig.debug
lib/Makefile
lib/string_helpers.c
lib/test-string_helpers.c [new file with mode: 0644]

index 32a6c57..cd9a621 100644 (file)
@@ -1485,6 +1485,7 @@ unsigned int get_random_int(void)
 
        return ret;
 }
+EXPORT_SYMBOL(get_random_int);
 
 /*
  * randomize_range() returns a start address such that
index a3eb2f6..3eeee96 100644 (file)
@@ -13,4 +13,62 @@ enum string_size_units {
 int string_get_size(u64 size, enum string_size_units units,
                    char *buf, int len);
 
+#define UNESCAPE_SPACE         0x01
+#define UNESCAPE_OCTAL         0x02
+#define UNESCAPE_HEX           0x04
+#define UNESCAPE_SPECIAL       0x08
+#define UNESCAPE_ANY           \
+       (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL)
+
+/**
+ * string_unescape - unquote characters in the given string
+ * @src:       source buffer (escaped)
+ * @dst:       destination buffer (unescaped)
+ * @size:      size of the destination buffer (0 means no limit)
+ * @flags:     combination of the flags (bitwise OR):
+ *     %UNESCAPE_SPACE:
+ *             '\f' - form feed
+ *             '\n' - new line
+ *             '\r' - carriage return
+ *             '\t' - horizontal tab
+ *             '\v' - vertical tab
+ *     %UNESCAPE_OCTAL:
+ *             '\NNN' - byte with octal value NNN (1 to 3 digits)
+ *     %UNESCAPE_HEX:
+ *             '\xHH' - byte with hexadecimal value HH (1 to 2 digits)
+ *     %UNESCAPE_SPECIAL:
+ *             '\"' - double quote
+ *             '\\' - backslash
+ *             '\a' - alert (BEL)
+ *             '\e' - escape
+ *     %UNESCAPE_ANY:
+ *             all previous together
+ *
+ * Returns the number of characters written to the destination buffer,
+ * excluding the trailing '\0'.
+ *
+ * Because the size of the output will be the same as or less than the size of
+ * the input, the transformation may be performed in place.
+ *
+ * Caller must provide valid source and destination pointers. Be aware that
+ * the destination buffer will always be NUL-terminated. The source string must
+ * be NUL-terminated as well.
+ */
+int string_unescape(char *src, char *dst, size_t size, unsigned int flags);
+
+static inline int string_unescape_inplace(char *buf, unsigned int flags)  /* unescape buf onto itself using the given UNESCAPE_* flags */
+{
+       return string_unescape(buf, buf, 0, flags);  /* same buffer as source and destination; size 0 */
+}
+
+static inline int string_unescape_any(char *src, char *dst, size_t size)  /* string_unescape() with every escape class enabled */
+{
+       return string_unescape(src, dst, size, UNESCAPE_ANY);  /* UNESCAPE_ANY = SPACE | OCTAL | HEX | SPECIAL */
+}
+
+static inline int string_unescape_any_inplace(char *buf)  /* unescape buf onto itself with every escape class enabled */
+{
+       return string_unescape_any(buf, buf, 0);  /* same buffer as source and destination; size 0 */
+}
+
 #endif
index 28be08c..77ebaa3 100644 (file)
@@ -1463,5 +1463,8 @@ source "lib/Kconfig.kgdb"
 
 source "lib/Kconfig.kmemcheck"
 
+config TEST_STRING_HELPERS
+       tristate "Test functions located in the string_helpers module at runtime"
+
 config TEST_KSTRTOX
        tristate "Test kstrto*() family of functions at runtime"
index 6e2cc56..23c9a0f 100644 (file)
@@ -22,8 +22,10 @@ lib-y        += kobject.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
         bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-        string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
+        gcd.o lcm.o list_sort.o uuid.o flex_array.o \
         bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o
+obj-y += string_helpers.o
+obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 
index 1cffc22..ed5c145 100644 (file)
@@ -2,10 +2,12 @@
  * Helpers for formatting and printing strings
  *
  * Copyright 31 August 2008 James Bottomley
+ * Copyright (C) 2013, Intel Corporation
  */
 #include <linux/kernel.h>
 #include <linux/math64.h>
 #include <linux/export.h>
+#include <linux/ctype.h>
 #include <linux/string_helpers.h>
 
 /**
@@ -66,3 +68,134 @@ int string_get_size(u64 size, const enum string_size_units units,
        return 0;
 }
 EXPORT_SYMBOL(string_get_size);
+
+static bool unescape_space(char **src, char **dst)  /* decode one whitespace escape letter (n, r, t, v, f) found at **src */
+{
+       char *p = *dst, *q = *src;  /* p: output position, q: input position (string_unescape has already skipped the backslash) */
+
+       switch (*q) {
+       case 'n':
+               *p = '\n';
+               break;
+       case 'r':
+               *p = '\r';
+               break;
+       case 't':
+               *p = '\t';
+               break;
+       case 'v':
+               *p = '\v';
+               break;
+       case 'f':
+               *p = '\f';
+               break;
+       default:
+               return false;  /* not a whitespace escape: nothing written, caller's pointers unchanged */
+       }
+       *dst += 1;  /* one byte was written */
+       *src += 1;  /* one escape letter was consumed */
+       return true;
+}
+
+static bool unescape_octal(char **src, char **dst)  /* decode an octal escape of one to three digits found at **src */
+{
+       char *p = *dst, *q = *src;  /* p: output position, q: input scan position */
+       u8 num;
+
+       if (isodigit(*q) == 0)  /* isodigit() is defined outside this view; presumably true for '0'..'7' */
+               return false;
+
+       num = (*q++) & 7;  /* keep the low three bits of the digit character as its value */
+       while (num < 32 && isodigit(*q) && (q - *src < 3)) {  /* at most 3 digits; num < 32 keeps (num << 3) + 7 within a u8 */
+               num <<= 3;
+               num += (*q++) & 7;
+       }
+       *p = num;
+       *dst += 1;  /* one byte was written */
+       *src = q;  /* input advances past every digit consumed */
+       return true;
+}
+
+static bool unescape_hex(char **src, char **dst)  /* decode an 'x' escape followed by one or two hex digits found at **src */
+{
+       char *p = *dst, *q = *src;  /* p: output position, q: input scan position */
+       int digit;
+       u8 num;
+
+       if (*q++ != 'x')  /* a hex escape must start with 'x' */
+               return false;
+
+       num = digit = hex_to_bin(*q++);  /* first hex digit is mandatory */
+       if (digit < 0)  /* negative result is treated as "not a hex digit"; caller's pointers are still unchanged here */
+               return false;
+
+       digit = hex_to_bin(*q);  /* second hex digit is optional */
+       if (digit >= 0) {
+               q++;
+               num = (num << 4) | digit;
+       }
+       *p = num;
+       *dst += 1;  /* one byte was written */
+       *src = q;  /* input advances past 'x' and the digits consumed */
+       return true;
+}
+
+static bool unescape_special(char **src, char **dst)  /* decode one of the escapes: double quote, backslash, a (BEL), e (ESC) */
+{
+       char *p = *dst, *q = *src;  /* p: output position, q: input position */
+
+       switch (*q) {
+       case '\"':
+               *p = '\"';
+               break;
+       case '\\':
+               *p = '\\';
+               break;
+       case 'a':
+               *p = '\a';
+               break;
+       case 'e':
+               *p = '\e';
+               break;
+       default:
+               return false;  /* not a special escape: nothing written, caller's pointers unchanged */
+       }
+       *dst += 1;  /* one byte was written */
+       *src += 1;  /* one escape character was consumed */
+       return true;
+}
+
+int string_unescape(char *src, char *dst, size_t size, unsigned int flags)  /* copy src to dst, decoding the escape classes selected by flags */
+{
+       char *out = dst;  /* write cursor into the destination */
+
+       while (*src && --size) {  /* size is unsigned: a starting value of 0 wraps on --size, so 0 acts as no limit */
+               if (src[0] == '\\' && src[1] != '\0' && size > 1) {  /* a trailing lone backslash is copied verbatim below */
+                       src++;  /* skip the backslash */
+                       size--;  /* budget is charged for the backslash as well as the following character */
+
+                       if (flags & UNESCAPE_SPACE &&
+                                       unescape_space(&src, &out))
+                               continue;
+
+                       if (flags & UNESCAPE_OCTAL &&
+                                       unescape_octal(&src, &out))
+                               continue;
+
+                       if (flags & UNESCAPE_HEX &&
+                                       unescape_hex(&src, &out))
+                               continue;
+
+                       if (flags & UNESCAPE_SPECIAL &&
+                                       unescape_special(&src, &out))
+                               continue;
+
+                       *out++ = '\\';  /* no enabled decoder matched: keep the backslash, the next character is copied below */
+               }
+               *out++ = *src++;
+       }
+       *out = '\0';  /* destination is always terminated */
+
+       return out - dst;  /* bytes written, not counting the terminator */
+}
+EXPORT_SYMBOL(string_unescape);
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
new file mode 100644 (file)
index 0000000..6ac48de
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Test cases for lib/string_helpers.c module.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+
+struct test_string {  /* one test fragment for string_unescape() */
+       const char *in;  /* escaped input text */
+       const char *out;  /* expected output when at least one bit of flags is enabled */
+       unsigned int flags;  /* UNESCAPE_* bit(s) that make this fragment decode */
+};
+
+static const struct test_string strings[] __initconst = {  /* one fragment per escape class; the test concatenates them */
+       {
+               .in = "\\f\\ \\n\\r\\t\\v",
+               .out = "\f\\ \n\r\t\v",  /* backslash-space is not an escape and stays as is */
+               .flags = UNESCAPE_SPACE,
+       },
+       {
+               .in = "\\40\\1\\387\\0064\\05\\040\\8a\\110\\777",
+               .out = " \001\00387\0064\005 \\8aH?7",  /* non-octal digits end an escape; the final 777 decodes as 77 then a literal 7 */
+               .flags = UNESCAPE_OCTAL,
+       },
+       {
+               .in = "\\xv\\xa\\x2c\\xD\\x6f2",
+               .out = "\\xv\n,\ro2",  /* x followed by a non-hex character stays as is; at most two hex digits are consumed */
+               .flags = UNESCAPE_HEX,
+       },
+       {
+               .in = "\\h\\\\\\\"\\a\\e\\",
+               .out = "\\h\\\"\a\e\\",  /* unknown escape h and the trailing lone backslash stay as is */
+               .flags = UNESCAPE_SPECIAL,
+       },
+};
+
+static void __init test_string_unescape(unsigned int flags, bool inplace)  /* run one unescape test for the given flags and log a hex dump on mismatch */
+{
+       char in[256];  /* concatenated escaped input */
+       char out_test[256];  /* expected output; compared by length, never NUL-terminated */
+       char out_real[256];  /* actual output from the function under test */
+       int i, p = 0, q_test = 0, q_real = sizeof(out_real);  /* p and q_test are fill lengths; q_real starts as the destination size */
+
+       for (i = 0; i < ARRAY_SIZE(strings); i++) {
+               const char *s = strings[i].in;
+               int len = strlen(strings[i].in);
+
+               /* Copy string to in buffer */
+               memcpy(&in[p], s, len);
+               p += len;
+
+               /* Copy expected result for given flags */
+               if (flags & strings[i].flags) {  /* fragment's class enabled: expect the decoded form, otherwise the input unchanged */
+                       s = strings[i].out;
+                       len = strlen(strings[i].out);
+               }
+               memcpy(&out_test[q_test], s, len);
+               q_test += len;
+       }
+       in[p++] = '\0';  /* p now counts the terminator too */
+
+       /* Call string_unescape and compare result */
+       if (inplace) {
+               memcpy(out_real, in, p);  /* in-place variants work on a copy so the input stays available for the dump */
+               if (flags == UNESCAPE_ANY)
+                       q_real = string_unescape_any_inplace(out_real);
+               else
+                       q_real = string_unescape_inplace(out_real, flags);
+       } else if (flags == UNESCAPE_ANY) {
+               q_real = string_unescape_any(in, out_real, q_real);
+       } else {
+               q_real = string_unescape(in, out_real, q_real, flags);
+       }
+
+       if (q_real != q_test || memcmp(out_test, out_real, q_test)) {  /* both the length and the bytes must match */
+               pr_warn("Test failed: flags = %u\n", flags);
+               print_hex_dump(KERN_WARNING, "Input: ",
+                              DUMP_PREFIX_NONE, 16, 1, in, p - 1, true);
+               print_hex_dump(KERN_WARNING, "Expected: ",
+                              DUMP_PREFIX_NONE, 16, 1, out_test, q_test, true);
+               print_hex_dump(KERN_WARNING, "Got: ",
+                              DUMP_PREFIX_NONE, 16, 1, out_real, q_real, true);
+       }
+}
+
+static int __init test_string_helpers_init(void)  /* module entry point: runs the string_unescape tests */
+{
+       unsigned int i;
+
+       pr_info("Running tests...\n");
+       for (i = 0; i < UNESCAPE_ANY + 1; i++)  /* every flag combination from 0 through UNESCAPE_ANY, separate buffers */
+               test_string_unescape(i, false);
+       test_string_unescape(get_random_int() % (UNESCAPE_ANY + 1), true);  /* a single in-place run with a randomly chosen combination */
+
+       return -EINVAL;  /* NOTE(review): returned unconditionally, even when all tests pass; presumably so the test module never stays loaded. Confirm. */
+}
+module_init(test_string_helpers_init);
+MODULE_LICENSE("Dual BSD/GPL");