printk: convert byte-buffer to variable-length record buffer
[pandora-kernel.git] / kernel / printk.c
index b663c2c..7435732 100644 (file)
@@ -54,8 +54,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
 {
 }
 
-#define __LOG_BUF_LEN  (1 << CONFIG_LOG_BUF_SHIFT)
-
 /* printk's without a loglevel use this.. */
 #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
 
@@ -98,24 +96,6 @@ EXPORT_SYMBOL_GPL(console_drivers);
  */
 static int console_locked, console_suspended;
 
-/*
- * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
- * It is also used in interesting ways to provide interlocking in
- * console_unlock();.
- */
-static DEFINE_RAW_SPINLOCK(logbuf_lock);
-
-#define LOG_BUF_MASK (log_buf_len-1)
-#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
-
-/*
- * The indices into log_buf are not constrained to log_buf_len - they
- * must be masked before subscripting
- */
-static unsigned log_start;     /* Index into log_buf: next char to be read by syslog() */
-static unsigned con_start;     /* Index into log_buf: next char to be sent to consoles */
-static unsigned log_end;       /* Index into log_buf: most-recently-written-char + 1 */
-
 /*
  * If exclusive_console is non-NULL then only this console is to be printed to.
  */
@@ -146,12 +126,176 @@ EXPORT_SYMBOL(console_set_on_cmdline);
 static int console_may_schedule;
 
 #ifdef CONFIG_PRINTK
+/*
+ * The printk log buffer consists of a chain of concatenated variable
+ * length records. Every record starts with a record header, containing
+ * the overall length of the record.
+ *
+ * The heads to the first and last entry in the buffer, as well as the
+ * sequence numbers of these both entries are maintained when messages
+ * are stored..
+ *
+ * If the heads indicate available messages, the length in the header
+ * tells the start next message. A length == 0 for the next message
+ * indicates a wrap-around to the beginning of the buffer.
+ *
+ * Every record carries the monotonic timestamp in microseconds, as well as
+ * the standard userspace syslog level and syslog facility. The usual
+ * kernel messages use LOG_KERN; userspace-injected messages always carry
+ * a matching syslog facility, by default LOG_USER. The origin of every
+ * message can be reliably determined that way.
+ *
+ * The human readable log message directly follows the message header. The
+ * length of the message text is stored in the header, the stored message
+ * is not terminated.
+ *
+ */
+
+struct log {
+       u64 ts_nsec;            /* timestamp in nanoseconds */
+       u16 len;                /* length of entire record */
+       u16 text_len;           /* length of text buffer */
+       u16 dict_len;           /* length of dictionary buffer */
+       u16 level;              /* syslog level + facility */
+};
+
+/*
+ * The logbuf_lock protects kmsg buffer, indices, counters. It is also
+ * used in interesting ways to provide interlocking in console_unlock();
+ */
+static DEFINE_RAW_SPINLOCK(logbuf_lock);
 
+/* cpu currently holding logbuf_lock */
+static volatile unsigned int logbuf_cpu = UINT_MAX;
+
+#define LOG_LINE_MAX 1024
+
+/* record buffer */
+#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
 static char __log_buf[__LOG_BUF_LEN];
 static char *log_buf = __log_buf;
-static int log_buf_len = __LOG_BUF_LEN;
-static unsigned logged_chars; /* Number of chars produced since last read+clear operation */
-static int saved_console_loglevel = -1;
+static u32 log_buf_len = __LOG_BUF_LEN;
+
+/* index and sequence number of the first record stored in the buffer */
+static u64 log_first_seq;
+static u32 log_first_idx;
+
+/* index and sequence number of the next record to store in the buffer */
+static u64 log_next_seq;
+static u32 log_next_idx;
+
+/* the next printk record to read after the last 'clear' command */
+static u64 clear_seq;
+static u32 clear_idx;
+
+/* the next printk record to read by syslog(READ) or /proc/kmsg */
+static u64 syslog_seq;
+static u32 syslog_idx;
+
+/* human readable text of the record */
+static char *log_text(const struct log *msg)
+{
+       return (char *)msg + sizeof(struct log);
+}
+
+/* optional key/value pair dictionary attached to the record */
+static char *log_dict(const struct log *msg)
+{
+       return (char *)msg + sizeof(struct log) + msg->text_len;
+}
+
+/* get record by index; idx must point to valid msg */
+static struct log *log_from_idx(u32 idx)
+{
+       struct log *msg = (struct log *)(log_buf + idx);
+
+       /*
+        * A length == 0 record is the end of buffer marker. Wrap around and
+        * read the message at the start of the buffer.
+        */
+       if (!msg->len)
+               return (struct log *)log_buf;
+       return msg;
+}
+
+/* get next record; idx must point to valid msg */
+static u32 log_next(u32 idx)
+{
+       struct log *msg = (struct log *)(log_buf + idx);
+
+       /* length == 0 indicates the end of the buffer; wrap */
+       /*
+        * A length == 0 record is the end of buffer marker. Wrap around and
+        * read the message at the start of the buffer as *this* one, and
+        * return the one after that.
+        */
+       if (!msg->len) {
+               msg = (struct log *)log_buf;
+               return msg->len;
+       }
+       return idx + msg->len;
+}
+
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#define LOG_ALIGN 4
+#else
+#define LOG_ALIGN 8
+#endif
+
+/* insert record into the buffer, discard old ones, update heads */
+static void log_store(int facility, int level,
+                     const char *dict, u16 dict_len,
+                     const char *text, u16 text_len)
+{
+       struct log *msg;
+       u32 size, pad_len;
+
+       /* number of '\0' padding bytes to next message */
+       size = sizeof(struct log) + text_len + dict_len;
+       pad_len = (-size) & (LOG_ALIGN - 1);
+       size += pad_len;
+
+       while (log_first_seq < log_next_seq) {
+               u32 free;
+
+               if (log_next_idx > log_first_idx)
+                       free = max(log_buf_len - log_next_idx, log_first_idx);
+               else
+                       free = log_first_idx - log_next_idx;
+
+               if (free > size + sizeof(struct log))
+                       break;
+
+               /* drop old messages until we have enough contiuous space */
+               log_first_idx = log_next(log_first_idx);
+               log_first_seq++;
+       }
+
+       if (log_next_idx + size + sizeof(struct log) >= log_buf_len) {
+               /*
+                * This message + an additional empty header does not fit
+                * at the end of the buffer. Add an empty header with len == 0
+                * to signify a wrap around.
+                */
+               memset(log_buf + log_next_idx, 0, sizeof(struct log));
+               log_next_idx = 0;
+       }
+
+       /* fill message */
+       msg = (struct log *)(log_buf + log_next_idx);
+       memcpy(log_text(msg), text, text_len);
+       msg->text_len = text_len;
+       memcpy(log_dict(msg), dict, dict_len);
+       msg->dict_len = dict_len;
+       msg->level = (facility << 3) | (level & 7);
+       msg->ts_nsec = local_clock();
+       memset(log_dict(msg) + dict_len, 0, pad_len);
+       msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
+
+       /* insert message */
+       log_next_idx += msg->len;
+       log_next_seq++;
+}
 
 #ifdef CONFIG_KEXEC
 /*
@@ -165,9 +309,9 @@ static int saved_console_loglevel = -1;
 void log_buf_kexec_setup(void)
 {
        VMCOREINFO_SYMBOL(log_buf);
-       VMCOREINFO_SYMBOL(log_end);
        VMCOREINFO_SYMBOL(log_buf_len);
-       VMCOREINFO_SYMBOL(logged_chars);
+       VMCOREINFO_SYMBOL(log_first_idx);
+       VMCOREINFO_SYMBOL(log_next_idx);
 }
 #endif
 
@@ -191,7 +335,6 @@ early_param("log_buf_len", log_buf_len_setup);
 void __init setup_log_buf(int early)
 {
        unsigned long flags;
-       unsigned start, dest_idx, offset;
        char *new_log_buf;
        int free;
 
@@ -219,20 +362,8 @@ void __init setup_log_buf(int early)
        log_buf_len = new_log_buf_len;
        log_buf = new_log_buf;
        new_log_buf_len = 0;
-       free = __LOG_BUF_LEN - log_end;
-
-       offset = start = min(con_start, log_start);
-       dest_idx = 0;
-       while (start != log_end) {
-               unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1);
-
-               log_buf[dest_idx] = __log_buf[log_idx_mask];
-               start++;
-               dest_idx++;
-       }
-       log_start -= offset;
-       con_start -= offset;
-       log_end -= offset;
+       free = __LOG_BUF_LEN - log_next_idx;
+       memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
        raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
        pr_info("log_buf_len: %d\n", log_buf_len);
@@ -332,11 +463,165 @@ static int check_syslog_permissions(int type, bool from_file)
        return 0;
 }
 
+#if defined(CONFIG_PRINTK_TIME)
+static bool printk_time = 1;
+#else
+static bool printk_time;
+#endif
+module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
+
+static int syslog_print_line(u32 idx, char *text, size_t size)
+{
+       struct log *msg;
+       size_t len;
+
+       msg = log_from_idx(idx);
+       if (!text) {
+               /* calculate length only */
+               len = 3;
+
+               if (msg->level > 9)
+                       len++;
+               if (msg->level > 99)
+                       len++;
+
+               if (printk_time)
+                       len += 15;
+
+               len += msg->text_len;
+               len++;
+               return len;
+       }
+
+       len = sprintf(text, "<%u>", msg->level);
+
+       if (printk_time) {
+               unsigned long long t = msg->ts_nsec;
+               unsigned long rem_ns = do_div(t, 1000000000);
+
+               len += sprintf(text + len, "[%5lu.%06lu] ",
+                                  (unsigned long) t, rem_ns / 1000);
+       }
+
+       if (len + msg->text_len > size)
+               return -EINVAL;
+       memcpy(text + len, log_text(msg), msg->text_len);
+       len += msg->text_len;
+       text[len++] = '\n';
+       return len;
+}
+
+static int syslog_print(char __user *buf, int size)
+{
+       char *text;
+       int len;
+
+       text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
+       if (!text)
+               return -ENOMEM;
+
+       raw_spin_lock_irq(&logbuf_lock);
+       if (syslog_seq < log_first_seq) {
+               /* messages are gone, move to first one */
+               syslog_seq = log_first_seq;
+               syslog_idx = log_first_idx;
+       }
+       len = syslog_print_line(syslog_idx, text, LOG_LINE_MAX);
+       syslog_idx = log_next(syslog_idx);
+       syslog_seq++;
+       raw_spin_unlock_irq(&logbuf_lock);
+
+       if (len > 0 && copy_to_user(buf, text, len))
+               len = -EFAULT;
+
+       kfree(text);
+       return len;
+}
+
+static int syslog_print_all(char __user *buf, int size, bool clear)
+{
+       char *text;
+       int len = 0;
+
+       text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
+       if (!text)
+               return -ENOMEM;
+
+       raw_spin_lock_irq(&logbuf_lock);
+       if (buf) {
+               u64 next_seq;
+               u64 seq;
+               u32 idx;
+
+               if (clear_seq < log_first_seq) {
+                       /* messages are gone, move to first available one */
+                       clear_seq = log_first_seq;
+                       clear_idx = log_first_idx;
+               }
+
+               /*
+                * Find first record that fits, including all following records,
+                * into the user-provided buffer for this dump.
+               */
+               seq = clear_seq;
+               idx = clear_idx;
+               while (seq < log_next_seq) {
+                       len += syslog_print_line(idx, NULL, 0);
+                       idx = log_next(idx);
+                       seq++;
+               }
+               seq = clear_seq;
+               idx = clear_idx;
+               while (len > size && seq < log_next_seq) {
+                       len -= syslog_print_line(idx, NULL, 0);
+                       idx = log_next(idx);
+                       seq++;
+               }
+
+               /* last message in this dump */
+               next_seq = log_next_seq;
+
+               len = 0;
+               while (len >= 0 && seq < next_seq) {
+                       int textlen;
+
+                       textlen = syslog_print_line(idx, text, LOG_LINE_MAX);
+                       if (textlen < 0) {
+                               len = textlen;
+                               break;
+                       }
+                       idx = log_next(idx);
+                       seq++;
+
+                       raw_spin_unlock_irq(&logbuf_lock);
+                       if (copy_to_user(buf + len, text, textlen))
+                               len = -EFAULT;
+                       else
+                               len += textlen;
+                       raw_spin_lock_irq(&logbuf_lock);
+
+                       if (seq < log_first_seq) {
+                               /* messages are gone, move to next one */
+                               seq = log_first_seq;
+                               idx = log_first_idx;
+                       }
+               }
+       }
+
+       if (clear) {
+               clear_seq = log_next_seq;
+               clear_idx = log_next_idx;
+       }
+       raw_spin_unlock_irq(&logbuf_lock);
+
+       kfree(text);
+       return len;
+}
+
 int do_syslog(int type, char __user *buf, int len, bool from_file)
 {
-       unsigned i, j, limit, count;
-       int do_clear = 0;
-       char c;
+       bool clear = false;
+       static int saved_console_loglevel = -1;
        int error;
 
        error = check_syslog_permissions(type, from_file);
@@ -364,28 +649,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                        goto out;
                }
                error = wait_event_interruptible(log_wait,
-                                                       (log_start - log_end));
+                                                syslog_seq != log_next_seq);
                if (error)
                        goto out;
-               i = 0;
-               raw_spin_lock_irq(&logbuf_lock);
-               while (!error && (log_start != log_end) && i < len) {
-                       c = LOG_BUF(log_start);
-                       log_start++;
-                       raw_spin_unlock_irq(&logbuf_lock);
-                       error = __put_user(c,buf);
-                       buf++;
-                       i++;
-                       cond_resched();
-                       raw_spin_lock_irq(&logbuf_lock);
-               }
-               raw_spin_unlock_irq(&logbuf_lock);
-               if (!error)
-                       error = i;
+               error = syslog_print(buf, len);
                break;
        /* Read/clear last kernel messages */
        case SYSLOG_ACTION_READ_CLEAR:
-               do_clear = 1;
+               clear = true;
                /* FALL THRU */
        /* Read last kernel messages */
        case SYSLOG_ACTION_READ_ALL:
@@ -399,52 +670,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                        error = -EFAULT;
                        goto out;
                }
-               count = len;
-               if (count > log_buf_len)
-                       count = log_buf_len;
-               raw_spin_lock_irq(&logbuf_lock);
-               if (count > logged_chars)
-                       count = logged_chars;
-               if (do_clear)
-                       logged_chars = 0;
-               limit = log_end;
-               /*
-                * __put_user() could sleep, and while we sleep
-                * printk() could overwrite the messages
-                * we try to copy to user space. Therefore
-                * the messages are copied in reverse. <manfreds>
-                */
-               for (i = 0; i < count && !error; i++) {
-                       j = limit-1-i;
-                       if (j + log_buf_len < log_end)
-                               break;
-                       c = LOG_BUF(j);
-                       raw_spin_unlock_irq(&logbuf_lock);
-                       error = __put_user(c,&buf[count-1-i]);
-                       cond_resched();
-                       raw_spin_lock_irq(&logbuf_lock);
-               }
-               raw_spin_unlock_irq(&logbuf_lock);
-               if (error)
-                       break;
-               error = i;
-               if (i != count) {
-                       int offset = count-error;
-                       /* buffer overflow during copy, correct user buffer. */
-                       for (i = 0; i < error; i++) {
-                               if (__get_user(c,&buf[i+offset]) ||
-                                   __put_user(c,&buf[i])) {
-                                       error = -EFAULT;
-                                       break;
-                               }
-                               cond_resched();
-                       }
-               }
+               error = syslog_print_all(buf, len, clear);
                break;
        /* Clear ring buffer */
        case SYSLOG_ACTION_CLEAR:
-               logged_chars = 0;
-               break;
+               syslog_print_all(NULL, 0, true);
        /* Disable logging to console */
        case SYSLOG_ACTION_CONSOLE_OFF:
                if (saved_console_loglevel == -1)
@@ -472,7 +702,33 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                break;
        /* Number of chars in the log buffer */
        case SYSLOG_ACTION_SIZE_UNREAD:
-               error = log_end - log_start;
+               raw_spin_lock_irq(&logbuf_lock);
+               if (syslog_seq < log_first_seq) {
+                       /* messages are gone, move to first one */
+                       syslog_seq = log_first_seq;
+                       syslog_idx = log_first_idx;
+               }
+               if (from_file) {
+                       /*
+                        * Short-cut for poll(/"proc/kmsg") which simply checks
+                        * for pending data, not the size; return the count of
+                        * records, not the length.
+                        */
+                       error = log_next_idx - syslog_idx;
+               } else {
+                       u64 seq;
+                       u32 idx;
+
+                       error = 0;
+                       seq = syslog_seq;
+                       idx = syslog_idx;
+                       while (seq < log_next_seq) {
+                               error += syslog_print_line(idx, NULL, 0);
+                               idx = log_next(idx);
+                               seq++;
+                       }
+               }
+               raw_spin_unlock_irq(&logbuf_lock);
                break;
        /* Size of the log buffer */
        case SYSLOG_ACTION_SIZE_BUFFER:
@@ -501,29 +757,11 @@ void kdb_syslog_data(char *syslog_data[4])
 {
        syslog_data[0] = log_buf;
        syslog_data[1] = log_buf + log_buf_len;
-       syslog_data[2] = log_buf + log_end -
-               (logged_chars < log_buf_len ? logged_chars : log_buf_len);
-       syslog_data[3] = log_buf + log_end;
+       syslog_data[2] = log_buf + log_first_idx;
+       syslog_data[3] = log_buf + log_next_idx;
 }
 #endif /* CONFIG_KGDB_KDB */
 
-/*
- * Call the console drivers on a range of log_buf
- */
-static void __call_console_drivers(unsigned start, unsigned end)
-{
-       struct console *con;
-
-       for_each_console(con) {
-               if (exclusive_console && con != exclusive_console)
-                       continue;
-               if ((con->flags & CON_ENABLED) && con->write &&
-                               (cpu_online(smp_processor_id()) ||
-                               (con->flags & CON_ANYTIME)))
-                       con->write(con, &LOG_BUF(start), end - start);
-       }
-}
-
 static bool __read_mostly ignore_loglevel;
 
 static int __init ignore_loglevel_setup(char *str)
@@ -539,143 +777,34 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
        "print all kernel messages to the console.");
 
-/*
- * Write out chars from start to end - 1 inclusive
- */
-static void _call_console_drivers(unsigned start,
-                               unsigned end, int msg_log_level)
-{
-       trace_console(&LOG_BUF(0), start, end, log_buf_len);
-
-       if ((msg_log_level < console_loglevel || ignore_loglevel) &&
-                       console_drivers && start != end) {
-               if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
-                       /* wrapped write */
-                       __call_console_drivers(start & LOG_BUF_MASK,
-                                               log_buf_len);
-                       __call_console_drivers(0, end & LOG_BUF_MASK);
-               } else {
-                       __call_console_drivers(start, end);
-               }
-       }
-}
-
-/*
- * Parse the syslog header <[0-9]*>. The decimal value represents 32bit, the
- * lower 3 bit are the log level, the rest are the log facility. In case
- * userspace passes usual userspace syslog messages to /dev/kmsg or
- * /dev/ttyprintk, the log prefix might contain the facility. Printk needs
- * to extract the correct log level for in-kernel processing, and not mangle
- * the original value.
- *
- * If a prefix is found, the length of the prefix is returned. If 'level' is
- * passed, it will be filled in with the log level without a possible facility
- * value. If 'special' is passed, the special printk prefix chars are accepted
- * and returned. If no valid header is found, 0 is returned and the passed
- * variables are not touched.
- */
-static size_t log_prefix(const char *p, unsigned int *level, char *special)
-{
-       unsigned int lev = 0;
-       char sp = '\0';
-       size_t len;
-
-       if (p[0] != '<' || !p[1])
-               return 0;
-       if (p[2] == '>') {
-               /* usual single digit level number or special char */
-               switch (p[1]) {
-               case '0' ... '7':
-                       lev = p[1] - '0';
-                       break;
-               case 'c': /* KERN_CONT */
-               case 'd': /* KERN_DEFAULT */
-                       sp = p[1];
-                       break;
-               default:
-                       return 0;
-               }
-               len = 3;
-       } else {
-               /* multi digit including the level and facility number */
-               char *endp = NULL;
-
-               lev = (simple_strtoul(&p[1], &endp, 10) & 7);
-               if (endp == NULL || endp[0] != '>')
-                       return 0;
-               len = (endp + 1) - p;
-       }
-
-       /* do not accept special char if not asked for */
-       if (sp && !special)
-               return 0;
-
-       if (special) {
-               *special = sp;
-               /* return special char, do not touch level */
-               if (sp)
-                       return len;
-       }
-
-       if (level)
-               *level = lev;
-       return len;
-}
-
 /*
  * Call the console drivers, asking them to write out
  * log_buf[start] to log_buf[end - 1].
  * The console_lock must be held.
  */
-static void call_console_drivers(unsigned start, unsigned end)
+static void call_console_drivers(int level, const char *text, size_t len)
 {
-       unsigned cur_index, start_print;
-       static int msg_level = -1;
-
-       BUG_ON(((int)(start - end)) > 0);
-
-       cur_index = start;
-       start_print = start;
-       while (cur_index != end) {
-               if (msg_level < 0 && ((end - cur_index) > 2)) {
-                       /* strip log prefix */
-                       cur_index += log_prefix(&LOG_BUF(cur_index), &msg_level, NULL);
-                       start_print = cur_index;
-               }
-               while (cur_index != end) {
-                       char c = LOG_BUF(cur_index);
-
-                       cur_index++;
-                       if (c == '\n') {
-                               if (msg_level < 0) {
-                                       /*
-                                        * printk() has already given us loglevel tags in
-                                        * the buffer.  This code is here in case the
-                                        * log buffer has wrapped right round and scribbled
-                                        * on those tags
-                                        */
-                                       msg_level = default_message_loglevel;
-                               }
-                               _call_console_drivers(start_print, cur_index, msg_level);
-                               msg_level = -1;
-                               start_print = cur_index;
-                               break;
-                       }
-               }
-       }
-       _call_console_drivers(start_print, end, msg_level);
-}
+       struct console *con;
 
-static void emit_log_char(char c)
-{
-       LOG_BUF(log_end) = c;
-       log_end++;
-       if (log_end - log_start > log_buf_len)
-               log_start = log_end - log_buf_len;
-       if (log_end - con_start > log_buf_len)
-               con_start = log_end - log_buf_len;
-       if (logged_chars < log_buf_len)
-               logged_chars++;
+       trace_console(text, 0, len, len);
+
+       if (level >= console_loglevel && !ignore_loglevel)
+               return;
+       if (!console_drivers)
+               return;
+
+       for_each_console(con) {
+               if (exclusive_console && con != exclusive_console)
+                       continue;
+               if (!(con->flags & CON_ENABLED))
+                       continue;
+               if (!con->write)
+                       continue;
+               if (!cpu_online(smp_processor_id()) &&
+                   !(con->flags & CON_ANYTIME))
+                       continue;
+               con->write(con, text, len);
+       }
 }
 
 /*
@@ -700,16 +829,6 @@ static void zap_locks(void)
        sema_init(&console_sem, 1);
 }
 
-#if defined(CONFIG_PRINTK_TIME)
-static bool printk_time = 1;
-#else
-static bool printk_time = 0;
-#endif
-module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
-
-static bool always_kmsg_dump;
-module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
-
 /* Check if we have any console registered that can be called early in boot. */
 static int have_callable_console(void)
 {
@@ -722,51 +841,6 @@ static int have_callable_console(void)
        return 0;
 }
 
-/**
- * printk - print a kernel message
- * @fmt: format string
- *
- * This is printk().  It can be called from any context.  We want it to work.
- *
- * We try to grab the console_lock.  If we succeed, it's easy - we log the output and
- * call the console drivers.  If we fail to get the semaphore we place the output
- * into the log buffer and return.  The current holder of the console_sem will
- * notice the new output in console_unlock(); and will send it to the
- * consoles before releasing the lock.
- *
- * One effect of this deferred printing is that code which calls printk() and
- * then changes console_loglevel may break. This is because console_loglevel
- * is inspected when the actual printing occurs.
- *
- * See also:
- * printf(3)
- *
- * See the vsnprintf() documentation for format string extensions over C99.
- */
-
-asmlinkage int printk(const char *fmt, ...)
-{
-       va_list args;
-       int r;
-
-#ifdef CONFIG_KGDB_KDB
-       if (unlikely(kdb_trap_printk)) {
-               va_start(args, fmt);
-               r = vkdb_printf(fmt, args);
-               va_end(args);
-               return r;
-       }
-#endif
-       va_start(args, fmt);
-       r = vprintk(fmt, args);
-       va_end(args);
-
-       return r;
-}
-
-/* cpu currently holding logbuf_lock */
-static volatile unsigned int printk_cpu = UINT_MAX;
-
 /*
  * Can we actually use the console at this time on this cpu?
  *
@@ -810,17 +884,12 @@ static int console_trylock_for_printk(unsigned int cpu)
                        retval = 0;
                }
        }
-       printk_cpu = UINT_MAX;
+       logbuf_cpu = UINT_MAX;
        if (wake)
                up(&console_sem);
        raw_spin_unlock(&logbuf_lock);
        return retval;
 }
-static const char recursion_bug_msg [] =
-               KERN_CRIT "BUG: recent printk recursion!\n";
-static int recursion_bug;
-static int new_text_line = 1;
-static char printk_buf[1024];
 
 int printk_delay_msec __read_mostly;
 
@@ -836,15 +905,22 @@ static inline void printk_delay(void)
        }
 }
 
-asmlinkage int vprintk(const char *fmt, va_list args)
+asmlinkage int vprintk_emit(int facility, int level,
+                           const char *dict, size_t dictlen,
+                           const char *fmt, va_list args)
 {
-       int printed_len = 0;
-       int current_log_level = default_message_loglevel;
+       static int recursion_bug;
+       static char buf[LOG_LINE_MAX];
+       static size_t buflen;
+       static int buflevel;
+       static char textbuf[LOG_LINE_MAX];
+       char *text = textbuf;
+       size_t textlen;
        unsigned long flags;
        int this_cpu;
-       char *p;
-       size_t plen;
-       char special;
+       bool newline = false;
+       bool cont = false;
+       int printed_len = 0;
 
        boot_delay_msec();
        printk_delay();
@@ -856,7 +932,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
        /*
         * Ouch, printk recursed into itself!
         */
-       if (unlikely(printk_cpu == this_cpu)) {
+       if (unlikely(logbuf_cpu == this_cpu)) {
                /*
                 * If a crash is occurring during printk() on this CPU,
                 * then try to get the crash message out but make sure
@@ -873,97 +949,92 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 
        lockdep_off();
        raw_spin_lock(&logbuf_lock);
-       printk_cpu = this_cpu;
+       logbuf_cpu = this_cpu;
 
        if (recursion_bug) {
+               static const char recursion_msg[] =
+                       "BUG: recent printk recursion!";
+
                recursion_bug = 0;
-               strcpy(printk_buf, recursion_bug_msg);
-               printed_len = strlen(recursion_bug_msg);
+               printed_len += strlen(recursion_msg);
+               /* emit KERN_CRIT message */
+               log_store(0, 2, NULL, 0, recursion_msg, printed_len);
        }
-       /* Emit the output into the temporary buffer */
-       printed_len += vscnprintf(printk_buf + printed_len,
-                                 sizeof(printk_buf) - printed_len, fmt, args);
 
-       p = printk_buf;
+       /*
+        * The printf needs to come first; we need the syslog
+        * prefix which might be passed-in as a parameter.
+        */
+       textlen = vscnprintf(text, sizeof(textbuf), fmt, args);
 
-       /* Read log level and handle special printk prefix */
-       plen = log_prefix(p, &current_log_level, &special);
-       if (plen) {
-               p += plen;
+       /* mark and strip a trailing newline */
+       if (textlen && text[textlen-1] == '\n') {
+               textlen--;
+               newline = true;
+       }
 
-               switch (special) {
-               case 'c': /* Strip <c> KERN_CONT, continue line */
-                       plen = 0;
+       /* strip syslog prefix and extract log level or flags */
+       if (text[0] == '<' && text[1] && text[2] == '>') {
+               switch (text[1]) {
+               case '0' ... '7':
+                       if (level == -1)
+                               level = text[1] - '0';
+                       text += 3;
+                       textlen -= 3;
+                       break;
+               case 'c':       /* KERN_CONT */
+                       cont = true;
+               case 'd':       /* KERN_DEFAULT */
+                       text += 3;
+                       textlen -= 3;
                        break;
-               case 'd': /* Strip <d> KERN_DEFAULT, start new line */
-                       plen = 0;
-               default:
-                       if (!new_text_line) {
-                               emit_log_char('\n');
-                               new_text_line = 1;
-                       }
                }
        }
 
-       /*
-        * Copy the output into log_buf. If the caller didn't provide
-        * the appropriate log prefix, we insert them here
-        */
-       for (; *p; p++) {
-               if (new_text_line) {
-                       new_text_line = 0;
-
-                       if (plen) {
-                               /* Copy original log prefix */
-                               int i;
-
-                               for (i = 0; i < plen; i++)
-                                       emit_log_char(printk_buf[i]);
-                               printed_len += plen;
-                       } else {
-                               /* Add log prefix */
-                               emit_log_char('<');
-                               emit_log_char(current_log_level + '0');
-                               emit_log_char('>');
-                               printed_len += 3;
-                       }
+       if (buflen && (!cont || dict)) {
+               /* no continuation; flush existing buffer */
+               log_store(facility, buflevel, NULL, 0, buf, buflen);
+               printed_len += buflen;
+               buflen = 0;
+       }
 
-                       if (printk_time) {
-                               /* Add the current time stamp */
-                               char tbuf[50], *tp;
-                               unsigned tlen;
-                               unsigned long long t;
-                               unsigned long nanosec_rem;
-
-                               t = cpu_clock(printk_cpu);
-                               nanosec_rem = do_div(t, 1000000000);
-                               tlen = sprintf(tbuf, "[%5lu.%06lu] ",
-                                               (unsigned long) t,
-                                               nanosec_rem / 1000);
-
-                               for (tp = tbuf; tp < tbuf + tlen; tp++)
-                                       emit_log_char(*tp);
-                               printed_len += tlen;
-                       }
+       if (buflen == 0) {
+               /* remember level for first message in the buffer */
+               if (level == -1)
+                       buflevel = default_message_loglevel;
+               else
+                       buflevel = level;
+       }
 
-                       if (!*p)
-                               break;
-               }
+       if (buflen || !newline) {
+               /* append to existing buffer, or buffer until next message */
+               if (buflen + textlen > sizeof(buf))
+                       textlen = sizeof(buf) - buflen;
+               memcpy(buf + buflen, text, textlen);
+               buflen += textlen;
+       }
 
-               emit_log_char(*p);
-               if (*p == '\n')
-                       new_text_line = 1;
+       if (newline) {
+               /* end of line; flush buffer */
+               if (buflen) {
+                       log_store(facility, buflevel,
+                                 dict, dictlen, buf, buflen);
+                       printed_len += buflen;
+                       buflen = 0;
+               } else {
+                       log_store(facility, buflevel,
+                                 dict, dictlen, text, textlen);
+                       printed_len += textlen;
+               }
        }
 
        /*
-        * Try to acquire and then immediately release the
-        * console semaphore. The release will do all the
-        * actual magic (print out buffers, wake up klogd,
-        * etc). 
+        * Try to acquire and then immediately release the console semaphore.
+        * The release will print out buffers and wake up /dev/kmsg and syslog()
+        * users.
         *
-        * The console_trylock_for_printk() function
-        * will release 'logbuf_lock' regardless of whether it
-        * actually gets the semaphore or not.
+        * The console_trylock_for_printk() function will release 'logbuf_lock'
+        * regardless of whether it actually gets the console semaphore or not.
         */
        if (console_trylock_for_printk(this_cpu))
                console_unlock();
@@ -974,12 +1045,73 @@ out_restore_irqs:
 
        return printed_len;
 }
-EXPORT_SYMBOL(printk);
+EXPORT_SYMBOL(vprintk_emit);
+
+asmlinkage int vprintk(const char *fmt, va_list args)
+{
+       return vprintk_emit(0, -1, NULL, 0, fmt, args);
+}
 EXPORT_SYMBOL(vprintk);
 
+asmlinkage int printk_emit(int facility, int level,
+                          const char *dict, size_t dictlen,
+                          const char *fmt, ...)
+{
+       va_list args;
+       int r;
+
+       va_start(args, fmt);
+       r = vprintk_emit(facility, level, dict, dictlen, fmt, args);
+       va_end(args);
+
+       return r;
+}
+EXPORT_SYMBOL(printk_emit);
+
+/**
+ * printk - print a kernel message
+ * @fmt: format string
+ *
+ * This is printk(). It can be called from any context. We want it to work.
+ *
+ * We try to grab the console_lock. If we succeed, it's easy - we log the
+ * output and call the console drivers.  If we fail to get the semaphore, we
+ * place the output into the log buffer and return. The current holder of
+ * the console_sem will notice the new output in console_unlock(); and will
+ * send it to the consoles before releasing the lock.
+ *
+ * One effect of this deferred printing is that code which calls printk() and
+ * then changes console_loglevel may break. This is because console_loglevel
+ * is inspected when the actual printing occurs.
+ *
+ * See also:
+ * printf(3)
+ *
+ * See the vsnprintf() documentation for format string extensions over C99.
+ */
+asmlinkage int printk(const char *fmt, ...)
+{
+       va_list args;
+       int r;
+
+#ifdef CONFIG_KGDB_KDB
+       if (unlikely(kdb_trap_printk)) {
+               va_start(args, fmt);
+               r = vkdb_printf(fmt, args);
+               va_end(args);
+               return r;
+       }
+#endif
+       va_start(args, fmt);
+       r = vprintk_emit(0, -1, NULL, 0, fmt, args);
+       va_end(args);
+
+       return r;
+}
+EXPORT_SYMBOL(printk);
 #else
 
-static void call_console_drivers(unsigned start, unsigned end)
+static void call_console_drivers(int level, const char *text, size_t len)
 {
 }
 
@@ -1217,7 +1349,7 @@ int is_console_locked(void)
 }
 
 /*
- * Delayed printk facility, for scheduler-internal messages:
+ * Delayed printk version, for scheduler-internal messages:
  */
 #define PRINTK_BUF_SIZE                512
 
@@ -1253,6 +1385,10 @@ void wake_up_klogd(void)
                this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
 }
 
+/* the next printk record to write to the console */
+static u64 console_seq;
+static u32 console_idx;
+
 /**
  * console_unlock - unlock the console system
  *
@@ -1263,15 +1399,16 @@ void wake_up_klogd(void)
  * by printk().  If this is the case, console_unlock(); emits
  * the output prior to releasing the lock.
  *
- * If there is output waiting for klogd, we wake it up.
+ * If there is output waiting, we wake it /dev/kmsg and syslog() users.
  *
  * console_unlock(); may be called from any context.
  */
 void console_unlock(void)
 {
+       static u64 seen_seq;
        unsigned long flags;
-       unsigned _con_start, _log_end;
-       unsigned wake_klogd = 0, retry = 0;
+       bool wake_klogd = false;
+       bool retry;
 
        if (console_suspended) {
                up(&console_sem);
@@ -1281,17 +1418,41 @@ void console_unlock(void)
        console_may_schedule = 0;
 
 again:
-       for ( ; ; ) {
+       for (;;) {
+               struct log *msg;
+               static char text[LOG_LINE_MAX];
+               size_t len;
+               int level;
+
                raw_spin_lock_irqsave(&logbuf_lock, flags);
-               wake_klogd |= log_start - log_end;
-               if (con_start == log_end)
-                       break;                  /* Nothing to print */
-               _con_start = con_start;
-               _log_end = log_end;
-               con_start = log_end;            /* Flush */
+               if (seen_seq != log_next_seq) {
+                       wake_klogd = true;
+                       seen_seq = log_next_seq;
+               }
+
+               if (console_seq < log_first_seq) {
+                       /* messages are gone, move to first one */
+                       console_seq = log_first_seq;
+                       console_idx = log_first_idx;
+               }
+
+               if (console_seq == log_next_seq)
+                       break;
+
+               msg = log_from_idx(console_idx);
+               level = msg->level & 7;
+               len = msg->text_len;
+               if (len+1 >= sizeof(text))
+                       len = sizeof(text)-1;
+               memcpy(text, log_text(msg), len);
+               text[len++] = '\n';
+
+               console_idx = log_next(console_idx);
+               console_seq++;
                raw_spin_unlock(&logbuf_lock);
+
                stop_critical_timings();        /* don't trace print latency */
-               call_console_drivers(_con_start, _log_end);
+               call_console_drivers(level, text, len);
                start_critical_timings();
                local_irq_restore(flags);
        }
@@ -1312,8 +1473,7 @@ again:
         * flush, no worries.
         */
        raw_spin_lock(&logbuf_lock);
-       if (con_start != log_end)
-               retry = 1;
+       retry = console_seq != log_next_seq;
        raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
        if (retry && console_trylock())
@@ -1549,7 +1709,8 @@ void register_console(struct console *newcon)
                 * for us.
                 */
                raw_spin_lock_irqsave(&logbuf_lock, flags);
-               con_start = log_start;
+               console_seq = syslog_seq;
+               console_idx = syslog_idx;
                raw_spin_unlock_irqrestore(&logbuf_lock, flags);
                /*
                 * We're about to replay the log buffer.  Only do this to the
@@ -1758,6 +1919,9 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper)
 }
 EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
 
+static bool always_kmsg_dump;
+module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+
 /**
  * kmsg_dump - dump kernel log to kernel message dumpers.
  * @reason: the reason (oops, panic etc) for dumping
@@ -1767,8 +1931,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
  */
 void kmsg_dump(enum kmsg_dump_reason reason)
 {
-       unsigned long end;
-       unsigned chars;
+       u64 idx;
        struct kmsg_dumper *dumper;
        const char *s1, *s2;
        unsigned long l1, l2;
@@ -1780,24 +1943,27 @@ void kmsg_dump(enum kmsg_dump_reason reason)
        /* Theoretically, the log could move on after we do this, but
           there's not a lot we can do about that. The new messages
           will overwrite the start of what we dump. */
+
        raw_spin_lock_irqsave(&logbuf_lock, flags);
-       end = log_end & LOG_BUF_MASK;
-       chars = logged_chars;
-       raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+       if (syslog_seq < log_first_seq)
+               idx = syslog_idx;
+       else
+               idx = log_first_idx;
 
-       if (chars > end) {
-               s1 = log_buf + log_buf_len - chars + end;
-               l1 = chars - end;
+       if (idx > log_next_idx) {
+               s1 = log_buf;
+               l1 = log_next_idx;
 
-               s2 = log_buf;
-               l2 = end;
+               s2 = log_buf + idx;
+               l2 = log_buf_len - idx;
        } else {
                s1 = "";
                l1 = 0;
 
-               s2 = log_buf + end - chars;
-               l2 = chars;
+               s2 = log_buf + idx;
+               l2 = log_next_idx - idx;
        }
+       raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
        rcu_read_lock();
        list_for_each_entry_rcu(dumper, &dump_list, list)