tracing: Have mkdir and rmdir be part of tracefs
kernel/trace/trace.c (pandora-kernel.git)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk(), could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static struct tracer_flags dummy_tracer_flags = {
78         .val = 0,
79         .opts = dummy_tracer_opt
80 };
81
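/*
 * Default ->set_flag() callback, installed by register_tracer() for
 * tracers that do not provide one; it accepts and ignores all flag changes.
 */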
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85         return 0;
86 }
87
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 and is set back to zero only if the
98  * initialization of the tracer is successful; that is the only
99  * place that clears it.
100  */
101 static int tracing_disabled = 1;
102
103 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
104
105 cpumask_var_t __read_mostly     tracing_buffer_mask;
106
107 /*
108  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
109  *
110  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
111  * is set, then ftrace_dump is called. This will output the contents
112  * of the ftrace buffers to the console.  This is very useful for
113  * capturing traces that lead to crashes and outputting them to a
114  * serial console.
115  *
116  * It is off by default, but you can enable it either by specifying
117  * "ftrace_dump_on_oops" on the kernel command line, or by setting
118  * /proc/sys/kernel/ftrace_dump_on_oops.
119  * Set 1 if you want to dump the buffers of all CPUs.
120  * Set 2 if you want to dump only the buffer of the CPU that triggered the oops.
121  */
122
123 enum ftrace_dump_mode ftrace_dump_on_oops;
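/*
 * Example usage, per the description above:
 *   ftrace_dump_on_oops           (boot parameter: dump all CPU buffers)
 *   ftrace_dump_on_oops=orig_cpu  (boot parameter: dump only the oopsing CPU)
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */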
124
125 /* When set, tracing will stop when a WARN*() is hit */
126 int __disable_trace_on_warning;
127
128 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
129
130 #define MAX_TRACER_SIZE         100
131 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
132 static char *default_bootup_tracer;
133
134 static bool allocate_snapshot;
135
136 static int __init set_cmdline_ftrace(char *str)
137 {
138         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
139         default_bootup_tracer = bootup_tracer_buf;
140         /* We are using ftrace early, expand it */
141         ring_buffer_expanded = true;
142         return 1;
143 }
144 __setup("ftrace=", set_cmdline_ftrace);
145
146 static int __init set_ftrace_dump_on_oops(char *str)
147 {
148         if (*str++ != '=' || !*str) {
149                 ftrace_dump_on_oops = DUMP_ALL;
150                 return 1;
151         }
152
153         if (!strcmp("orig_cpu", str)) {
154                 ftrace_dump_on_oops = DUMP_ORIG;
155                 return 1;
156         }
157
158         return 0;
159 }
160 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
161
162 static int __init stop_trace_on_warning(char *str)
163 {
164         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
165                 __disable_trace_on_warning = 1;
166         return 1;
167 }
168 __setup("traceoff_on_warning", stop_trace_on_warning);
169
170 static int __init boot_alloc_snapshot(char *str)
171 {
172         allocate_snapshot = true;
173         /* We also need the main ring buffer expanded */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("alloc_snapshot", boot_alloc_snapshot);
178
179
180 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
181 static char *trace_boot_options __initdata;
182
183 static int __init set_trace_boot_options(char *str)
184 {
185         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
186         trace_boot_options = trace_boot_options_buf;
187         return 0;
188 }
189 __setup("trace_options=", set_trace_boot_options);
190
191 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
192 static char *trace_boot_clock __initdata;
193
194 static int __init set_trace_boot_clock(char *str)
195 {
196         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
197         trace_boot_clock = trace_boot_clock_buf;
198         return 0;
199 }
200 __setup("trace_clock=", set_trace_boot_clock);
201
202 static int __init set_tracepoint_printk(char *str)
203 {
204         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
205                 tracepoint_printk = 1;
206         return 1;
207 }
208 __setup("tp_printk", set_tracepoint_printk);
209
210 unsigned long long ns2usecs(cycle_t nsec)
211 {
212         nsec += 500;
213         do_div(nsec, 1000);
214         return nsec;
215 }
216
217 /*
218  * The global_trace is the descriptor that holds the tracing
219  * buffers for the live tracing. For each CPU, it contains
220  * a linked list of pages that will store trace entries. The
221  * page descriptors of those pages are used to hold the linked
222  * list, by linking the lru item of each page descriptor to the
223  * other pages in that CPU's buffer.
224  *
225  * For each active CPU there is a data field that holds the
226  * pages for the buffer for that CPU. Each CPU has the same number
227  * of pages allocated for its buffer.
228  */
229 static struct trace_array       global_trace;
230
231 LIST_HEAD(ftrace_trace_arrays);
232
233 int trace_array_get(struct trace_array *this_tr)
234 {
235         struct trace_array *tr;
236         int ret = -ENODEV;
237
238         mutex_lock(&trace_types_lock);
239         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
240                 if (tr == this_tr) {
241                         tr->ref++;
242                         ret = 0;
243                         break;
244                 }
245         }
246         mutex_unlock(&trace_types_lock);
247
248         return ret;
249 }
250
251 static void __trace_array_put(struct trace_array *this_tr)
252 {
253         WARN_ON(!this_tr->ref);
254         this_tr->ref--;
255 }
256
257 void trace_array_put(struct trace_array *this_tr)
258 {
259         mutex_lock(&trace_types_lock);
260         __trace_array_put(this_tr);
261         mutex_unlock(&trace_types_lock);
262 }
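/*
 * Usage sketch: callers pin a trace_array before operating on it and
 * drop the reference when done:
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;
 *	...operate on tr...
 *	trace_array_put(tr);
 */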
263
264 int filter_check_discard(struct ftrace_event_file *file, void *rec,
265                          struct ring_buffer *buffer,
266                          struct ring_buffer_event *event)
267 {
268         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(file->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(filter_check_discard);
277
278 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
279                               struct ring_buffer *buffer,
280                               struct ring_buffer_event *event)
281 {
282         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
283             !filter_match_preds(call->filter, rec)) {
284                 ring_buffer_discard_commit(buffer, event);
285                 return 1;
286         }
287
288         return 0;
289 }
290 EXPORT_SYMBOL_GPL(call_filter_check_discard);
291
292 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
293 {
294         u64 ts;
295
296         /* Early boot up does not have a buffer yet */
297         if (!buf->buffer)
298                 return trace_clock_local();
299
300         ts = ring_buffer_time_stamp(buf->buffer, cpu);
301         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
302
303         return ts;
304 }
305
306 cycle_t ftrace_now(int cpu)
307 {
308         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
309 }
310
311 /**
312  * tracing_is_enabled - Show if global_trace has been disabled
313  *
314  * Shows if the global trace has been enabled or not. It uses the
315  * mirror flag "buffer_disabled", which is meant for fast paths such
316  * as the irqsoff tracer. But it may be inaccurate due to races. If you
317  * need to know the accurate state, use tracing_is_on() which is a little
318  * slower, but accurate.
319  */
320 int tracing_is_enabled(void)
321 {
322         /*
323          * For quick access (irqsoff uses this in fast path), just
324          * return the mirror variable of the state of the ring buffer.
325          * It's a little racy, but we don't really care.
326          */
327         smp_rmb();
328         return !global_trace.buffer_disabled;
329 }
330
331 /*
332  * trace_buf_size is the size in bytes that is allocated
333  * for a buffer. Note, the number of bytes is always rounded
334  * to page size.
335  *
336  * This number is purposely set to a low number of 16384.
337  * If the dump on oops happens, it will be much appreciated
338  * to not have to wait for all that output. In any case, this is
339  * configurable at both boot time and run time.
340  */
341 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
342
343 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
344
345 /* trace_types holds a link list of available tracers. */
346 static struct tracer            *trace_types __read_mostly;
347
348 /*
349  * trace_types_lock is used to protect the trace_types list.
350  */
351 DEFINE_MUTEX(trace_types_lock);
352
353 /*
354  * serialize access to the ring buffer
355  *
356  * The ring buffer serializes readers, but that is only low-level protection.
357  * The validity of the events (returned by ring_buffer_peek(), etc.)
358  * is not protected by the ring buffer.
359  *
360  * The content of events may become garbage if we allow another process to
361  * consume these events concurrently:
362  *   A) the page of the consumed events may become a normal page
363  *      (not the reader page) in the ring buffer, and this page will be
364  *      rewritten by the event producer.
365  *   B) the page of the consumed events may become a page for splice_read,
366  *      and this page will be returned to the system.
367  *
368  * These primitives allow multiple processes to access different CPU ring
369  * buffers concurrently.
370  *
371  * These primitives don't distinguish read-only from read-consume access.
372  * Multiple read-only accesses are also serialized.
373  */
374
375 #ifdef CONFIG_SMP
376 static DECLARE_RWSEM(all_cpu_access_lock);
377 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
378
379 static inline void trace_access_lock(int cpu)
380 {
381         if (cpu == RING_BUFFER_ALL_CPUS) {
382                 /* gain it for accessing the whole ring buffer. */
383                 down_write(&all_cpu_access_lock);
384         } else {
385                 /* gain it for accessing a cpu ring buffer. */
386
387                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
388                 down_read(&all_cpu_access_lock);
389
390                 /* Secondly block other access to this @cpu ring buffer. */
391                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
392         }
393 }
394
395 static inline void trace_access_unlock(int cpu)
396 {
397         if (cpu == RING_BUFFER_ALL_CPUS) {
398                 up_write(&all_cpu_access_lock);
399         } else {
400                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
401                 up_read(&all_cpu_access_lock);
402         }
403 }
404
405 static inline void trace_access_lock_init(void)
406 {
407         int cpu;
408
409         for_each_possible_cpu(cpu)
410                 mutex_init(&per_cpu(cpu_access_lock, cpu));
411 }
412
413 #else
414
415 static DEFINE_MUTEX(access_lock);
416
417 static inline void trace_access_lock(int cpu)
418 {
419         (void)cpu;
420         mutex_lock(&access_lock);
421 }
422
423 static inline void trace_access_unlock(int cpu)
424 {
425         (void)cpu;
426         mutex_unlock(&access_lock);
427 }
428
429 static inline void trace_access_lock_init(void)
430 {
431 }
432
433 #endif
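/*
 * Usage sketch: a reader wraps its ring buffer consumption in
 *
 *	trace_access_lock(cpu);		(cpu may be RING_BUFFER_ALL_CPUS)
 *	...read or consume events...
 *	trace_access_unlock(cpu);
 */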
434
435 /* trace_flags holds trace_options default values */
436 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
437         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
438         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
439         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
440
441 static void tracer_tracing_on(struct trace_array *tr)
442 {
443         if (tr->trace_buffer.buffer)
444                 ring_buffer_record_on(tr->trace_buffer.buffer);
445         /*
446          * This flag is looked at when buffers haven't been allocated
447          * yet, or by some tracers (like irqsoff), that just want to
448          * know if the ring buffer has been disabled, but it can handle
449  * races where it gets disabled while we still do a record.
450          * As the check is in the fast path of the tracers, it is more
451          * important to be fast than accurate.
452          */
453         tr->buffer_disabled = 0;
454         /* Make the flag seen by readers */
455         smp_wmb();
456 }
457
458 /**
459  * tracing_on - enable tracing buffers
460  *
461  * This function enables tracing buffers that may have been
462  * disabled with tracing_off.
463  */
464 void tracing_on(void)
465 {
466         tracer_tracing_on(&global_trace);
467 }
468 EXPORT_SYMBOL_GPL(tracing_on);
469
470 /**
471  * __trace_puts - write a constant string into the trace buffer.
472  * @ip:    The address of the caller
473  * @str:   The constant string to write
474  * @size:  The size of the string.
475  */
476 int __trace_puts(unsigned long ip, const char *str, int size)
477 {
478         struct ring_buffer_event *event;
479         struct ring_buffer *buffer;
480         struct print_entry *entry;
481         unsigned long irq_flags;
482         int alloc;
483         int pc;
484
485         if (!(trace_flags & TRACE_ITER_PRINTK))
486                 return 0;
487
488         pc = preempt_count();
489
490         if (unlikely(tracing_selftest_running || tracing_disabled))
491                 return 0;
492
493         alloc = sizeof(*entry) + size + 2; /* possible \n added */
494
495         local_save_flags(irq_flags);
496         buffer = global_trace.trace_buffer.buffer;
497         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
498                                           irq_flags, pc);
499         if (!event)
500                 return 0;
501
502         entry = ring_buffer_event_data(event);
503         entry->ip = ip;
504
505         memcpy(&entry->buf, str, size);
506
507         /* Add a newline if necessary */
508         if (entry->buf[size - 1] != '\n') {
509                 entry->buf[size] = '\n';
510                 entry->buf[size + 1] = '\0';
511         } else
512                 entry->buf[size] = '\0';
513
514         __buffer_unlock_commit(buffer, event);
515         ftrace_trace_stack(buffer, irq_flags, 4, pc);
516
517         return size;
518 }
519 EXPORT_SYMBOL_GPL(__trace_puts);
520
521 /**
522  * __trace_bputs - write the pointer to a constant string into trace buffer
523  * @ip:    The address of the caller
524  * @str:   The constant string to write to the buffer
525  */
526 int __trace_bputs(unsigned long ip, const char *str)
527 {
528         struct ring_buffer_event *event;
529         struct ring_buffer *buffer;
530         struct bputs_entry *entry;
531         unsigned long irq_flags;
532         int size = sizeof(struct bputs_entry);
533         int pc;
534
535         if (!(trace_flags & TRACE_ITER_PRINTK))
536                 return 0;
537
538         pc = preempt_count();
539
540         if (unlikely(tracing_selftest_running || tracing_disabled))
541                 return 0;
542
543         local_save_flags(irq_flags);
544         buffer = global_trace.trace_buffer.buffer;
545         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
546                                           irq_flags, pc);
547         if (!event)
548                 return 0;
549
550         entry = ring_buffer_event_data(event);
551         entry->ip                       = ip;
552         entry->str                      = str;
553
554         __buffer_unlock_commit(buffer, event);
555         ftrace_trace_stack(buffer, irq_flags, 4, pc);
556
557         return 1;
558 }
559 EXPORT_SYMBOL_GPL(__trace_bputs);
560
561 #ifdef CONFIG_TRACER_SNAPSHOT
562 /**
563  * tracing_snapshot - take a snapshot of the current buffer.
564  *
565  * This causes a swap between the snapshot buffer and the current live
566  * tracing buffer. You can use this to take snapshots of the live
567  * trace when some condition is triggered, but continue to trace.
568  *
569  * Note, make sure to allocate the snapshot with either
570  * tracing_snapshot_alloc(), or by doing it manually
571  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
572  *
573  * If the snapshot buffer is not allocated, it will stop tracing,
574  * basically making a permanent snapshot.
575  */
576 void tracing_snapshot(void)
577 {
578         struct trace_array *tr = &global_trace;
579         struct tracer *tracer = tr->current_trace;
580         unsigned long flags;
581
582         if (in_nmi()) {
583                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
584                 internal_trace_puts("*** snapshot is being ignored        ***\n");
585                 return;
586         }
587
588         if (!tr->allocated_snapshot) {
589                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
590                 internal_trace_puts("*** stopping trace here!   ***\n");
591                 tracing_off();
592                 return;
593         }
594
595         /* Note, snapshot can not be used when the tracer uses it */
596         if (tracer->use_max_tr) {
597                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
598                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
599                 return;
600         }
601
602         local_irq_save(flags);
603         update_max_tr(tr, current, smp_processor_id());
604         local_irq_restore(flags);
605 }
606 EXPORT_SYMBOL_GPL(tracing_snapshot);
607
608 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
609                                         struct trace_buffer *size_buf, int cpu_id);
610 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
611
612 static int alloc_snapshot(struct trace_array *tr)
613 {
614         int ret;
615
616         if (!tr->allocated_snapshot) {
617
618                 /* allocate spare buffer */
619                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
620                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
621                 if (ret < 0)
622                         return ret;
623
624                 tr->allocated_snapshot = true;
625         }
626
627         return 0;
628 }
629
630 static void free_snapshot(struct trace_array *tr)
631 {
632         /*
633  * We don't free the ring buffer; instead, we resize it because
634  * the max_tr ring buffer has some state (e.g. ring->clock) and
635  * we want to preserve it.
636          */
637         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
638         set_buffer_entries(&tr->max_buffer, 1);
639         tracing_reset_online_cpus(&tr->max_buffer);
640         tr->allocated_snapshot = false;
641 }
642
643 /**
644  * tracing_alloc_snapshot - allocate snapshot buffer.
645  *
646  * This only allocates the snapshot buffer if it isn't already
647  * allocated - it doesn't also take a snapshot.
648  *
649  * This is meant to be used in cases where the snapshot buffer needs
650  * to be set up for events that can't sleep but need to be able to
651  * trigger a snapshot.
652  */
653 int tracing_alloc_snapshot(void)
654 {
655         struct trace_array *tr = &global_trace;
656         int ret;
657
658         ret = alloc_snapshot(tr);
659         WARN_ON(ret < 0);
660
661         return ret;
662 }
663 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
664
665 /**
666  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
667  *
668  * This is similar to tracing_snapshot(), but it will allocate the
669  * snapshot buffer if it isn't already allocated. Use this only
670  * where it is safe to sleep, as the allocation may sleep.
671  *
672  * This causes a swap between the snapshot buffer and the current live
673  * tracing buffer. You can use this to take snapshots of the live
674  * trace when some condition is triggered, but continue to trace.
675  */
676 void tracing_snapshot_alloc(void)
677 {
678         int ret;
679
680         ret = tracing_alloc_snapshot();
681         if (ret < 0)
682                 return;
683
684         tracing_snapshot();
685 }
686 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
687 #else
688 void tracing_snapshot(void)
689 {
690         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
691 }
692 EXPORT_SYMBOL_GPL(tracing_snapshot);
693 int tracing_alloc_snapshot(void)
694 {
695         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
696         return -ENODEV;
697 }
698 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
699 void tracing_snapshot_alloc(void)
700 {
701         /* Give warning */
702         tracing_snapshot();
703 }
704 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
705 #endif /* CONFIG_TRACER_SNAPSHOT */
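/*
 * Usage sketch: a caller that wants to capture the trace when a rare
 * condition hits can, from sleepable context, simply call
 * tracing_snapshot_alloc(). Alternatively, preallocate the buffer with
 * tracing_alloc_snapshot() and later call tracing_snapshot() from a
 * context that cannot sleep.
 */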
706
707 static void tracer_tracing_off(struct trace_array *tr)
708 {
709         if (tr->trace_buffer.buffer)
710                 ring_buffer_record_off(tr->trace_buffer.buffer);
711         /*
712          * This flag is looked at when buffers haven't been allocated
713          * yet, or by some tracers (like irqsoff), that just want to
714          * know if the ring buffer has been disabled, but it can handle
715  * races where it gets disabled while we still do a record.
716          * As the check is in the fast path of the tracers, it is more
717          * important to be fast than accurate.
718          */
719         tr->buffer_disabled = 1;
720         /* Make the flag seen by readers */
721         smp_wmb();
722 }
723
724 /**
725  * tracing_off - turn off tracing buffers
726  *
727  * This function stops the tracing buffers from recording data.
728  * It does not disable any overhead the tracers themselves may
729  * be causing. This function simply causes all recording to
730  * the ring buffers to fail.
731  */
732 void tracing_off(void)
733 {
734         tracer_tracing_off(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_off);
737
738 void disable_trace_on_warning(void)
739 {
740         if (__disable_trace_on_warning)
741                 tracing_off();
742 }
743
744 /**
745  * tracer_tracing_is_on - show real state of ring buffer enabled
746  * @tr : the trace array to know if ring buffer is enabled
747  *
748  * Shows real state of the ring buffer if it is enabled or not.
749  */
750 static int tracer_tracing_is_on(struct trace_array *tr)
751 {
752         if (tr->trace_buffer.buffer)
753                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
754         return !tr->buffer_disabled;
755 }
756
757 /**
758  * tracing_is_on - show state of ring buffers enabled
759  */
760 int tracing_is_on(void)
761 {
762         return tracer_tracing_is_on(&global_trace);
763 }
764 EXPORT_SYMBOL_GPL(tracing_is_on);
765
766 static int __init set_buf_size(char *str)
767 {
768         unsigned long buf_size;
769
770         if (!str)
771                 return 0;
772         buf_size = memparse(str, &str);
773         /* nr_entries can not be zero */
774         if (buf_size == 0)
775                 return 0;
776         trace_buf_size = buf_size;
777         return 1;
778 }
779 __setup("trace_buf_size=", set_buf_size);
780
781 static int __init set_tracing_thresh(char *str)
782 {
783         unsigned long threshold;
784         int ret;
785
786         if (!str)
787                 return 0;
788         ret = kstrtoul(str, 0, &threshold);
789         if (ret < 0)
790                 return 0;
791         tracing_thresh = threshold * 1000;
792         return 1;
793 }
794 __setup("tracing_thresh=", set_tracing_thresh);
795
796 unsigned long nsecs_to_usecs(unsigned long nsecs)
797 {
798         return nsecs / 1000;
799 }
800
801 /* These must match the bit positions in trace_iterator_flags */
802 static const char *trace_options[] = {
803         "print-parent",
804         "sym-offset",
805         "sym-addr",
806         "verbose",
807         "raw",
808         "hex",
809         "bin",
810         "block",
811         "stacktrace",
812         "trace_printk",
813         "ftrace_preempt",
814         "branch",
815         "annotate",
816         "userstacktrace",
817         "sym-userobj",
818         "printk-msg-only",
819         "context-info",
820         "latency-format",
821         "sleep-time",
822         "graph-time",
823         "record-cmd",
824         "overwrite",
825         "disable_on_free",
826         "irq-info",
827         "markers",
828         "function-trace",
829         NULL
830 };
831
832 static struct {
833         u64 (*func)(void);
834         const char *name;
835         int in_ns;              /* is this clock in nanoseconds? */
836 } trace_clocks[] = {
837         { trace_clock_local,            "local",        1 },
838         { trace_clock_global,           "global",       1 },
839         { trace_clock_counter,          "counter",      0 },
840         { trace_clock_jiffies,          "uptime",       0 },
841         { trace_clock,                  "perf",         1 },
842         { ktime_get_mono_fast_ns,       "mono",         1 },
843         ARCH_TRACE_CLOCKS
844 };
845
846 /*
847  * trace_parser_get_init - gets the buffer for trace parser
848  */
849 int trace_parser_get_init(struct trace_parser *parser, int size)
850 {
851         memset(parser, 0, sizeof(*parser));
852
853         parser->buffer = kmalloc(size, GFP_KERNEL);
854         if (!parser->buffer)
855                 return 1;
856
857         parser->size = size;
858         return 0;
859 }
860
861 /*
862  * trace_parser_put - frees the buffer for trace parser
863  */
864 void trace_parser_put(struct trace_parser *parser)
865 {
866         kfree(parser->buffer);
867 }
868
869 /*
870  * trace_get_user - reads the user input string separated by space
871  * (matched by isspace(ch))
872  *
873  * For each string found the 'struct trace_parser' is updated,
874  * and the function returns.
875  *
876  * Returns number of bytes read.
877  *
878  * See kernel/trace/trace.h for 'struct trace_parser' details.
879  */
880 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
881         size_t cnt, loff_t *ppos)
882 {
883         char ch;
884         size_t read = 0;
885         ssize_t ret;
886
887         if (!*ppos)
888                 trace_parser_clear(parser);
889
890         ret = get_user(ch, ubuf++);
891         if (ret)
892                 goto out;
893
894         read++;
895         cnt--;
896
897         /*
898          * The parser is not finished with the last write,
899          * continue reading the user input without skipping spaces.
900          */
901         if (!parser->cont) {
902                 /* skip white space */
903                 while (cnt && isspace(ch)) {
904                         ret = get_user(ch, ubuf++);
905                         if (ret)
906                                 goto out;
907                         read++;
908                         cnt--;
909                 }
910
911                 /* only spaces were written */
912                 if (isspace(ch)) {
913                         *ppos += read;
914                         ret = read;
915                         goto out;
916                 }
917
918                 parser->idx = 0;
919         }
920
921         /* read the non-space input */
922         while (cnt && !isspace(ch)) {
923                 if (parser->idx < parser->size - 1)
924                         parser->buffer[parser->idx++] = ch;
925                 else {
926                         ret = -EINVAL;
927                         goto out;
928                 }
929                 ret = get_user(ch, ubuf++);
930                 if (ret)
931                         goto out;
932                 read++;
933                 cnt--;
934         }
935
936         /* We either got finished input or we have to wait for another call. */
937         if (isspace(ch)) {
938                 parser->buffer[parser->idx] = 0;
939                 parser->cont = false;
940         } else if (parser->idx < parser->size - 1) {
941                 parser->cont = true;
942                 parser->buffer[parser->idx++] = ch;
943         } else {
944                 ret = -EINVAL;
945                 goto out;
946         }
947
948         *ppos += read;
949         ret = read;
950
951 out:
952         return ret;
953 }
954
955 /* TODO add a seq_buf_to_buffer() */
956 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
957 {
958         int len;
959
960         if (trace_seq_used(s) <= s->seq.readpos)
961                 return -EBUSY;
962
963         len = trace_seq_used(s) - s->seq.readpos;
964         if (cnt > len)
965                 cnt = len;
966         memcpy(buf, s->buffer + s->seq.readpos, cnt);
967
968         s->seq.readpos += cnt;
969         return cnt;
970 }
971
972 unsigned long __read_mostly     tracing_thresh;
973
974 #ifdef CONFIG_TRACER_MAX_TRACE
975 /*
976  * Copy the new maximum trace into the separate maximum-trace
977  * structure. (this way the maximum trace is permanently saved,
978  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
979  */
980 static void
981 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
982 {
983         struct trace_buffer *trace_buf = &tr->trace_buffer;
984         struct trace_buffer *max_buf = &tr->max_buffer;
985         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
986         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
987
988         max_buf->cpu = cpu;
989         max_buf->time_start = data->preempt_timestamp;
990
991         max_data->saved_latency = tr->max_latency;
992         max_data->critical_start = data->critical_start;
993         max_data->critical_end = data->critical_end;
994
995         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
996         max_data->pid = tsk->pid;
997         /*
998          * If tsk == current, then use current_uid(), as that does not use
999          * RCU. The irq tracer can be called out of RCU scope.
1000          */
1001         if (tsk == current)
1002                 max_data->uid = current_uid();
1003         else
1004                 max_data->uid = task_uid(tsk);
1005
1006         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1007         max_data->policy = tsk->policy;
1008         max_data->rt_priority = tsk->rt_priority;
1009
1010         /* record this task's comm */
1011         tracing_record_cmdline(tsk);
1012 }
1013
1014 /**
1015  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1016  * @tr: tracer
1017  * @tsk: the task with the latency
1018  * @cpu: The cpu that initiated the trace.
1019  *
1020  * Flip the buffers between the @tr and the max_tr and record information
1021  * about which task was the cause of this latency.
1022  */
1023 void
1024 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1025 {
1026         struct ring_buffer *buf;
1027
1028         if (tr->stop_count)
1029                 return;
1030
1031         WARN_ON_ONCE(!irqs_disabled());
1032
1033         if (!tr->allocated_snapshot) {
1034                 /* Only the nop tracer should hit this when disabling */
1035                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1036                 return;
1037         }
1038
1039         arch_spin_lock(&tr->max_lock);
1040
1041         buf = tr->trace_buffer.buffer;
1042         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1043         tr->max_buffer.buffer = buf;
1044
1045         __update_max_tr(tr, tsk, cpu);
1046         arch_spin_unlock(&tr->max_lock);
1047 }
1048
1049 /**
1050  * update_max_tr_single - only copy one trace over, and reset the rest
1051  * @tr: tracer
1052  * @tsk: task with the latency
1053  * @cpu: the cpu of the buffer to copy.
1054  *
1055  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1056  */
1057 void
1058 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1059 {
1060         int ret;
1061
1062         if (tr->stop_count)
1063                 return;
1064
1065         WARN_ON_ONCE(!irqs_disabled());
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&tr->max_lock);
1073
1074         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1075
1076         if (ret == -EBUSY) {
1077                 /*
1078                  * We failed to swap the buffer due to a commit taking
1079                  * place on this CPU. We fail to record, but we reset
1080                  * the max trace buffer (no one writes directly to it)
1081                  * and flag that it failed.
1082                  */
1083                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1084                         "Failed to swap buffers due to commit in progress\n");
1085         }
1086
1087         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1088
1089         __update_max_tr(tr, tsk, cpu);
1090         arch_spin_unlock(&tr->max_lock);
1091 }
1092 #endif /* CONFIG_TRACER_MAX_TRACE */
1093
1094 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1095 {
1096         /* Iterators are static, they should be filled or empty */
1097         if (trace_buffer_iter(iter, iter->cpu_file))
1098                 return 0;
1099
1100         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1101                                 full);
1102 }
1103
1104 #ifdef CONFIG_FTRACE_STARTUP_TEST
1105 static int run_tracer_selftest(struct tracer *type)
1106 {
1107         struct trace_array *tr = &global_trace;
1108         struct tracer *saved_tracer = tr->current_trace;
1109         int ret;
1110
1111         if (!type->selftest || tracing_selftest_disabled)
1112                 return 0;
1113
1114         /*
1115          * Run a selftest on this tracer.
1116          * Here we reset the trace buffer, and set the current
1117          * tracer to be this tracer. The tracer can then run some
1118          * internal tracing to verify that everything is in order.
1119          * If we fail, we do not register this tracer.
1120          */
1121         tracing_reset_online_cpus(&tr->trace_buffer);
1122
1123         tr->current_trace = type;
1124
1125 #ifdef CONFIG_TRACER_MAX_TRACE
1126         if (type->use_max_tr) {
1127                 /* If we expanded the buffers, make sure the max is expanded too */
1128                 if (ring_buffer_expanded)
1129                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1130                                            RING_BUFFER_ALL_CPUS);
1131                 tr->allocated_snapshot = true;
1132         }
1133 #endif
1134
1135         /* the test is responsible for initializing and enabling */
1136         pr_info("Testing tracer %s: ", type->name);
1137         ret = type->selftest(type, tr);
1138         /* the test is responsible for resetting too */
1139         tr->current_trace = saved_tracer;
1140         if (ret) {
1141                 printk(KERN_CONT "FAILED!\n");
1142                 /* Add the warning after printing 'FAILED' */
1143                 WARN_ON(1);
1144                 return -1;
1145         }
1146         /* Only reset on passing, to avoid touching corrupted buffers */
1147         tracing_reset_online_cpus(&tr->trace_buffer);
1148
1149 #ifdef CONFIG_TRACER_MAX_TRACE
1150         if (type->use_max_tr) {
1151                 tr->allocated_snapshot = false;
1152
1153                 /* Shrink the max buffer again */
1154                 if (ring_buffer_expanded)
1155                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1156                                            RING_BUFFER_ALL_CPUS);
1157         }
1158 #endif
1159
1160         printk(KERN_CONT "PASSED\n");
1161         return 0;
1162 }
1163 #else
1164 static inline int run_tracer_selftest(struct tracer *type)
1165 {
1166         return 0;
1167 }
1168 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1169
1170 /**
1171  * register_tracer - register a tracer with the ftrace system.
1172  * @type: the plugin for the tracer
1173  *
1174  * Register a new plugin tracer.
1175  */
1176 int register_tracer(struct tracer *type)
1177 {
1178         struct tracer *t;
1179         int ret = 0;
1180
1181         if (!type->name) {
1182                 pr_info("Tracer must have a name\n");
1183                 return -1;
1184         }
1185
1186         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1187                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1188                 return -1;
1189         }
1190
1191         mutex_lock(&trace_types_lock);
1192
1193         tracing_selftest_running = true;
1194
1195         for (t = trace_types; t; t = t->next) {
1196                 if (strcmp(type->name, t->name) == 0) {
1197                         /* already found */
1198                         pr_info("Tracer %s already registered\n",
1199                                 type->name);
1200                         ret = -1;
1201                         goto out;
1202                 }
1203         }
1204
1205         if (!type->set_flag)
1206                 type->set_flag = &dummy_set_flag;
1207         if (!type->flags)
1208                 type->flags = &dummy_tracer_flags;
1209         else
1210                 if (!type->flags->opts)
1211                         type->flags->opts = dummy_tracer_opt;
1212
1213         ret = run_tracer_selftest(type);
1214         if (ret < 0)
1215                 goto out;
1216
1217         type->next = trace_types;
1218         trace_types = type;
1219
1220  out:
1221         tracing_selftest_running = false;
1222         mutex_unlock(&trace_types_lock);
1223
1224         if (ret || !default_bootup_tracer)
1225                 goto out_unlock;
1226
1227         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1228                 goto out_unlock;
1229
1230         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1231         /* Do we want this tracer to start on bootup? */
1232         tracing_set_tracer(&global_trace, type->name);
1233         default_bootup_tracer = NULL;
1234         /* disable other selftests, since this will break it. */
1235         tracing_selftest_disabled = true;
1236 #ifdef CONFIG_FTRACE_STARTUP_TEST
1237         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1238                type->name);
1239 #endif
1240
1241  out_unlock:
1242         return ret;
1243 }
1244
1245 void tracing_reset(struct trace_buffer *buf, int cpu)
1246 {
1247         struct ring_buffer *buffer = buf->buffer;
1248
1249         if (!buffer)
1250                 return;
1251
1252         ring_buffer_record_disable(buffer);
1253
1254         /* Make sure all commits have finished */
1255         synchronize_sched();
1256         ring_buffer_reset_cpu(buffer, cpu);
1257
1258         ring_buffer_record_enable(buffer);
1259 }
1260
1261 void tracing_reset_online_cpus(struct trace_buffer *buf)
1262 {
1263         struct ring_buffer *buffer = buf->buffer;
1264         int cpu;
1265
1266         if (!buffer)
1267                 return;
1268
1269         ring_buffer_record_disable(buffer);
1270
1271         /* Make sure all commits have finished */
1272         synchronize_sched();
1273
1274         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1275
1276         for_each_online_cpu(cpu)
1277                 ring_buffer_reset_cpu(buffer, cpu);
1278
1279         ring_buffer_record_enable(buffer);
1280 }
1281
1282 /* Must have trace_types_lock held */
1283 void tracing_reset_all_online_cpus(void)
1284 {
1285         struct trace_array *tr;
1286
1287         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1288                 tracing_reset_online_cpus(&tr->trace_buffer);
1289 #ifdef CONFIG_TRACER_MAX_TRACE
1290                 tracing_reset_online_cpus(&tr->max_buffer);
1291 #endif
1292         }
1293 }
1294
1295 #define SAVED_CMDLINES_DEFAULT 128
1296 #define NO_CMDLINE_MAP UINT_MAX
1297 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
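/*
 * Bounded pid <-> comm cache: map_pid_to_cmdline maps a pid to a slot
 * index, map_cmdline_to_pid maps the slot back to the owning pid, and
 * saved_cmdlines stores the TASK_COMM_LEN comm strings themselves.
 */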
1298 struct saved_cmdlines_buffer {
1299         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1300         unsigned *map_cmdline_to_pid;
1301         unsigned cmdline_num;
1302         int cmdline_idx;
1303         char *saved_cmdlines;
1304 };
1305 static struct saved_cmdlines_buffer *savedcmd;
1306
1307 /* temporary disable recording */
1308 static atomic_t trace_record_cmdline_disabled __read_mostly;
1309
1310 static inline char *get_saved_cmdlines(int idx)
1311 {
1312         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1313 }
1314
1315 static inline void set_cmdline(int idx, const char *cmdline)
1316 {
1317         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1318 }
1319
1320 static int allocate_cmdlines_buffer(unsigned int val,
1321                                     struct saved_cmdlines_buffer *s)
1322 {
1323         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1324                                         GFP_KERNEL);
1325         if (!s->map_cmdline_to_pid)
1326                 return -ENOMEM;
1327
1328         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1329         if (!s->saved_cmdlines) {
1330                 kfree(s->map_cmdline_to_pid);
1331                 return -ENOMEM;
1332         }
1333
1334         s->cmdline_idx = 0;
1335         s->cmdline_num = val;
1336         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1337                sizeof(s->map_pid_to_cmdline));
1338         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1339                val * sizeof(*s->map_cmdline_to_pid));
1340
1341         return 0;
1342 }
1343
1344 static int trace_create_savedcmd(void)
1345 {
1346         int ret;
1347
1348         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1349         if (!savedcmd)
1350                 return -ENOMEM;
1351
1352         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1353         if (ret < 0) {
1354                 kfree(savedcmd);
1355                 savedcmd = NULL;
1356                 return -ENOMEM;
1357         }
1358
1359         return 0;
1360 }
1361
1362 int is_tracing_stopped(void)
1363 {
1364         return global_trace.stop_count;
1365 }
1366
1367 /**
1368  * tracing_start - quick start of the tracer
1369  *
1370  * If tracing is enabled but was stopped by tracing_stop,
1371  * this will start the tracer back up.
1372  */
1373 void tracing_start(void)
1374 {
1375         struct ring_buffer *buffer;
1376         unsigned long flags;
1377
1378         if (tracing_disabled)
1379                 return;
1380
1381         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1382         if (--global_trace.stop_count) {
1383                 if (global_trace.stop_count < 0) {
1384                         /* Someone screwed up their debugging */
1385                         WARN_ON_ONCE(1);
1386                         global_trace.stop_count = 0;
1387                 }
1388                 goto out;
1389         }
1390
1391         /* Prevent the buffers from switching */
1392         arch_spin_lock(&global_trace.max_lock);
1393
1394         buffer = global_trace.trace_buffer.buffer;
1395         if (buffer)
1396                 ring_buffer_record_enable(buffer);
1397
1398 #ifdef CONFIG_TRACER_MAX_TRACE
1399         buffer = global_trace.max_buffer.buffer;
1400         if (buffer)
1401                 ring_buffer_record_enable(buffer);
1402 #endif
1403
1404         arch_spin_unlock(&global_trace.max_lock);
1405
1406  out:
1407         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1408 }
1409
1410 static void tracing_start_tr(struct trace_array *tr)
1411 {
1412         struct ring_buffer *buffer;
1413         unsigned long flags;
1414
1415         if (tracing_disabled)
1416                 return;
1417
1418         /* If global, we need to also start the max tracer */
1419         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1420                 return tracing_start();
1421
1422         raw_spin_lock_irqsave(&tr->start_lock, flags);
1423
1424         if (--tr->stop_count) {
1425                 if (tr->stop_count < 0) {
1426                         /* Someone screwed up their debugging */
1427                         WARN_ON_ONCE(1);
1428                         tr->stop_count = 0;
1429                 }
1430                 goto out;
1431         }
1432
1433         buffer = tr->trace_buffer.buffer;
1434         if (buffer)
1435                 ring_buffer_record_enable(buffer);
1436
1437  out:
1438         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1439 }
1440
1441 /**
1442  * tracing_stop - quick stop of the tracer
1443  *
1444  * Light weight way to stop tracing. Use in conjunction with
1445  * tracing_start.
1446  */
1447 void tracing_stop(void)
1448 {
1449         struct ring_buffer *buffer;
1450         unsigned long flags;
1451
1452         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1453         if (global_trace.stop_count++)
1454                 goto out;
1455
1456         /* Prevent the buffers from switching */
1457         arch_spin_lock(&global_trace.max_lock);
1458
1459         buffer = global_trace.trace_buffer.buffer;
1460         if (buffer)
1461                 ring_buffer_record_disable(buffer);
1462
1463 #ifdef CONFIG_TRACER_MAX_TRACE
1464         buffer = global_trace.max_buffer.buffer;
1465         if (buffer)
1466                 ring_buffer_record_disable(buffer);
1467 #endif
1468
1469         arch_spin_unlock(&global_trace.max_lock);
1470
1471  out:
1472         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1473 }
1474
1475 static void tracing_stop_tr(struct trace_array *tr)
1476 {
1477         struct ring_buffer *buffer;
1478         unsigned long flags;
1479
1480         /* If global, we need to also stop the max tracer */
1481         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1482                 return tracing_stop();
1483
1484         raw_spin_lock_irqsave(&tr->start_lock, flags);
1485         if (tr->stop_count++)
1486                 goto out;
1487
1488         buffer = tr->trace_buffer.buffer;
1489         if (buffer)
1490                 ring_buffer_record_disable(buffer);
1491
1492  out:
1493         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1494 }
1495
1496 void trace_stop_cmdline_recording(void);
1497
1498 static int trace_save_cmdline(struct task_struct *tsk)
1499 {
1500         unsigned pid, idx;
1501
1502         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1503                 return 0;
1504
1505         /*
1506          * It's not the end of the world if we don't get
1507          * the lock, but we also don't want to spin
1508          * nor do we want to disable interrupts,
1509          * so if we miss here, then better luck next time.
1510          */
1511         if (!arch_spin_trylock(&trace_cmdline_lock))
1512                 return 0;
1513
1514         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1515         if (idx == NO_CMDLINE_MAP) {
1516                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1517
1518                 /*
1519                  * Check whether the cmdline buffer at idx has a pid
1520                  * mapped. We are going to overwrite that entry so we
1521                  * need to clear the map_pid_to_cmdline. Otherwise we
1522                  * would read the new comm for the old pid.
1523                  */
1524                 pid = savedcmd->map_cmdline_to_pid[idx];
1525                 if (pid != NO_CMDLINE_MAP)
1526                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1527
1528                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1529                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1530
1531                 savedcmd->cmdline_idx = idx;
1532         }
1533
1534         set_cmdline(idx, tsk->comm);
1535
1536         arch_spin_unlock(&trace_cmdline_lock);
1537
1538         return 1;
1539 }
1540
1541 static void __trace_find_cmdline(int pid, char comm[])
1542 {
1543         unsigned map;
1544
1545         if (!pid) {
1546                 strcpy(comm, "<idle>");
1547                 return;
1548         }
1549
1550         if (WARN_ON_ONCE(pid < 0)) {
1551                 strcpy(comm, "<XXX>");
1552                 return;
1553         }
1554
1555         if (pid > PID_MAX_DEFAULT) {
1556                 strcpy(comm, "<...>");
1557                 return;
1558         }
1559
1560         map = savedcmd->map_pid_to_cmdline[pid];
1561         if (map != NO_CMDLINE_MAP)
1562                 strcpy(comm, get_saved_cmdlines(map));
1563         else
1564                 strcpy(comm, "<...>");
1565 }
1566
1567 void trace_find_cmdline(int pid, char comm[])
1568 {
1569         preempt_disable();
1570         arch_spin_lock(&trace_cmdline_lock);
1571
1572         __trace_find_cmdline(pid, comm);
1573
1574         arch_spin_unlock(&trace_cmdline_lock);
1575         preempt_enable();
1576 }
1577
1578 void tracing_record_cmdline(struct task_struct *tsk)
1579 {
1580         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1581                 return;
1582
1583         if (!__this_cpu_read(trace_cmdline_save))
1584                 return;
1585
1586         if (trace_save_cmdline(tsk))
1587                 __this_cpu_write(trace_cmdline_save, false);
1588 }
1589
1590 void
1591 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1592                              int pc)
1593 {
1594         struct task_struct *tsk = current;
1595
1596         entry->preempt_count            = pc & 0xff;
1597         entry->pid                      = (tsk) ? tsk->pid : 0;
1598         entry->flags =
1599 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1600                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1601 #else
1602                 TRACE_FLAG_IRQS_NOSUPPORT |
1603 #endif
1604                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1605                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1606                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1607                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1608 }
1609 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1610
1611 struct ring_buffer_event *
1612 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1613                           int type,
1614                           unsigned long len,
1615                           unsigned long flags, int pc)
1616 {
1617         struct ring_buffer_event *event;
1618
1619         event = ring_buffer_lock_reserve(buffer, len);
1620         if (event != NULL) {
1621                 struct trace_entry *ent = ring_buffer_event_data(event);
1622
1623                 tracing_generic_entry_update(ent, flags, pc);
1624                 ent->type = type;
1625         }
1626
1627         return event;
1628 }
1629
1630 void
1631 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1632 {
1633         __this_cpu_write(trace_cmdline_save, true);
1634         ring_buffer_unlock_commit(buffer, event);
1635 }
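/*
 * Usage sketch (see trace_function() below): reserve an event, fill in
 * the type-specific fields, then commit:
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *					  flags, pc);
 *	if (event) {
 *		entry = ring_buffer_event_data(event);
 *		...fill in entry fields...
 *		__buffer_unlock_commit(buffer, event);
 *	}
 */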
1636
1637 static inline void
1638 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1639                              struct ring_buffer_event *event,
1640                              unsigned long flags, int pc)
1641 {
1642         __buffer_unlock_commit(buffer, event);
1643
1644         ftrace_trace_stack(buffer, flags, 6, pc);
1645         ftrace_trace_userstack(buffer, flags, pc);
1646 }
1647
1648 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1649                                 struct ring_buffer_event *event,
1650                                 unsigned long flags, int pc)
1651 {
1652         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1653 }
1654 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1655
1656 static struct ring_buffer *temp_buffer;
1657
1658 struct ring_buffer_event *
1659 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1660                           struct ftrace_event_file *ftrace_file,
1661                           int type, unsigned long len,
1662                           unsigned long flags, int pc)
1663 {
1664         struct ring_buffer_event *entry;
1665
1666         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1667         entry = trace_buffer_lock_reserve(*current_rb,
1668                                          type, len, flags, pc);
1669         /*
1670          * If tracing is off, but we have triggers enabled
1671          * we still need to look at the event data. Use the temp_buffer
1672          * to store the trace event for the trigger to use. It's recursion
1673          * safe and will not be recorded anywhere.
1674          */
1675         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1676                 *current_rb = temp_buffer;
1677                 entry = trace_buffer_lock_reserve(*current_rb,
1678                                                   type, len, flags, pc);
1679         }
1680         return entry;
1681 }
1682 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1683
1684 struct ring_buffer_event *
1685 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1686                                   int type, unsigned long len,
1687                                   unsigned long flags, int pc)
1688 {
1689         *current_rb = global_trace.trace_buffer.buffer;
1690         return trace_buffer_lock_reserve(*current_rb,
1691                                          type, len, flags, pc);
1692 }
1693 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1694
1695 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1696                                         struct ring_buffer_event *event,
1697                                         unsigned long flags, int pc)
1698 {
1699         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1700 }
1701 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1702
1703 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1704                                      struct ring_buffer_event *event,
1705                                      unsigned long flags, int pc,
1706                                      struct pt_regs *regs)
1707 {
1708         __buffer_unlock_commit(buffer, event);
1709
1710         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1711         ftrace_trace_userstack(buffer, flags, pc);
1712 }
1713 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1714
1715 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1716                                          struct ring_buffer_event *event)
1717 {
1718         ring_buffer_discard_commit(buffer, event);
1719 }
1720 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1721
1722 void
1723 trace_function(struct trace_array *tr,
1724                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1725                int pc)
1726 {
1727         struct ftrace_event_call *call = &event_function;
1728         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1729         struct ring_buffer_event *event;
1730         struct ftrace_entry *entry;
1731
1732         /* If we are reading the ring buffer, don't trace */
1733         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1734                 return;
1735
1736         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1737                                           flags, pc);
1738         if (!event)
1739                 return;
1740         entry   = ring_buffer_event_data(event);
1741         entry->ip                       = ip;
1742         entry->parent_ip                = parent_ip;
1743
1744         if (!call_filter_check_discard(call, entry, buffer, event))
1745                 __buffer_unlock_commit(buffer, event);
1746 }
1747
1748 #ifdef CONFIG_STACKTRACE
1749
1750 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1751 struct ftrace_stack {
1752         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1753 };
1754
1755 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1756 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1757
1758 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1759                                  unsigned long flags,
1760                                  int skip, int pc, struct pt_regs *regs)
1761 {
1762         struct ftrace_event_call *call = &event_kernel_stack;
1763         struct ring_buffer_event *event;
1764         struct stack_entry *entry;
1765         struct stack_trace trace;
1766         int use_stack;
1767         int size = FTRACE_STACK_ENTRIES;
1768
1769         trace.nr_entries        = 0;
1770         trace.skip              = skip;
1771
1772         /*
1773          * Since events can happen in NMIs, there's no safe way to
1774          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1775          * or NMI comes in, it will just have to use the default
1776          * FTRACE_STACK_ENTRIES depth.
1777          */
1778         preempt_disable_notrace();
1779
1780         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1781         /*
1782          * We don't need any atomic variables, just a barrier.
1783          * If an interrupt comes in, we don't care, because it would
1784          * have exited and put the counter back to what we want.
1785          * We just need a barrier to keep gcc from moving things
1786          * around.
1787          */
1788         barrier();
1789         if (use_stack == 1) {
1790                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1791                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1792
1793                 if (regs)
1794                         save_stack_trace_regs(regs, &trace);
1795                 else
1796                         save_stack_trace(&trace);
1797
1798                 if (trace.nr_entries > size)
1799                         size = trace.nr_entries;
1800         } else
1801                 /* From now on, use_stack is a boolean */
1802                 use_stack = 0;
1803
1804         size *= sizeof(unsigned long);
1805
1806         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1807                                           sizeof(*entry) + size, flags, pc);
1808         if (!event)
1809                 goto out;
1810         entry = ring_buffer_event_data(event);
1811
1812         memset(&entry->caller, 0, size);
1813
1814         if (use_stack)
1815                 memcpy(&entry->caller, trace.entries,
1816                        trace.nr_entries * sizeof(unsigned long));
1817         else {
1818                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1819                 trace.entries           = entry->caller;
1820                 if (regs)
1821                         save_stack_trace_regs(regs, &trace);
1822                 else
1823                         save_stack_trace(&trace);
1824         }
1825
1826         entry->size = trace.nr_entries;
1827
1828         if (!call_filter_check_discard(call, entry, buffer, event))
1829                 __buffer_unlock_commit(buffer, event);
1830
1831  out:
1832         /* Again, don't let gcc optimize things here */
1833         barrier();
1834         __this_cpu_dec(ftrace_stack_reserve);
1835         preempt_enable_notrace();
1836
1837 }
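/*
 * Editorial summary of the nesting protection in __ftrace_trace_stack():
 * the first caller on a cpu sees ftrace_stack_reserve == 1 and may use the
 * large per-cpu ftrace_stack scratch area; a nested interrupt or NMI sees a
 * higher count and instead saves at most FTRACE_STACK_ENTRIES entries
 * directly into the reserved event.
 */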
1838
1839 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1840                              int skip, int pc, struct pt_regs *regs)
1841 {
1842         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1843                 return;
1844
1845         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1846 }
1847
1848 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1849                         int skip, int pc)
1850 {
1851         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1852                 return;
1853
1854         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1855 }
1856
1857 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1858                    int pc)
1859 {
1860         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1861 }
1862
1863 /**
1864  * trace_dump_stack - record a stack back trace in the trace buffer
1865  * @skip: Number of functions to skip (helper handlers)
1866  */
1867 void trace_dump_stack(int skip)
1868 {
1869         unsigned long flags;
1870
1871         if (tracing_disabled || tracing_selftest_running)
1872                 return;
1873
1874         local_save_flags(flags);
1875
1876         /*
1877          * Skip 3 more, which seems to get us to the caller of
1878          * this function.
1879          */
1880         skip += 3;
1881         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1882                              flags, skip, preempt_count(), NULL);
1883 }
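/*
 * Editorial usage sketch (not part of the original source): to see how a
 * suspect code path is reached, a developer can drop
 *
 *	trace_dump_stack(0);
 *
 * into that path and read the recorded backtrace from the tracing "trace"
 * file.
 */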
1884
1885 static DEFINE_PER_CPU(int, user_stack_count);
1886
1887 void
1888 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1889 {
1890         struct ftrace_event_call *call = &event_user_stack;
1891         struct ring_buffer_event *event;
1892         struct userstack_entry *entry;
1893         struct stack_trace trace;
1894
1895         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1896                 return;
1897
1898         /*
1899          * NMIs cannot handle page faults, even with fixups.
1900          * Saving the user stack can (and often does) fault.
1901          */
1902         if (unlikely(in_nmi()))
1903                 return;
1904
1905         /*
1906          * prevent recursion, since the user stack tracing may
1907          * trigger other kernel events.
1908          */
1909         preempt_disable();
1910         if (__this_cpu_read(user_stack_count))
1911                 goto out;
1912
1913         __this_cpu_inc(user_stack_count);
1914
1915         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1916                                           sizeof(*entry), flags, pc);
1917         if (!event)
1918                 goto out_drop_count;
1919         entry   = ring_buffer_event_data(event);
1920
1921         entry->tgid             = current->tgid;
1922         memset(&entry->caller, 0, sizeof(entry->caller));
1923
1924         trace.nr_entries        = 0;
1925         trace.max_entries       = FTRACE_STACK_ENTRIES;
1926         trace.skip              = 0;
1927         trace.entries           = entry->caller;
1928
1929         save_stack_trace_user(&trace);
1930         if (!call_filter_check_discard(call, entry, buffer, event))
1931                 __buffer_unlock_commit(buffer, event);
1932
1933  out_drop_count:
1934         __this_cpu_dec(user_stack_count);
1935  out:
1936         preempt_enable();
1937 }
1938
1939 #ifdef UNUSED
1940 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1941 {
1942         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1943 }
1944 #endif /* UNUSED */
1945
1946 #endif /* CONFIG_STACKTRACE */
1947
1948 /* created for use with alloc_percpu */
1949 struct trace_buffer_struct {
1950         char buffer[TRACE_BUF_SIZE];
1951 };
1952
1953 static struct trace_buffer_struct *trace_percpu_buffer;
1954 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1955 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1956 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1957
1958 /*
1959  * The buffer used depends on the context. There is a per-cpu
1960  * buffer for normal context, softirq context, hard irq context and
1961  * NMI context. This allows for lockless recording.
1962  *
1963  * Note, if the buffers failed to be allocated, then this returns NULL.
1964  */
1965 static char *get_trace_buf(void)
1966 {
1967         struct trace_buffer_struct *percpu_buffer;
1968
1969         /*
1970          * If we have allocated per cpu buffers, then we do not
1971          * need to do any locking.
1972          */
1973         if (in_nmi())
1974                 percpu_buffer = trace_percpu_nmi_buffer;
1975         else if (in_irq())
1976                 percpu_buffer = trace_percpu_irq_buffer;
1977         else if (in_softirq())
1978                 percpu_buffer = trace_percpu_sirq_buffer;
1979         else
1980                 percpu_buffer = trace_percpu_buffer;
1981
1982         if (!percpu_buffer)
1983                 return NULL;
1984
1985         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1986 }
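/*
 * Editorial summary of the selection above; the most constrained context
 * wins, so a trace_printk() issued from an NMI that interrupted a softirq
 * still gets the NMI buffer:
 *
 *	in_nmi()      -> trace_percpu_nmi_buffer
 *	in_irq()      -> trace_percpu_irq_buffer
 *	in_softirq()  -> trace_percpu_sirq_buffer
 *	otherwise     -> trace_percpu_buffer
 */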
1987
1988 static int alloc_percpu_trace_buffer(void)
1989 {
1990         struct trace_buffer_struct *buffers;
1991         struct trace_buffer_struct *sirq_buffers;
1992         struct trace_buffer_struct *irq_buffers;
1993         struct trace_buffer_struct *nmi_buffers;
1994
1995         buffers = alloc_percpu(struct trace_buffer_struct);
1996         if (!buffers)
1997                 goto err_warn;
1998
1999         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2000         if (!sirq_buffers)
2001                 goto err_sirq;
2002
2003         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2004         if (!irq_buffers)
2005                 goto err_irq;
2006
2007         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2008         if (!nmi_buffers)
2009                 goto err_nmi;
2010
2011         trace_percpu_buffer = buffers;
2012         trace_percpu_sirq_buffer = sirq_buffers;
2013         trace_percpu_irq_buffer = irq_buffers;
2014         trace_percpu_nmi_buffer = nmi_buffers;
2015
2016         return 0;
2017
2018  err_nmi:
2019         free_percpu(irq_buffers);
2020  err_irq:
2021         free_percpu(sirq_buffers);
2022  err_sirq:
2023         free_percpu(buffers);
2024  err_warn:
2025         WARN(1, "Could not allocate percpu trace_printk buffer");
2026         return -ENOMEM;
2027 }
2028
2029 static int buffers_allocated;
2030
2031 void trace_printk_init_buffers(void)
2032 {
2033         if (buffers_allocated)
2034                 return;
2035
2036         if (alloc_percpu_trace_buffer())
2037                 return;
2038
2039         /* trace_printk() is for debug use only. Don't use it in production. */
2040
2041         pr_warning("\n");
2042         pr_warning("**********************************************************\n");
2043         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2044         pr_warning("**                                                      **\n");
2045         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2046         pr_warning("**                                                      **\n");
2047         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2048         pr_warning("** unsafe for production use.                           **\n");
2049         pr_warning("**                                                      **\n");
2050         pr_warning("** If you see this message and you are not debugging    **\n");
2051         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2052         pr_warning("**                                                      **\n");
2053         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2054         pr_warning("**********************************************************\n");
2055
2056         /* Expand the buffers to their configured size */
2057         tracing_update_buffers();
2058
2059         buffers_allocated = 1;
2060
2061         /*
2062          * trace_printk_init_buffers() can be called by modules.
2063          * If that happens, then we need to start cmdline recording
2064          * directly here. If the global_trace.buffer is already
2065          * allocated here, then this was called by module code.
2066          */
2067         if (global_trace.trace_buffer.buffer)
2068                 tracing_start_cmdline_record();
2069 }
2070
2071 void trace_printk_start_comm(void)
2072 {
2073         /* Start tracing comms if trace printk is set */
2074         if (!buffers_allocated)
2075                 return;
2076         tracing_start_cmdline_record();
2077 }
2078
2079 static void trace_printk_start_stop_comm(int enabled)
2080 {
2081         if (!buffers_allocated)
2082                 return;
2083
2084         if (enabled)
2085                 tracing_start_cmdline_record();
2086         else
2087                 tracing_stop_cmdline_record();
2088 }
2089
2090 /**
2091  * trace_vbprintk - write binary msg to tracing buffer
2092  * @ip: caller address; @fmt: format string; @args: va_list of arguments for @fmt
2093  */
2094 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2095 {
2096         struct ftrace_event_call *call = &event_bprint;
2097         struct ring_buffer_event *event;
2098         struct ring_buffer *buffer;
2099         struct trace_array *tr = &global_trace;
2100         struct bprint_entry *entry;
2101         unsigned long flags;
2102         char *tbuffer;
2103         int len = 0, size, pc;
2104
2105         if (unlikely(tracing_selftest_running || tracing_disabled))
2106                 return 0;
2107
2108         /* Don't pollute graph traces with trace_vprintk internals */
2109         pause_graph_tracing();
2110
2111         pc = preempt_count();
2112         preempt_disable_notrace();
2113
2114         tbuffer = get_trace_buf();
2115         if (!tbuffer) {
2116                 len = 0;
2117                 goto out;
2118         }
2119
2120         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2121
2122         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2123                 goto out;
2124
2125         local_save_flags(flags);
2126         size = sizeof(*entry) + sizeof(u32) * len;
2127         buffer = tr->trace_buffer.buffer;
2128         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2129                                           flags, pc);
2130         if (!event)
2131                 goto out;
2132         entry = ring_buffer_event_data(event);
2133         entry->ip                       = ip;
2134         entry->fmt                      = fmt;
2135
2136         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2137         if (!call_filter_check_discard(call, entry, buffer, event)) {
2138                 __buffer_unlock_commit(buffer, event);
2139                 ftrace_trace_stack(buffer, flags, 6, pc);
2140         }
2141
2142 out:
2143         preempt_enable_notrace();
2144         unpause_graph_tracing();
2145
2146         return len;
2147 }
2148 EXPORT_SYMBOL_GPL(trace_vbprintk);
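/*
 * Editorial usage note: trace_vbprintk() is normally reached through the
 * trace_printk() family, e.g. (arguments illustrative):
 *
 *	trace_printk("reached %s with flags=%lx\n", name, flags);
 *
 * Only the format pointer and the vbin_printf()-packed arguments are stored
 * in the ring buffer; the string is rendered when the buffer is read.
 */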
2149
2150 static int
2151 __trace_array_vprintk(struct ring_buffer *buffer,
2152                       unsigned long ip, const char *fmt, va_list args)
2153 {
2154         struct ftrace_event_call *call = &event_print;
2155         struct ring_buffer_event *event;
2156         int len = 0, size, pc;
2157         struct print_entry *entry;
2158         unsigned long flags;
2159         char *tbuffer;
2160
2161         if (tracing_disabled || tracing_selftest_running)
2162                 return 0;
2163
2164         /* Don't pollute graph traces with trace_vprintk internals */
2165         pause_graph_tracing();
2166
2167         pc = preempt_count();
2168         preempt_disable_notrace();
2169
2171         tbuffer = get_trace_buf();
2172         if (!tbuffer) {
2173                 len = 0;
2174                 goto out;
2175         }
2176
2177         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2178
2179         local_save_flags(flags);
2180         size = sizeof(*entry) + len + 1;
2181         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2182                                           flags, pc);
2183         if (!event)
2184                 goto out;
2185         entry = ring_buffer_event_data(event);
2186         entry->ip = ip;
2187
2188         memcpy(&entry->buf, tbuffer, len + 1);
2189         if (!call_filter_check_discard(call, entry, buffer, event)) {
2190                 __buffer_unlock_commit(buffer, event);
2191                 ftrace_trace_stack(buffer, flags, 6, pc);
2192         }
2193  out:
2194         preempt_enable_notrace();
2195         unpause_graph_tracing();
2196
2197         return len;
2198 }
2199
2200 int trace_array_vprintk(struct trace_array *tr,
2201                         unsigned long ip, const char *fmt, va_list args)
2202 {
2203         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2204 }
2205
2206 int trace_array_printk(struct trace_array *tr,
2207                        unsigned long ip, const char *fmt, ...)
2208 {
2209         int ret;
2210         va_list ap;
2211
2212         if (!(trace_flags & TRACE_ITER_PRINTK))
2213                 return 0;
2214
2215         va_start(ap, fmt);
2216         ret = trace_array_vprintk(tr, ip, fmt, ap);
2217         va_end(ap);
2218         return ret;
2219 }
2220
2221 int trace_array_printk_buf(struct ring_buffer *buffer,
2222                            unsigned long ip, const char *fmt, ...)
2223 {
2224         int ret;
2225         va_list ap;
2226
2227         if (!(trace_flags & TRACE_ITER_PRINTK))
2228                 return 0;
2229
2230         va_start(ap, fmt);
2231         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2232         va_end(ap);
2233         return ret;
2234 }
2235
2236 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2237 {
2238         return trace_array_vprintk(&global_trace, ip, fmt, args);
2239 }
2240 EXPORT_SYMBOL_GPL(trace_vprintk);
2241
2242 static void trace_iterator_increment(struct trace_iterator *iter)
2243 {
2244         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2245
2246         iter->idx++;
2247         if (buf_iter)
2248                 ring_buffer_read(buf_iter, NULL);
2249 }
2250
2251 static struct trace_entry *
2252 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2253                 unsigned long *lost_events)
2254 {
2255         struct ring_buffer_event *event;
2256         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2257
2258         if (buf_iter)
2259                 event = ring_buffer_iter_peek(buf_iter, ts);
2260         else
2261                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2262                                          lost_events);
2263
2264         if (event) {
2265                 iter->ent_size = ring_buffer_event_length(event);
2266                 return ring_buffer_event_data(event);
2267         }
2268         iter->ent_size = 0;
2269         return NULL;
2270 }
2271
2272 static struct trace_entry *
2273 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2274                   unsigned long *missing_events, u64 *ent_ts)
2275 {
2276         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2277         struct trace_entry *ent, *next = NULL;
2278         unsigned long lost_events = 0, next_lost = 0;
2279         int cpu_file = iter->cpu_file;
2280         u64 next_ts = 0, ts;
2281         int next_cpu = -1;
2282         int next_size = 0;
2283         int cpu;
2284
2285         /*
2286          * If we are in a per_cpu trace file, don't bother iterating over
2287          * all the cpus; peek at that one cpu directly.
2288          */
2289         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2290                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2291                         return NULL;
2292                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2293                 if (ent_cpu)
2294                         *ent_cpu = cpu_file;
2295
2296                 return ent;
2297         }
2298
2299         for_each_tracing_cpu(cpu) {
2300
2301                 if (ring_buffer_empty_cpu(buffer, cpu))
2302                         continue;
2303
2304                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2305
2306                 /*
2307                  * Pick the entry with the smallest timestamp:
2308                  */
2309                 if (ent && (!next || ts < next_ts)) {
2310                         next = ent;
2311                         next_cpu = cpu;
2312                         next_ts = ts;
2313                         next_lost = lost_events;
2314                         next_size = iter->ent_size;
2315                 }
2316         }
2317
2318         iter->ent_size = next_size;
2319
2320         if (ent_cpu)
2321                 *ent_cpu = next_cpu;
2322
2323         if (ent_ts)
2324                 *ent_ts = next_ts;
2325
2326         if (missing_events)
2327                 *missing_events = next_lost;
2328
2329         return next;
2330 }
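/*
 * Editorial worked example for the merge above: if the next pending entries
 * carry timestamps CPU0 = 105, CPU1 = 100 and CPU2 is empty, the loop picks
 * CPU1's entry (next_cpu = 1, next_ts = 100), so repeated calls interleave
 * the per-cpu buffers in global timestamp order.
 */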
2331
2332 /* Find the next real entry, without updating the iterator itself */
2333 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2334                                           int *ent_cpu, u64 *ent_ts)
2335 {
2336         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2337 }
2338
2339 /* Find the next real entry, and increment the iterator to the next entry */
2340 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2341 {
2342         iter->ent = __find_next_entry(iter, &iter->cpu,
2343                                       &iter->lost_events, &iter->ts);
2344
2345         if (iter->ent)
2346                 trace_iterator_increment(iter);
2347
2348         return iter->ent ? iter : NULL;
2349 }
2350
2351 static void trace_consume(struct trace_iterator *iter)
2352 {
2353         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2354                             &iter->lost_events);
2355 }
2356
2357 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2358 {
2359         struct trace_iterator *iter = m->private;
2360         int i = (int)*pos;
2361         void *ent;
2362
2363         WARN_ON_ONCE(iter->leftover);
2364
2365         (*pos)++;
2366
2367         /* can't go backwards */
2368         if (iter->idx > i)
2369                 return NULL;
2370
2371         if (iter->idx < 0)
2372                 ent = trace_find_next_entry_inc(iter);
2373         else
2374                 ent = iter;
2375
2376         while (ent && iter->idx < i)
2377                 ent = trace_find_next_entry_inc(iter);
2378
2379         iter->pos = *pos;
2380
2381         return ent;
2382 }
2383
2384 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2385 {
2386         struct ring_buffer_event *event;
2387         struct ring_buffer_iter *buf_iter;
2388         unsigned long entries = 0;
2389         u64 ts;
2390
2391         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2392
2393         buf_iter = trace_buffer_iter(iter, cpu);
2394         if (!buf_iter)
2395                 return;
2396
2397         ring_buffer_iter_reset(buf_iter);
2398
2399         /*
2400          * With the max latency tracers, it is possible that a reset
2401          * never took place on a cpu. This is evidenced by the
2402          * timestamp being before the start of the buffer.
2403          */
2404         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2405                 if (ts >= iter->trace_buffer->time_start)
2406                         break;
2407                 entries++;
2408                 ring_buffer_read(buf_iter, NULL);
2409         }
2410
2411         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2412 }
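/*
 * Editorial note: the skipped_entries count recorded here is later used by
 * get_total_entries(), which subtracts these pre-reset events from the
 * per-cpu counts reported in the trace header.
 */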
2413
2414 /*
2415  * The current tracer is copied to avoid holding a global lock
2416  * for the whole iteration.
2417  */
2418 static void *s_start(struct seq_file *m, loff_t *pos)
2419 {
2420         struct trace_iterator *iter = m->private;
2421         struct trace_array *tr = iter->tr;
2422         int cpu_file = iter->cpu_file;
2423         void *p = NULL;
2424         loff_t l = 0;
2425         int cpu;
2426
2427         /*
2428          * Copy the tracer to avoid using a global lock all around.
2429          * iter->trace is a copy of current_trace; the name pointer
2430          * may be compared instead of using strcmp(), as iter->trace->name
2431          * will point to the same string as current_trace->name.
2432          */
2433         mutex_lock(&trace_types_lock);
2434         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2435                 *iter->trace = *tr->current_trace;
2436         mutex_unlock(&trace_types_lock);
2437
2438 #ifdef CONFIG_TRACER_MAX_TRACE
2439         if (iter->snapshot && iter->trace->use_max_tr)
2440                 return ERR_PTR(-EBUSY);
2441 #endif
2442
2443         if (!iter->snapshot)
2444                 atomic_inc(&trace_record_cmdline_disabled);
2445
2446         if (*pos != iter->pos) {
2447                 iter->ent = NULL;
2448                 iter->cpu = 0;
2449                 iter->idx = -1;
2450
2451                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2452                         for_each_tracing_cpu(cpu)
2453                                 tracing_iter_reset(iter, cpu);
2454                 } else
2455                         tracing_iter_reset(iter, cpu_file);
2456
2457                 iter->leftover = 0;
2458                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2459                         ;
2460
2461         } else {
2462                 /*
2463                  * If we overflowed the seq_file before, then we want
2464                  * to just reuse the trace_seq buffer again.
2465                  */
2466                 if (iter->leftover)
2467                         p = iter;
2468                 else {
2469                         l = *pos - 1;
2470                         p = s_next(m, p, &l);
2471                 }
2472         }
2473
2474         trace_event_read_lock();
2475         trace_access_lock(cpu_file);
2476         return p;
2477 }
2478
2479 static void s_stop(struct seq_file *m, void *p)
2480 {
2481         struct trace_iterator *iter = m->private;
2482
2483 #ifdef CONFIG_TRACER_MAX_TRACE
2484         if (iter->snapshot && iter->trace->use_max_tr)
2485                 return;
2486 #endif
2487
2488         if (!iter->snapshot)
2489                 atomic_dec(&trace_record_cmdline_disabled);
2490
2491         trace_access_unlock(iter->cpu_file);
2492         trace_event_read_unlock();
2493 }
2494
2495 static void
2496 get_total_entries(struct trace_buffer *buf,
2497                   unsigned long *total, unsigned long *entries)
2498 {
2499         unsigned long count;
2500         int cpu;
2501
2502         *total = 0;
2503         *entries = 0;
2504
2505         for_each_tracing_cpu(cpu) {
2506                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2507                 /*
2508                  * If this buffer has skipped entries, then we hold all
2509                  * entries for the trace and we need to ignore the
2510                  * ones before the time stamp.
2511                  */
2512                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2513                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2514                         /* total is the same as the entries */
2515                         *total += count;
2516                 } else
2517                         *total += count +
2518                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2519                 *entries += count;
2520         }
2521 }
2522
2523 static void print_lat_help_header(struct seq_file *m)
2524 {
2525         seq_puts(m, "#                  _------=> CPU#            \n"
2526                     "#                 / _-----=> irqs-off        \n"
2527                     "#                | / _----=> need-resched    \n"
2528                     "#                || / _---=> hardirq/softirq \n"
2529                     "#                ||| / _--=> preempt-depth   \n"
2530                     "#                |||| /     delay            \n"
2531                     "#  cmd     pid   ||||| time  |   caller      \n"
2532                     "#     \\   /      |||||  \\    |   /         \n");
2533 }
2534
2535 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2536 {
2537         unsigned long total;
2538         unsigned long entries;
2539
2540         get_total_entries(buf, &total, &entries);
2541         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2542                    entries, total, num_online_cpus());
2543         seq_puts(m, "#\n");
2544 }
2545
2546 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2547 {
2548         print_event_info(buf, m);
2549         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2550                     "#              | |       |          |         |\n");
2551 }
2552
2553 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2554 {
2555         print_event_info(buf, m);
2556         seq_puts(m, "#                              _-----=> irqs-off\n"
2557                     "#                             / _----=> need-resched\n"
2558                     "#                            | / _---=> hardirq/softirq\n"
2559                     "#                            || / _--=> preempt-depth\n"
2560                     "#                            ||| /     delay\n"
2561                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2562                     "#              | |       |   ||||       |         |\n");
2563 }
2564
2565 void
2566 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2567 {
2568         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2569         struct trace_buffer *buf = iter->trace_buffer;
2570         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2571         struct tracer *type = iter->trace;
2572         unsigned long entries;
2573         unsigned long total;
2574         const char *name = type->name;
2575
2577
2578         get_total_entries(buf, &total, &entries);
2579
2580         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2581                    name, UTS_RELEASE);
2582         seq_puts(m, "# -----------------------------------"
2583                  "---------------------------------\n");
2584         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2585                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2586                    nsecs_to_usecs(data->saved_latency),
2587                    entries,
2588                    total,
2589                    buf->cpu,
2590 #if defined(CONFIG_PREEMPT_NONE)
2591                    "server",
2592 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2593                    "desktop",
2594 #elif defined(CONFIG_PREEMPT)
2595                    "preempt",
2596 #else
2597                    "unknown",
2598 #endif
2599                    /* These are reserved for later use */
2600                    0, 0, 0, 0);
2601 #ifdef CONFIG_SMP
2602         seq_printf(m, " #P:%d)\n", num_online_cpus());
2603 #else
2604         seq_puts(m, ")\n");
2605 #endif
2606         seq_puts(m, "#    -----------------\n");
2607         seq_printf(m, "#    | task: %.16s-%d "
2608                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2609                    data->comm, data->pid,
2610                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2611                    data->policy, data->rt_priority);
2612         seq_puts(m, "#    -----------------\n");
2613
2614         if (data->critical_start) {
2615                 seq_puts(m, "#  => started at: ");
2616                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2617                 trace_print_seq(m, &iter->seq);
2618                 seq_puts(m, "\n#  => ended at:   ");
2619                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2620                 trace_print_seq(m, &iter->seq);
2621                 seq_puts(m, "\n#\n");
2622         }
2623
2624         seq_puts(m, "#\n");
2625 }
2626
2627 static void test_cpu_buff_start(struct trace_iterator *iter)
2628 {
2629         struct trace_seq *s = &iter->seq;
2630
2631         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2632                 return;
2633
2634         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2635                 return;
2636
2637         if (cpumask_test_cpu(iter->cpu, iter->started))
2638                 return;
2639
2640         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2641                 return;
2642
2643         cpumask_set_cpu(iter->cpu, iter->started);
2644
2645         /* Don't print started cpu buffer for the first entry of the trace */
2646         if (iter->idx > 1)
2647                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2648                                 iter->cpu);
2649 }
2650
2651 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2652 {
2653         struct trace_seq *s = &iter->seq;
2654         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2655         struct trace_entry *entry;
2656         struct trace_event *event;
2657
2658         entry = iter->ent;
2659
2660         test_cpu_buff_start(iter);
2661
2662         event = ftrace_find_event(entry->type);
2663
2664         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2665                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2666                         trace_print_lat_context(iter);
2667                 else
2668                         trace_print_context(iter);
2669         }
2670
2671         if (trace_seq_has_overflowed(s))
2672                 return TRACE_TYPE_PARTIAL_LINE;
2673
2674         if (event)
2675                 return event->funcs->trace(iter, sym_flags, event);
2676
2677         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2678
2679         return trace_handle_return(s);
2680 }
2681
2682 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2683 {
2684         struct trace_seq *s = &iter->seq;
2685         struct trace_entry *entry;
2686         struct trace_event *event;
2687
2688         entry = iter->ent;
2689
2690         if (trace_flags & TRACE_ITER_CONTEXT_INFO)
2691                 trace_seq_printf(s, "%d %d %llu ",
2692                                  entry->pid, iter->cpu, iter->ts);
2693
2694         if (trace_seq_has_overflowed(s))
2695                 return TRACE_TYPE_PARTIAL_LINE;
2696
2697         event = ftrace_find_event(entry->type);
2698         if (event)
2699                 return event->funcs->raw(iter, 0, event);
2700
2701         trace_seq_printf(s, "%d ?\n", entry->type);
2702
2703         return trace_handle_return(s);
2704 }
2705
2706 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2707 {
2708         struct trace_seq *s = &iter->seq;
2709         unsigned char newline = '\n';
2710         struct trace_entry *entry;
2711         struct trace_event *event;
2712
2713         entry = iter->ent;
2714
2715         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2716                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2717                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2718                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2719                 if (trace_seq_has_overflowed(s))
2720                         return TRACE_TYPE_PARTIAL_LINE;
2721         }
2722
2723         event = ftrace_find_event(entry->type);
2724         if (event) {
2725                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2726                 if (ret != TRACE_TYPE_HANDLED)
2727                         return ret;
2728         }
2729
2730         SEQ_PUT_FIELD(s, newline);
2731
2732         return trace_handle_return(s);
2733 }
2734
2735 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2736 {
2737         struct trace_seq *s = &iter->seq;
2738         struct trace_entry *entry;
2739         struct trace_event *event;
2740
2741         entry = iter->ent;
2742
2743         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2744                 SEQ_PUT_FIELD(s, entry->pid);
2745                 SEQ_PUT_FIELD(s, iter->cpu);
2746                 SEQ_PUT_FIELD(s, iter->ts);
2747                 if (trace_seq_has_overflowed(s))
2748                         return TRACE_TYPE_PARTIAL_LINE;
2749         }
2750
2751         event = ftrace_find_event(entry->type);
2752         return event ? event->funcs->binary(iter, 0, event) :
2753                 TRACE_TYPE_HANDLED;
2754 }
2755
2756 int trace_empty(struct trace_iterator *iter)
2757 {
2758         struct ring_buffer_iter *buf_iter;
2759         int cpu;
2760
2761         /* If we are looking at one CPU buffer, only check that one */
2762         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2763                 cpu = iter->cpu_file;
2764                 buf_iter = trace_buffer_iter(iter, cpu);
2765                 if (buf_iter) {
2766                         if (!ring_buffer_iter_empty(buf_iter))
2767                                 return 0;
2768                 } else {
2769                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2770                                 return 0;
2771                 }
2772                 return 1;
2773         }
2774
2775         for_each_tracing_cpu(cpu) {
2776                 buf_iter = trace_buffer_iter(iter, cpu);
2777                 if (buf_iter) {
2778                         if (!ring_buffer_iter_empty(buf_iter))
2779                                 return 0;
2780                 } else {
2781                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2782                                 return 0;
2783                 }
2784         }
2785
2786         return 1;
2787 }
2788
2789 /*  Called with trace_event_read_lock() held. */
2790 enum print_line_t print_trace_line(struct trace_iterator *iter)
2791 {
2792         enum print_line_t ret;
2793
2794         if (iter->lost_events) {
2795                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2796                                  iter->cpu, iter->lost_events);
2797                 if (trace_seq_has_overflowed(&iter->seq))
2798                         return TRACE_TYPE_PARTIAL_LINE;
2799         }
2800
2801         if (iter->trace && iter->trace->print_line) {
2802                 ret = iter->trace->print_line(iter);
2803                 if (ret != TRACE_TYPE_UNHANDLED)
2804                         return ret;
2805         }
2806
2807         if (iter->ent->type == TRACE_BPUTS &&
2808                         trace_flags & TRACE_ITER_PRINTK &&
2809                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2810                 return trace_print_bputs_msg_only(iter);
2811
2812         if (iter->ent->type == TRACE_BPRINT &&
2813                         trace_flags & TRACE_ITER_PRINTK &&
2814                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2815                 return trace_print_bprintk_msg_only(iter);
2816
2817         if (iter->ent->type == TRACE_PRINT &&
2818                         trace_flags & TRACE_ITER_PRINTK &&
2819                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2820                 return trace_print_printk_msg_only(iter);
2821
2822         if (trace_flags & TRACE_ITER_BIN)
2823                 return print_bin_fmt(iter);
2824
2825         if (trace_flags & TRACE_ITER_HEX)
2826                 return print_hex_fmt(iter);
2827
2828         if (trace_flags & TRACE_ITER_RAW)
2829                 return print_raw_fmt(iter);
2830
2831         return print_trace_fmt(iter);
2832 }
2833
2834 void trace_latency_header(struct seq_file *m)
2835 {
2836         struct trace_iterator *iter = m->private;
2837
2838         /* print nothing if the buffers are empty */
2839         if (trace_empty(iter))
2840                 return;
2841
2842         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2843                 print_trace_header(m, iter);
2844
2845         if (!(trace_flags & TRACE_ITER_VERBOSE))
2846                 print_lat_help_header(m);
2847 }
2848
2849 void trace_default_header(struct seq_file *m)
2850 {
2851         struct trace_iterator *iter = m->private;
2852
2853         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2854                 return;
2855
2856         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2857                 /* print nothing if the buffers are empty */
2858                 if (trace_empty(iter))
2859                         return;
2860                 print_trace_header(m, iter);
2861                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2862                         print_lat_help_header(m);
2863         } else {
2864                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2865                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2866                                 print_func_help_header_irq(iter->trace_buffer, m);
2867                         else
2868                                 print_func_help_header(iter->trace_buffer, m);
2869                 }
2870         }
2871 }
2872
2873 static void test_ftrace_alive(struct seq_file *m)
2874 {
2875         if (!ftrace_is_dead())
2876                 return;
2877         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2878                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2879 }
2880
2881 #ifdef CONFIG_TRACER_MAX_TRACE
2882 static void show_snapshot_main_help(struct seq_file *m)
2883 {
2884         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2885                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2886                     "#                      Takes a snapshot of the main buffer.\n"
2887                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2888                     "#                      (Doesn't have to be '2'; works with any number that\n"
2889                     "#                       is not a '0' or '1')\n");
2890 }
2891
2892 static void show_snapshot_percpu_help(struct seq_file *m)
2893 {
2894         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2895 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2896         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2897                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2898 #else
2899         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2900                     "#                     Must use main snapshot file to allocate.\n");
2901 #endif
2902         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2903                     "#                      (Doesn't have to be '2'; works with any number that\n"
2904                     "#                       is not a '0' or '1')\n");
2905 }
2906
2907 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2908 {
2909         if (iter->tr->allocated_snapshot)
2910                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2911         else
2912                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2913
2914         seq_puts(m, "# Snapshot commands:\n");
2915         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2916                 show_snapshot_main_help(m);
2917         else
2918                 show_snapshot_percpu_help(m);
2919 }
2920 #else
2921 /* Should never be called */
2922 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2923 #endif
2924
2925 static int s_show(struct seq_file *m, void *v)
2926 {
2927         struct trace_iterator *iter = v;
2928         int ret;
2929
2930         if (iter->ent == NULL) {
2931                 if (iter->tr) {
2932                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2933                         seq_puts(m, "#\n");
2934                         test_ftrace_alive(m);
2935                 }
2936                 if (iter->snapshot && trace_empty(iter))
2937                         print_snapshot_help(m, iter);
2938                 else if (iter->trace && iter->trace->print_header)
2939                         iter->trace->print_header(m);
2940                 else
2941                         trace_default_header(m);
2942
2943         } else if (iter->leftover) {
2944                 /*
2945                  * If we filled the seq_file buffer earlier, we
2946                  * want to just show it now.
2947                  */
2948                 ret = trace_print_seq(m, &iter->seq);
2949
2950                 /* ret should this time be zero, but you never know */
2951                 iter->leftover = ret;
2952
2953         } else {
2954                 print_trace_line(iter);
2955                 ret = trace_print_seq(m, &iter->seq);
2956                 /*
2957                  * If we overflow the seq_file buffer, then it will
2958                  * ask us for this data again at start up.
2959                  * Use that instead.
2960                  *  ret is 0 if seq_file write succeeded.
2961                  *        -1 otherwise.
2962                  */
2963                 iter->leftover = ret;
2964         }
2965
2966         return 0;
2967 }
2968
2969 /*
2970  * Should be used after trace_array_get(); trace_types_lock
2971  * ensures that i_cdev was already initialized.
2972  */
2973 static inline int tracing_get_cpu(struct inode *inode)
2974 {
2975         if (inode->i_cdev) /* See trace_create_cpu_file() */
2976                 return (long)inode->i_cdev - 1;
2977         return RING_BUFFER_ALL_CPUS;
2978 }
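/*
 * Editorial note on the encoding assumed above: per-cpu trace files are
 * expected to stash "cpu + 1" in i_cdev (see trace_create_cpu_file()), so a
 * NULL i_cdev means no specific cpu and maps to RING_BUFFER_ALL_CPUS, while
 * cpu N is recovered as (long)i_cdev - 1.
 */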
2979
2980 static const struct seq_operations tracer_seq_ops = {
2981         .start          = s_start,
2982         .next           = s_next,
2983         .stop           = s_stop,
2984         .show           = s_show,
2985 };
2986
2987 static struct trace_iterator *
2988 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2989 {
2990         struct trace_array *tr = inode->i_private;
2991         struct trace_iterator *iter;
2992         int cpu;
2993
2994         if (tracing_disabled)
2995                 return ERR_PTR(-ENODEV);
2996
2997         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2998         if (!iter)
2999                 return ERR_PTR(-ENOMEM);
3000
3001         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
3002                                     GFP_KERNEL);
3003         if (!iter->buffer_iter)
3004                 goto release;
3005
3006         /*
3007          * We make a copy of the current tracer to avoid concurrent
3008          * changes to it while we are reading.
3009          */
3010         mutex_lock(&trace_types_lock);
3011         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3012         if (!iter->trace)
3013                 goto fail;
3014
3015         *iter->trace = *tr->current_trace;
3016
3017         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3018                 goto fail;
3019
3020         iter->tr = tr;
3021
3022 #ifdef CONFIG_TRACER_MAX_TRACE
3023         /* Currently only the top directory has a snapshot */
3024         if (tr->current_trace->print_max || snapshot)
3025                 iter->trace_buffer = &tr->max_buffer;
3026         else
3027 #endif
3028                 iter->trace_buffer = &tr->trace_buffer;
3029         iter->snapshot = snapshot;
3030         iter->pos = -1;
3031         iter->cpu_file = tracing_get_cpu(inode);
3032         mutex_init(&iter->mutex);
3033
3034         /* Notify the tracer early; before we stop tracing. */
3035         if (iter->trace && iter->trace->open)
3036                 iter->trace->open(iter);
3037
3038         /* Annotate start of buffers if we had overruns */
3039         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3040                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3041
3042         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3043         if (trace_clocks[tr->clock_id].in_ns)
3044                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3045
3046         /* stop the trace while dumping if we are not opening "snapshot" */
3047         if (!iter->snapshot)
3048                 tracing_stop_tr(tr);
3049
3050         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3051                 for_each_tracing_cpu(cpu) {
3052                         iter->buffer_iter[cpu] =
3053                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3054                 }
3055                 ring_buffer_read_prepare_sync();
3056                 for_each_tracing_cpu(cpu) {
3057                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3058                         tracing_iter_reset(iter, cpu);
3059                 }
3060         } else {
3061                 cpu = iter->cpu_file;
3062                 iter->buffer_iter[cpu] =
3063                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3064                 ring_buffer_read_prepare_sync();
3065                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3066                 tracing_iter_reset(iter, cpu);
3067         }
3068
3069         mutex_unlock(&trace_types_lock);
3070
3071         return iter;
3072
3073  fail:
3074         mutex_unlock(&trace_types_lock);
3075         kfree(iter->trace);
3076         kfree(iter->buffer_iter);
3077 release:
3078         seq_release_private(inode, file);
3079         return ERR_PTR(-ENOMEM);
3080 }
3081
3082 int tracing_open_generic(struct inode *inode, struct file *filp)
3083 {
3084         if (tracing_disabled)
3085                 return -ENODEV;
3086
3087         filp->private_data = inode->i_private;
3088         return 0;
3089 }
3090
3091 bool tracing_is_disabled(void)
3092 {
3093         return tracing_disabled ? true : false;
3094 }
3095
3096 /*
3097  * Open and update trace_array ref count.
3098  * Must have the current trace_array passed to it.
3099  */
3100 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3101 {
3102         struct trace_array *tr = inode->i_private;
3103
3104         if (tracing_disabled)
3105                 return -ENODEV;
3106
3107         if (trace_array_get(tr) < 0)
3108                 return -ENODEV;
3109
3110         filp->private_data = inode->i_private;
3111
3112         return 0;
3113 }
3114
3115 static int tracing_release(struct inode *inode, struct file *file)
3116 {
3117         struct trace_array *tr = inode->i_private;
3118         struct seq_file *m = file->private_data;
3119         struct trace_iterator *iter;
3120         int cpu;
3121
3122         if (!(file->f_mode & FMODE_READ)) {
3123                 trace_array_put(tr);
3124                 return 0;
3125         }
3126
3127         /* Writes do not use seq_file */
3128         iter = m->private;
3129         mutex_lock(&trace_types_lock);
3130
3131         for_each_tracing_cpu(cpu) {
3132                 if (iter->buffer_iter[cpu])
3133                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3134         }
3135
3136         if (iter->trace && iter->trace->close)
3137                 iter->trace->close(iter);
3138
3139         if (!iter->snapshot)
3140                 /* reenable tracing if it was previously enabled */
3141                 tracing_start_tr(tr);
3142
3143         __trace_array_put(tr);
3144
3145         mutex_unlock(&trace_types_lock);
3146
3147         mutex_destroy(&iter->mutex);
3148         free_cpumask_var(iter->started);
3149         kfree(iter->trace);
3150         kfree(iter->buffer_iter);
3151         seq_release_private(inode, file);
3152
3153         return 0;
3154 }
3155
3156 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3157 {
3158         struct trace_array *tr = inode->i_private;
3159
3160         trace_array_put(tr);
3161         return 0;
3162 }
3163
3164 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3165 {
3166         struct trace_array *tr = inode->i_private;
3167
3168         trace_array_put(tr);
3169
3170         return single_release(inode, file);
3171 }
3172
3173 static int tracing_open(struct inode *inode, struct file *file)
3174 {
3175         struct trace_array *tr = inode->i_private;
3176         struct trace_iterator *iter;
3177         int ret = 0;
3178
3179         if (trace_array_get(tr) < 0)
3180                 return -ENODEV;
3181
3182         /* If this file was open for write, then erase contents */
3183         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3184                 int cpu = tracing_get_cpu(inode);
3185
3186                 if (cpu == RING_BUFFER_ALL_CPUS)
3187                         tracing_reset_online_cpus(&tr->trace_buffer);
3188                 else
3189                         tracing_reset(&tr->trace_buffer, cpu);
3190         }
3191
3192         if (file->f_mode & FMODE_READ) {
3193                 iter = __tracing_open(inode, file, false);
3194                 if (IS_ERR(iter))
3195                         ret = PTR_ERR(iter);
3196                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3197                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3198         }
3199
3200         if (ret < 0)
3201                 trace_array_put(tr);
3202
3203         return ret;
3204 }
3205
3206 /*
3207  * Some tracers are not suitable for instance buffers.
3208  * A tracer is always available for the global array (toplevel);
3209  * otherwise it must explicitly state that it allows instances.
3210  */
3211 static bool
3212 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3213 {
3214         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3215 }
3216
3217 /* Find the next tracer that this trace array may use */
3218 static struct tracer *
3219 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3220 {
3221         while (t && !trace_ok_for_array(t, tr))
3222                 t = t->next;
3223
3224         return t;
3225 }
3226
3227 static void *
3228 t_next(struct seq_file *m, void *v, loff_t *pos)
3229 {
3230         struct trace_array *tr = m->private;
3231         struct tracer *t = v;
3232
3233         (*pos)++;
3234
3235         if (t)
3236                 t = get_tracer_for_array(tr, t->next);
3237
3238         return t;
3239 }
3240
3241 static void *t_start(struct seq_file *m, loff_t *pos)
3242 {
3243         struct trace_array *tr = m->private;
3244         struct tracer *t;
3245         loff_t l = 0;
3246
3247         mutex_lock(&trace_types_lock);
3248
3249         t = get_tracer_for_array(tr, trace_types);
3250         for (; t && l < *pos; t = t_next(m, t, &l))
3251                 ;
3252
3253         return t;
3254 }
3255
3256 static void t_stop(struct seq_file *m, void *p)
3257 {
3258         mutex_unlock(&trace_types_lock);
3259 }
3260
3261 static int t_show(struct seq_file *m, void *v)
3262 {
3263         struct tracer *t = v;
3264
3265         if (!t)
3266                 return 0;
3267
3268         seq_puts(m, t->name);
3269         if (t->next)
3270                 seq_putc(m, ' ');
3271         else
3272                 seq_putc(m, '\n');
3273
3274         return 0;
3275 }
3276
3277 static const struct seq_operations show_traces_seq_ops = {
3278         .start          = t_start,
3279         .next           = t_next,
3280         .stop           = t_stop,
3281         .show           = t_show,
3282 };
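
/*
 * Example (illustrative): the seq_file operations above back the
 * available_tracers file, so a read produces the space-separated list
 * built by t_show(), e.g. (the exact set depends on the kernel config):
 *
 *      # cat available_tracers
 *      function_graph function nop
 */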
3283
3284 static int show_traces_open(struct inode *inode, struct file *file)
3285 {
3286         struct trace_array *tr = inode->i_private;
3287         struct seq_file *m;
3288         int ret;
3289
3290         if (tracing_disabled)
3291                 return -ENODEV;
3292
3293         ret = seq_open(file, &show_traces_seq_ops);
3294         if (ret)
3295                 return ret;
3296
3297         m = file->private_data;
3298         m->private = tr;
3299
3300         return 0;
3301 }
3302
3303 static ssize_t
3304 tracing_write_stub(struct file *filp, const char __user *ubuf,
3305                    size_t count, loff_t *ppos)
3306 {
3307         return count;
3308 }
3309
3310 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
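/*
 * Reads of the trace file go through seq_file, so seeking is handed to
 * seq_lseek(). Writers do not use seq_file (see tracing_release() above),
 * so for a write-only open the position is simply reset to zero.
 */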
3311 {
3312         int ret;
3313
3314         if (file->f_mode & FMODE_READ)
3315                 ret = seq_lseek(file, offset, whence);
3316         else
3317                 file->f_pos = ret = 0;
3318
3319         return ret;
3320 }
3321
3322 static const struct file_operations tracing_fops = {
3323         .open           = tracing_open,
3324         .read           = seq_read,
3325         .write          = tracing_write_stub,
3326         .llseek         = tracing_lseek,
3327         .release        = tracing_release,
3328 };
3329
3330 static const struct file_operations show_traces_fops = {
3331         .open           = show_traces_open,
3332         .read           = seq_read,
3333         .release        = seq_release,
3334         .llseek         = seq_lseek,
3335 };
3336
3337 /*
3338  * The tracer itself will not take this lock, but still we want
3339  * to provide a consistent cpumask to user-space:
3340  */
3341 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3342
3343 /*
3344  * Temporary storage for the character representation of the
3345  * CPU bitmask (and one more byte for the newline):
3346  */
3347 static char mask_str[NR_CPUS + 1];
3348
3349 static ssize_t
3350 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3351                      size_t count, loff_t *ppos)
3352 {
3353         struct trace_array *tr = file_inode(filp)->i_private;
3354         int len;
3355
3356         mutex_lock(&tracing_cpumask_update_lock);
3357
3358         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3359         if (count - len < 2) {
3360                 count = -EINVAL;
3361                 goto out_err;
3362         }
3363         len += sprintf(mask_str + len, "\n");
3364         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3365
3366 out_err:
3367         mutex_unlock(&tracing_cpumask_update_lock);
3368
3369         return count;
3370 }
3371
3372 static ssize_t
3373 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3374                       size_t count, loff_t *ppos)
3375 {
3376         struct trace_array *tr = file_inode(filp)->i_private;
3377         cpumask_var_t tracing_cpumask_new;
3378         int err, cpu;
3379
3380         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3381                 return -ENOMEM;
3382
3383         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3384         if (err)
3385                 goto err_unlock;
3386
3387         mutex_lock(&tracing_cpumask_update_lock);
3388
3389         local_irq_disable();
3390         arch_spin_lock(&tr->max_lock);
3391         for_each_tracing_cpu(cpu) {
3392                 /*
3393                  * Increase/decrease the disabled counter if we are
3394                  * about to flip a bit in the cpumask:
3395                  */
3396                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3397                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3398                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3399                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3400                 }
3401                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3402                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3403                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3404                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3405                 }
3406         }
3407         arch_spin_unlock(&tr->max_lock);
3408         local_irq_enable();
3409
3410         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3411
3412         mutex_unlock(&tracing_cpumask_update_lock);
3413         free_cpumask_var(tracing_cpumask_new);
3414
3415         return count;
3416
3417 err_unlock:
3418         free_cpumask_var(tracing_cpumask_new);
3419
3420         return err;
3421 }
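
/*
 * Example (illustrative): tracing_cpumask takes a hex CPU mask, so
 * limiting tracing to CPUs 0 and 1 and then restoring all CPUs looks
 * like this (assuming a 4-CPU system):
 *
 *      # echo 3 > tracing_cpumask
 *      # echo f > tracing_cpumask
 *
 * The write path above disables ring buffer recording on every CPU
 * being removed from the mask and re-enables it on every CPU being
 * added.
 */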
3422
3423 static const struct file_operations tracing_cpumask_fops = {
3424         .open           = tracing_open_generic_tr,
3425         .read           = tracing_cpumask_read,
3426         .write          = tracing_cpumask_write,
3427         .release        = tracing_release_generic_tr,
3428         .llseek         = generic_file_llseek,
3429 };
3430
3431 static int tracing_trace_options_show(struct seq_file *m, void *v)
3432 {
3433         struct tracer_opt *trace_opts;
3434         struct trace_array *tr = m->private;
3435         u32 tracer_flags;
3436         int i;
3437
3438         mutex_lock(&trace_types_lock);
3439         tracer_flags = tr->current_trace->flags->val;
3440         trace_opts = tr->current_trace->flags->opts;
3441
3442         for (i = 0; trace_options[i]; i++) {
3443                 if (trace_flags & (1 << i))
3444                         seq_printf(m, "%s\n", trace_options[i]);
3445                 else
3446                         seq_printf(m, "no%s\n", trace_options[i]);
3447         }
3448
3449         for (i = 0; trace_opts[i].name; i++) {
3450                 if (tracer_flags & trace_opts[i].bit)
3451                         seq_printf(m, "%s\n", trace_opts[i].name);
3452                 else
3453                         seq_printf(m, "no%s\n", trace_opts[i].name);
3454         }
3455         mutex_unlock(&trace_types_lock);
3456
3457         return 0;
3458 }
3459
3460 static int __set_tracer_option(struct trace_array *tr,
3461                                struct tracer_flags *tracer_flags,
3462                                struct tracer_opt *opts, int neg)
3463 {
3464         struct tracer *trace = tr->current_trace;
3465         int ret;
3466
3467         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3468         if (ret)
3469                 return ret;
3470
3471         if (neg)
3472                 tracer_flags->val &= ~opts->bit;
3473         else
3474                 tracer_flags->val |= opts->bit;
3475         return 0;
3476 }
3477
3478 /* Try to assign a tracer specific option */
3479 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3480 {
3481         struct tracer *trace = tr->current_trace;
3482         struct tracer_flags *tracer_flags = trace->flags;
3483         struct tracer_opt *opts = NULL;
3484         int i;
3485
3486         for (i = 0; tracer_flags->opts[i].name; i++) {
3487                 opts = &tracer_flags->opts[i];
3488
3489                 if (strcmp(cmp, opts->name) == 0)
3490                         return __set_tracer_option(tr, trace->flags, opts, neg);
3491         }
3492
3493         return -EINVAL;
3494 }
3495
3496 /* Some tracers require overwrite to stay enabled */
3497 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3498 {
3499         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3500                 return -1;
3501
3502         return 0;
3503 }
3504
3505 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3506 {
3507         /* do nothing if flag is already set */
3508         if (!!(trace_flags & mask) == !!enabled)
3509                 return 0;
3510
3511         /* Give the tracer a chance to approve the change */
3512         if (tr->current_trace->flag_changed)
3513                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3514                         return -EINVAL;
3515
3516         if (enabled)
3517                 trace_flags |= mask;
3518         else
3519                 trace_flags &= ~mask;
3520
3521         if (mask == TRACE_ITER_RECORD_CMD)
3522                 trace_event_enable_cmd_record(enabled);
3523
3524         if (mask == TRACE_ITER_OVERWRITE) {
3525                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3526 #ifdef CONFIG_TRACER_MAX_TRACE
3527                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3528 #endif
3529         }
3530
3531         if (mask == TRACE_ITER_PRINTK)
3532                 trace_printk_start_stop_comm(enabled);
3533
3534         return 0;
3535 }
3536
3537 static int trace_set_options(struct trace_array *tr, char *option)
3538 {
3539         char *cmp;
3540         int neg = 0;
3541         int ret = -ENODEV;
3542         int i;
3543
3544         cmp = strstrip(option);
3545
3546         if (strncmp(cmp, "no", 2) == 0) {
3547                 neg = 1;
3548                 cmp += 2;
3549         }
3550
3551         mutex_lock(&trace_types_lock);
3552
3553         for (i = 0; trace_options[i]; i++) {
3554                 if (strcmp(cmp, trace_options[i]) == 0) {
3555                         ret = set_tracer_flag(tr, 1 << i, !neg);
3556                         break;
3557                 }
3558         }
3559
3560         /* If no option could be set, test the specific tracer options */
3561         if (!trace_options[i])
3562                 ret = set_tracer_option(tr, cmp, neg);
3563
3564         mutex_unlock(&trace_types_lock);
3565
3566         return ret;
3567 }
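
/*
 * Example (illustrative): trace_set_options() parses writes to the
 * trace_options file. A leading "no" clears a flag, anything else sets
 * it, and names that match no global option fall through to the current
 * tracer's own options:
 *
 *      # echo nooverwrite > trace_options
 *      # echo overwrite > trace_options
 */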
3568
3569 static ssize_t
3570 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3571                         size_t cnt, loff_t *ppos)
3572 {
3573         struct seq_file *m = filp->private_data;
3574         struct trace_array *tr = m->private;
3575         char buf[64];
3576         int ret;
3577
3578         if (cnt >= sizeof(buf))
3579                 return -EINVAL;
3580
3581         if (copy_from_user(&buf, ubuf, cnt))
3582                 return -EFAULT;
3583
3584         buf[cnt] = 0;
3585
3586         ret = trace_set_options(tr, buf);
3587         if (ret < 0)
3588                 return ret;
3589
3590         *ppos += cnt;
3591
3592         return cnt;
3593 }
3594
3595 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3596 {
3597         struct trace_array *tr = inode->i_private;
3598         int ret;
3599
3600         if (tracing_disabled)
3601                 return -ENODEV;
3602
3603         if (trace_array_get(tr) < 0)
3604                 return -ENODEV;
3605
3606         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3607         if (ret < 0)
3608                 trace_array_put(tr);
3609
3610         return ret;
3611 }
3612
3613 static const struct file_operations tracing_iter_fops = {
3614         .open           = tracing_trace_options_open,
3615         .read           = seq_read,
3616         .llseek         = seq_lseek,
3617         .release        = tracing_single_release_tr,
3618         .write          = tracing_trace_options_write,
3619 };
3620
3621 static const char readme_msg[] =
3622         "tracing mini-HOWTO:\n\n"
3623         "# echo 0 > tracing_on : quick way to disable tracing\n"
3624         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3625         " Important files:\n"
3626         "  trace\t\t\t- The static contents of the buffer\n"
3627         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3628         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3629         "  current_tracer\t- function and latency tracers\n"
3630         "  available_tracers\t- list of configured tracers for current_tracer\n"
3631         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3632         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3633         "  trace_clock\t\t- change the clock used to order events\n"
3634         "       local:   Per cpu clock but may not be synced across CPUs\n"
3635         "      global:   Synced across CPUs but slows tracing down.\n"
3636         "     counter:   Not a clock, but just an increment\n"
3637         "      uptime:   Jiffy counter from time of boot\n"
3638         "        perf:   Same clock that perf events use\n"
3639 #ifdef CONFIG_X86_64
3640         "     x86-tsc:   TSC cycle counter\n"
3641 #endif
3642         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3643         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3644         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3645         "\t\t\t  Remove sub-buffer with rmdir\n"
3646         "  trace_options\t\t- Set format or modify how tracing happens\n"
3647         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3648         "\t\t\t  option name\n"
3649         "  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
3650 #ifdef CONFIG_DYNAMIC_FTRACE
3651         "\n  available_filter_functions - list of functions that can be filtered on\n"
3652         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3653         "\t\t\t  functions\n"
3654         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3655         "\t     modules: Can select a group via module\n"
3656         "\t      Format: :mod:<module-name>\n"
3657         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3658         "\t    triggers: a command to perform when function is hit\n"
3659         "\t      Format: <function>:<trigger>[:count]\n"
3660         "\t     trigger: traceon, traceoff\n"
3661         "\t\t      enable_event:<system>:<event>\n"
3662         "\t\t      disable_event:<system>:<event>\n"
3663 #ifdef CONFIG_STACKTRACE
3664         "\t\t      stacktrace\n"
3665 #endif
3666 #ifdef CONFIG_TRACER_SNAPSHOT
3667         "\t\t      snapshot\n"
3668 #endif
3669         "\t\t      dump\n"
3670         "\t\t      cpudump\n"
3671         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3672         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3673         "\t     The first one will disable tracing every time do_fault is hit\n"
3674         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3675         "\t       The first time do_trap is hit and it disables tracing, the\n"
3676         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3677         "\t       the counter will not decrement. It only decrements when the\n"
3678         "\t       trigger did work\n"
3679         "\t     To remove trigger without count:\n"
3680         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3681         "\t     To remove trigger with a count:\n"
3682         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3683         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3684         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3685         "\t    modules: Can select a group via module command :mod:\n"
3686         "\t    Does not accept triggers\n"
3687 #endif /* CONFIG_DYNAMIC_FTRACE */
3688 #ifdef CONFIG_FUNCTION_TRACER
3689         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3690         "\t\t    (function)\n"
3691 #endif
3692 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3693         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3694         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3695         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3696 #endif
3697 #ifdef CONFIG_TRACER_SNAPSHOT
3698         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3699         "\t\t\t  snapshot buffer. Read the contents for more\n"
3700         "\t\t\t  information\n"
3701 #endif
3702 #ifdef CONFIG_STACK_TRACER
3703         "  stack_trace\t\t- Shows the max stack trace when active\n"
3704         "  stack_max_size\t- Shows current max stack size that was traced\n"
3705         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3706         "\t\t\t  new trace)\n"
3707 #ifdef CONFIG_DYNAMIC_FTRACE
3708         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3709         "\t\t\t  traces\n"
3710 #endif
3711 #endif /* CONFIG_STACK_TRACER */
3712         "  events/\t\t- Directory containing all trace event subsystems:\n"
3713         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3714         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3715         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3716         "\t\t\t  events\n"
3717         "      filter\t\t- If set, only events passing filter are traced\n"
3718         "  events/<system>/<event>/\t- Directory containing control files for\n"
3719         "\t\t\t  <event>:\n"
3720         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3721         "      filter\t\t- If set, only events passing filter are traced\n"
3722         "      trigger\t\t- If set, a command to perform when event is hit\n"
3723         "\t    Format: <trigger>[:count][if <filter>]\n"
3724         "\t   trigger: traceon, traceoff\n"
3725         "\t            enable_event:<system>:<event>\n"
3726         "\t            disable_event:<system>:<event>\n"
3727 #ifdef CONFIG_STACKTRACE
3728         "\t\t    stacktrace\n"
3729 #endif
3730 #ifdef CONFIG_TRACER_SNAPSHOT
3731         "\t\t    snapshot\n"
3732 #endif
3733         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3734         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3735         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3736         "\t                  events/block/block_unplug/trigger\n"
3737         "\t   The first disables tracing every time block_unplug is hit.\n"
3738         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3739         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3740         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3741         "\t   Like function triggers, the counter is only decremented if it\n"
3742         "\t    enabled or disabled tracing.\n"
3743         "\t   To remove a trigger without a count:\n"
3744         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3745         "\t   To remove a trigger with a count:\n"
3746         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3747         "\t   Filters can be ignored when removing a trigger.\n"
3748 ;
3749
3750 static ssize_t
3751 tracing_readme_read(struct file *filp, char __user *ubuf,
3752                        size_t cnt, loff_t *ppos)
3753 {
3754         return simple_read_from_buffer(ubuf, cnt, ppos,
3755                                         readme_msg, strlen(readme_msg));
3756 }
3757
3758 static const struct file_operations tracing_readme_fops = {
3759         .open           = tracing_open_generic,
3760         .read           = tracing_readme_read,
3761         .llseek         = generic_file_llseek,
3762 };
3763
3764 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3765 {
3766         unsigned int *ptr = v;
3767
3768         if (*pos || m->count)
3769                 ptr++;
3770
3771         (*pos)++;
3772
3773         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3774              ptr++) {
3775                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3776                         continue;
3777
3778                 return ptr;
3779         }
3780
3781         return NULL;
3782 }
3783
3784 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3785 {
3786         void *v;
3787         loff_t l = 0;
3788
3789         preempt_disable();
3790         arch_spin_lock(&trace_cmdline_lock);
3791
3792         v = &savedcmd->map_cmdline_to_pid[0];
3793         while (l <= *pos) {
3794                 v = saved_cmdlines_next(m, v, &l);
3795                 if (!v)
3796                         return NULL;
3797         }
3798
3799         return v;
3800 }
3801
3802 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3803 {
3804         arch_spin_unlock(&trace_cmdline_lock);
3805         preempt_enable();
3806 }
3807
3808 static int saved_cmdlines_show(struct seq_file *m, void *v)
3809 {
3810         char buf[TASK_COMM_LEN];
3811         unsigned int *pid = v;
3812
3813         __trace_find_cmdline(*pid, buf);
3814         seq_printf(m, "%d %s\n", *pid, buf);
3815         return 0;
3816 }
3817
3818 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3819         .start          = saved_cmdlines_start,
3820         .next           = saved_cmdlines_next,
3821         .stop           = saved_cmdlines_stop,
3822         .show           = saved_cmdlines_show,
3823 };
3824
3825 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3826 {
3827         if (tracing_disabled)
3828                 return -ENODEV;
3829
3830         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3831 }
3832
3833 static const struct file_operations tracing_saved_cmdlines_fops = {
3834         .open           = tracing_saved_cmdlines_open,
3835         .read           = seq_read,
3836         .llseek         = seq_lseek,
3837         .release        = seq_release,
3838 };
3839
3840 static ssize_t
3841 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3842                                  size_t cnt, loff_t *ppos)
3843 {
3844         char buf[64];
3845         int r;
3846
3847         arch_spin_lock(&trace_cmdline_lock);
3848         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3849         arch_spin_unlock(&trace_cmdline_lock);
3850
3851         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3852 }
3853
3854 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3855 {
3856         kfree(s->saved_cmdlines);
3857         kfree(s->map_cmdline_to_pid);
3858         kfree(s);
3859 }
3860
3861 static int tracing_resize_saved_cmdlines(unsigned int val)
3862 {
3863         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3864
3865         s = kmalloc(sizeof(*s), GFP_KERNEL);
3866         if (!s)
3867                 return -ENOMEM;
3868
3869         if (allocate_cmdlines_buffer(val, s) < 0) {
3870                 kfree(s);
3871                 return -ENOMEM;
3872         }
3873
3874         arch_spin_lock(&trace_cmdline_lock);
3875         savedcmd_temp = savedcmd;
3876         savedcmd = s;
3877         arch_spin_unlock(&trace_cmdline_lock);
3878         free_saved_cmdlines_buffer(savedcmd_temp);
3879
3880         return 0;
3881 }
3882
3883 static ssize_t
3884 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3885                                   size_t cnt, loff_t *ppos)
3886 {
3887         unsigned long val;
3888         int ret;
3889
3890         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3891         if (ret)
3892                 return ret;
3893
3894         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3895         if (!val || val > PID_MAX_DEFAULT)
3896                 return -EINVAL;
3897
3898         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3899         if (ret < 0)
3900                 return ret;
3901
3902         *ppos += cnt;
3903
3904         return cnt;
3905 }
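
/*
 * Example (illustrative): the saved_cmdlines_size file resizes the
 * comm/pid cache that the trace output uses to resolve task names. Any
 * value from 1 up to PID_MAX_DEFAULT is accepted, e.g.:
 *
 *      # echo 1024 > saved_cmdlines_size
 */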
3906
3907 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3908         .open           = tracing_open_generic,
3909         .read           = tracing_saved_cmdlines_size_read,
3910         .write          = tracing_saved_cmdlines_size_write,
3911 };
3912
3913 static ssize_t
3914 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3915                        size_t cnt, loff_t *ppos)
3916 {
3917         struct trace_array *tr = filp->private_data;
3918         char buf[MAX_TRACER_SIZE+2];
3919         int r;
3920
3921         mutex_lock(&trace_types_lock);
3922         r = sprintf(buf, "%s\n", tr->current_trace->name);
3923         mutex_unlock(&trace_types_lock);
3924
3925         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3926 }
3927
3928 int tracer_init(struct tracer *t, struct trace_array *tr)
3929 {
3930         tracing_reset_online_cpus(&tr->trace_buffer);
3931         return t->init(tr);
3932 }
3933
3934 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3935 {
3936         int cpu;
3937
3938         for_each_tracing_cpu(cpu)
3939                 per_cpu_ptr(buf->data, cpu)->entries = val;
3940 }
3941
3942 #ifdef CONFIG_TRACER_MAX_TRACE
3943 /* resize @trace_buf's entries to the size of @size_buf's entries */
3944 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3945                                         struct trace_buffer *size_buf, int cpu_id)
3946 {
3947         int cpu, ret = 0;
3948
3949         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3950                 for_each_tracing_cpu(cpu) {
3951                         ret = ring_buffer_resize(trace_buf->buffer,
3952                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3953                         if (ret < 0)
3954                                 break;
3955                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3956                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3957                 }
3958         } else {
3959                 ret = ring_buffer_resize(trace_buf->buffer,
3960                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3961                 if (ret == 0)
3962                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3963                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3964         }
3965
3966         return ret;
3967 }
3968 #endif /* CONFIG_TRACER_MAX_TRACE */
3969
3970 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3971                                         unsigned long size, int cpu)
3972 {
3973         int ret;
3974
3975         /*
3976          * If kernel or user changes the size of the ring buffer
3977          * we use the size that was given, and we can forget about
3978          * expanding it later.
3979          */
3980         ring_buffer_expanded = true;
3981
3982         /* May be called before buffers are initialized */
3983         if (!tr->trace_buffer.buffer)
3984                 return 0;
3985
3986         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3987         if (ret < 0)
3988                 return ret;
3989
3990 #ifdef CONFIG_TRACER_MAX_TRACE
3991         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3992             !tr->current_trace->use_max_tr)
3993                 goto out;
3994
3995         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3996         if (ret < 0) {
3997                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3998                                                      &tr->trace_buffer, cpu);
3999                 if (r < 0) {
4000                         /*
4001                          * AARGH! We are left with a max buffer of a
4002                          * different size!
4003                          * The max buffer is our "snapshot" buffer.
4004                          * When a tracer needs a snapshot (one of the
4005                          * latency tracers), it swaps the max buffer
4006                          * with the saved snapshot. We succeeded in
4007                          * updating the size of the main buffer, but failed
4008                          * to update the size of the max buffer. Then, when we
4009                          * tried to reset the main buffer to the original size,
4010                          * we failed there too. This is very unlikely to
4011                          * happen, but if it does, warn and kill all
4012                          * tracing.
4013                          */
4014                         WARN_ON(1);
4015                         tracing_disabled = 1;
4016                 }
4017                 return ret;
4018         }
4019
4020         if (cpu == RING_BUFFER_ALL_CPUS)
4021                 set_buffer_entries(&tr->max_buffer, size);
4022         else
4023                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4024
4025  out:
4026 #endif /* CONFIG_TRACER_MAX_TRACE */
4027
4028         if (cpu == RING_BUFFER_ALL_CPUS)
4029                 set_buffer_entries(&tr->trace_buffer, size);
4030         else
4031                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4032
4033         return ret;
4034 }
4035
4036 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4037                                           unsigned long size, int cpu_id)
4038 {
4039         int ret = size;
4040
4041         mutex_lock(&trace_types_lock);
4042
4043         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4044                 /* make sure this CPU is enabled in the mask */
4045                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4046                         ret = -EINVAL;
4047                         goto out;
4048                 }
4049         }
4050
4051         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4052         if (ret < 0)
4053                 ret = -ENOMEM;
4054
4055 out:
4056         mutex_unlock(&trace_types_lock);
4057
4058         return ret;
4059 }
4060
4061
4062 /**
4063  * tracing_update_buffers - used by tracing facility to expand ring buffers
4064  *
4065  * To save memory when tracing is not used, the ring buffers are set to
4066  * a minimum size on systems that have tracing configured in. Once a
4067  * user starts to use the tracing facility, the buffers need to grow
4068  * to their default size.
4069  *
4070  * This function is to be called when a tracer is about to be used.
4071  */
4072 int tracing_update_buffers(void)
4073 {
4074         int ret = 0;
4075
4076         mutex_lock(&trace_types_lock);
4077         if (!ring_buffer_expanded)
4078                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4079                                                 RING_BUFFER_ALL_CPUS);
4080         mutex_unlock(&trace_types_lock);
4081
4082         return ret;
4083 }
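
/*
 * Sketch (illustrative) of the expected call pattern: code that is about
 * to enable a tracer should first make sure the ring buffers have been
 * expanded to their full size:
 *
 *      ret = tracing_update_buffers();
 *      if (ret < 0)
 *              return ret;
 */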
4084
4085 struct trace_option_dentry;
4086
4087 static struct trace_option_dentry *
4088 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4089
4090 static void
4091 destroy_trace_option_files(struct trace_option_dentry *topts);
4092
4093 /*
4094  * Used to clear out the tracer before deletion of an instance.
4095  * Must have trace_types_lock held.
4096  */
4097 static void tracing_set_nop(struct trace_array *tr)
4098 {
4099         if (tr->current_trace == &nop_trace)
4100                 return;
4101
4102         tr->current_trace->enabled--;
4103
4104         if (tr->current_trace->reset)
4105                 tr->current_trace->reset(tr);
4106
4107         tr->current_trace = &nop_trace;
4108 }
4109
4110 static void update_tracer_options(struct trace_array *tr, struct tracer *t)
4111 {
4112         static struct trace_option_dentry *topts;
4113
4114         /* Only enable if the directory has been created already. */
4115         if (!tr->dir)
4116                 return;
4117
4118         /* Currently, only the top instance has options */
4119         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL))
4120                 return;
4121
4122         destroy_trace_option_files(topts);
4123         topts = create_trace_option_files(tr, t);
4124 }
4125
4126 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4127 {
4128         struct tracer *t;
4129 #ifdef CONFIG_TRACER_MAX_TRACE
4130         bool had_max_tr;
4131 #endif
4132         int ret = 0;
4133
4134         mutex_lock(&trace_types_lock);
4135
4136         if (!ring_buffer_expanded) {
4137                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4138                                                 RING_BUFFER_ALL_CPUS);
4139                 if (ret < 0)
4140                         goto out;
4141                 ret = 0;
4142         }
4143
4144         for (t = trace_types; t; t = t->next) {
4145                 if (strcmp(t->name, buf) == 0)
4146                         break;
4147         }
4148         if (!t) {
4149                 ret = -EINVAL;
4150                 goto out;
4151         }
4152         if (t == tr->current_trace)
4153                 goto out;
4154
4155         /* Some tracers are only allowed for the top level buffer */
4156         if (!trace_ok_for_array(t, tr)) {
4157                 ret = -EINVAL;
4158                 goto out;
4159         }
4160
4161         /* If trace pipe files are being read, we can't change the tracer */
4162         if (tr->current_trace->ref) {
4163                 ret = -EBUSY;
4164                 goto out;
4165         }
4166
4167         trace_branch_disable();
4168
4169         tr->current_trace->enabled--;
4170
4171         if (tr->current_trace->reset)
4172                 tr->current_trace->reset(tr);
4173
4174         /* Current trace needs to be nop_trace before synchronize_sched */
4175         tr->current_trace = &nop_trace;
4176
4177 #ifdef CONFIG_TRACER_MAX_TRACE
4178         had_max_tr = tr->allocated_snapshot;
4179
4180         if (had_max_tr && !t->use_max_tr) {
4181                 /*
4182                  * We need to make sure that update_max_tr sees that
4183                  * current_trace changed to nop_trace to keep it from
4184                  * swapping the buffers after we resize it.
4185                  * update_max_tr is called with interrupts disabled,
4186                  * so a synchronize_sched() is sufficient.
4187                  */
4188                 synchronize_sched();
4189                 free_snapshot(tr);
4190         }
4191 #endif
4192         update_tracer_options(tr, t);
4193
4194 #ifdef CONFIG_TRACER_MAX_TRACE
4195         if (t->use_max_tr && !had_max_tr) {
4196                 ret = alloc_snapshot(tr);
4197                 if (ret < 0)
4198                         goto out;
4199         }
4200 #endif
4201
4202         if (t->init) {
4203                 ret = tracer_init(t, tr);
4204                 if (ret)
4205                         goto out;
4206         }
4207
4208         tr->current_trace = t;
4209         tr->current_trace->enabled++;
4210         trace_branch_enable(tr);
4211  out:
4212         mutex_unlock(&trace_types_lock);
4213
4214         return ret;
4215 }
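
/*
 * Example (illustrative): tracing_set_tracer() backs writes to the
 * current_tracer file, so switching tracers from userspace is simply:
 *
 *      # echo function > current_tracer
 *      # echo nop > current_tracer
 *
 * The switch fails with -EBUSY while trace_pipe readers hold a
 * reference, and with -EINVAL if the tracer is unknown or not allowed
 * for this instance (see trace_ok_for_array() above).
 */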
4216
4217 static ssize_t
4218 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4219                         size_t cnt, loff_t *ppos)
4220 {
4221         struct trace_array *tr = filp->private_data;
4222         char buf[MAX_TRACER_SIZE+1];
4223         int i;
4224         size_t ret;
4225         int err;
4226
4227         ret = cnt;
4228
4229         if (cnt > MAX_TRACER_SIZE)
4230                 cnt = MAX_TRACER_SIZE;
4231
4232         if (copy_from_user(&buf, ubuf, cnt))
4233                 return -EFAULT;
4234
4235         buf[cnt] = 0;
4236
4237         /* strip ending whitespace. */
4238         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4239                 buf[i] = 0;
4240
4241         err = tracing_set_tracer(tr, buf);
4242         if (err)
4243                 return err;
4244
4245         *ppos += ret;
4246
4247         return ret;
4248 }
4249
4250 static ssize_t
4251 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4252                    size_t cnt, loff_t *ppos)
4253 {
4254         char buf[64];
4255         int r;
4256
4257         r = snprintf(buf, sizeof(buf), "%ld\n",
4258                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4259         if (r > sizeof(buf))
4260                 r = sizeof(buf);
4261         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4262 }
4263
4264 static ssize_t
4265 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4266                     size_t cnt, loff_t *ppos)
4267 {
4268         unsigned long val;
4269         int ret;
4270
4271         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4272         if (ret)
4273                 return ret;
4274
4275         *ptr = val * 1000;
4276
4277         return cnt;
4278 }
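
/*
 * Note (illustrative): the helpers above expose latency values in
 * microseconds while storing them internally in nanoseconds (the write
 * path multiplies by 1000, the read path uses nsecs_to_usecs()). So,
 * for example, a 100 microsecond threshold is set with:
 *
 *      # echo 100 > tracing_thresh
 */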
4279
4280 static ssize_t
4281 tracing_thresh_read(struct file *filp, char __user *ubuf,
4282                     size_t cnt, loff_t *ppos)
4283 {
4284         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4285 }
4286
4287 static ssize_t
4288 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4289                      size_t cnt, loff_t *ppos)
4290 {
4291         struct trace_array *tr = filp->private_data;
4292         int ret;
4293
4294         mutex_lock(&trace_types_lock);
4295         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4296         if (ret < 0)
4297                 goto out;
4298
4299         if (tr->current_trace->update_thresh) {
4300                 ret = tr->current_trace->update_thresh(tr);
4301                 if (ret < 0)
4302                         goto out;
4303         }
4304
4305         ret = cnt;
4306 out:
4307         mutex_unlock(&trace_types_lock);
4308
4309         return ret;
4310 }
4311
4312 static ssize_t
4313 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4314                      size_t cnt, loff_t *ppos)
4315 {
4316         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4317 }
4318
4319 static ssize_t
4320 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4321                       size_t cnt, loff_t *ppos)
4322 {
4323         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4324 }
4325
4326 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4327 {
4328         struct trace_array *tr = inode->i_private;
4329         struct trace_iterator *iter;
4330         int ret = 0;
4331
4332         if (tracing_disabled)
4333                 return -ENODEV;
4334
4335         if (trace_array_get(tr) < 0)
4336                 return -ENODEV;
4337
4338         mutex_lock(&trace_types_lock);
4339
4340         /* create a buffer to store the information to pass to userspace */
4341         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4342         if (!iter) {
4343                 ret = -ENOMEM;
4344                 __trace_array_put(tr);
4345                 goto out;
4346         }
4347
4348         trace_seq_init(&iter->seq);
4349         iter->trace = tr->current_trace;
4350
4351         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4352                 ret = -ENOMEM;
4353                 goto fail;
4354         }
4355
4356         /* trace pipe does not show start of buffer */
4357         cpumask_setall(iter->started);
4358
4359         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4360                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4361
4362         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4363         if (trace_clocks[tr->clock_id].in_ns)
4364                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4365
4366         iter->tr = tr;
4367         iter->trace_buffer = &tr->trace_buffer;
4368         iter->cpu_file = tracing_get_cpu(inode);
4369         mutex_init(&iter->mutex);
4370         filp->private_data = iter;
4371
4372         if (iter->trace->pipe_open)
4373                 iter->trace->pipe_open(iter);
4374
4375         nonseekable_open(inode, filp);
4376
4377         tr->current_trace->ref++;
4378 out:
4379         mutex_unlock(&trace_types_lock);
4380         return ret;
4381
4382 fail:
4383         kfree(iter->trace);
4384         kfree(iter);
4385         __trace_array_put(tr);
4386         mutex_unlock(&trace_types_lock);
4387         return ret;
4388 }
4389
4390 static int tracing_release_pipe(struct inode *inode, struct file *file)
4391 {
4392         struct trace_iterator *iter = file->private_data;
4393         struct trace_array *tr = inode->i_private;
4394
4395         mutex_lock(&trace_types_lock);
4396
4397         tr->current_trace->ref--;
4398
4399         if (iter->trace->pipe_close)
4400                 iter->trace->pipe_close(iter);
4401
4402         mutex_unlock(&trace_types_lock);
4403
4404         free_cpumask_var(iter->started);
4405         mutex_destroy(&iter->mutex);
4406         kfree(iter);
4407
4408         trace_array_put(tr);
4409
4410         return 0;
4411 }
4412
4413 static unsigned int
4414 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4415 {
4416         /* Iterators are static, they should be filled or empty */
4417         if (trace_buffer_iter(iter, iter->cpu_file))
4418                 return POLLIN | POLLRDNORM;
4419
4420         if (trace_flags & TRACE_ITER_BLOCK)
4421                 /*
4422                  * Always select as readable when in blocking mode
4423                  */
4424                 return POLLIN | POLLRDNORM;
4425         else
4426                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4427                                              filp, poll_table);
4428 }
4429
4430 static unsigned int
4431 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4432 {
4433         struct trace_iterator *iter = filp->private_data;
4434
4435         return trace_poll(iter, filp, poll_table);
4436 }
4437
4438 /* Must be called with iter->mutex held. */
4439 static int tracing_wait_pipe(struct file *filp)
4440 {
4441         struct trace_iterator *iter = filp->private_data;
4442         int ret;
4443
4444         while (trace_empty(iter)) {
4445
4446                 if ((filp->f_flags & O_NONBLOCK)) {
4447                         return -EAGAIN;
4448                 }
4449
4450                 /*
4451                  * We keep blocking while tracing is disabled if we have
4452                  * never read anything. This allows a user to cat this
4453                  * file, and then enable tracing. But once we have read
4454                  * something, we give an EOF when tracing is disabled
4455                  * again.
4456                  *
4457                  * iter->pos will be 0 if we haven't read anything.
4458                  */
4459                 if (!tracing_is_on() && iter->pos)
4460                         break;
4461
4462                 mutex_unlock(&iter->mutex);
4463
4464                 ret = wait_on_pipe(iter, false);
4465
4466                 mutex_lock(&iter->mutex);
4467
4468                 if (ret)
4469                         return ret;
4470         }
4471
4472         return 1;
4473 }
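
/*
 * Example (illustrative): the wait logic above is why
 *
 *      # cat trace_pipe
 *
 * blocks when the buffer is empty instead of returning EOF, while a
 * reader that opened the file with O_NONBLOCK gets -EAGAIN instead of
 * sleeping.
 */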
4474
4475 /*
4476  * Consumer reader.
4477  */
4478 static ssize_t
4479 tracing_read_pipe(struct file *filp, char __user *ubuf,
4480                   size_t cnt, loff_t *ppos)
4481 {
4482         struct trace_iterator *iter = filp->private_data;
4483         ssize_t sret;
4484
4485         /* return any leftover data */
4486         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4487         if (sret != -EBUSY)
4488                 return sret;
4489
4490         trace_seq_init(&iter->seq);
4491
4492         /*
4493          * Avoid more than one consumer on a single file descriptor.
4494          * This is just a matter of trace coherency; the ring buffer itself
4495          * is protected.
4496          */
4497         mutex_lock(&iter->mutex);
4498         if (iter->trace->read) {
4499                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4500                 if (sret)
4501                         goto out;
4502         }
4503
4504 waitagain:
4505         sret = tracing_wait_pipe(filp);
4506         if (sret <= 0)
4507                 goto out;
4508
4509         /* stop when tracing is finished */
4510         if (trace_empty(iter)) {
4511                 sret = 0;
4512                 goto out;
4513         }
4514
4515         if (cnt >= PAGE_SIZE)
4516                 cnt = PAGE_SIZE - 1;
4517
4518         /* reset all but tr, trace, and overruns */
4519         memset(&iter->seq, 0,
4520                sizeof(struct trace_iterator) -
4521                offsetof(struct trace_iterator, seq));
4522         cpumask_clear(iter->started);
4523         iter->pos = -1;
4524
4525         trace_event_read_lock();
4526         trace_access_lock(iter->cpu_file);
4527         while (trace_find_next_entry_inc(iter) != NULL) {
4528                 enum print_line_t ret;
4529                 int save_len = iter->seq.seq.len;
4530
4531                 ret = print_trace_line(iter);
4532                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4533                         /* don't print partial lines */
4534                         iter->seq.seq.len = save_len;
4535                         break;
4536                 }
4537                 if (ret != TRACE_TYPE_NO_CONSUME)
4538                         trace_consume(iter);
4539
4540                 if (trace_seq_used(&iter->seq) >= cnt)
4541                         break;
4542
4543                 /*
4544                  * The full flag being set means we reached the trace_seq buffer
4545                  * size and should have left via the partial-output condition above.
4546                  * One of the trace_seq_* functions is not being used properly.
4547                  */
4548                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4549                           iter->ent->type);
4550         }
4551         trace_access_unlock(iter->cpu_file);
4552         trace_event_read_unlock();
4553
4554         /* Now copy what we have to the user */
4555         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4556         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4557                 trace_seq_init(&iter->seq);
4558
4559         /*
4560          * If there was nothing to send to user, in spite of consuming trace
4561          * entries, go back to wait for more entries.
4562          */
4563         if (sret == -EBUSY)
4564                 goto waitagain;
4565
4566 out:
4567         mutex_unlock(&iter->mutex);
4568
4569         return sret;
4570 }
4571
4572 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4573                                      unsigned int idx)
4574 {
4575         __free_page(spd->pages[idx]);
4576 }
4577
4578 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4579         .can_merge              = 0,
4580         .confirm                = generic_pipe_buf_confirm,
4581         .release                = generic_pipe_buf_release,
4582         .steal                  = generic_pipe_buf_steal,
4583         .get                    = generic_pipe_buf_get,
4584 };
4585
4586 static size_t
4587 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4588 {
4589         size_t count;
4590         int save_len;
4591         int ret;
4592
4593         /* Seq buffer is page-sized, exactly what we need. */
4594         for (;;) {
4595                 save_len = iter->seq.seq.len;
4596                 ret = print_trace_line(iter);
4597
4598                 if (trace_seq_has_overflowed(&iter->seq)) {
4599                         iter->seq.seq.len = save_len;
4600                         break;
4601                 }
4602
4603                 /*
4604                  * TRACE_TYPE_PARTIAL_LINE should not be hit here, because it
4605                  * is only returned when iter->seq overflowed, which the check
4606                  * above already caught. But check anyway to be safe.
4607                  */
4608                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4609                         iter->seq.seq.len = save_len;
4610                         break;
4611                 }
4612
4613                 count = trace_seq_used(&iter->seq) - save_len;
4614                 if (rem < count) {
4615                         rem = 0;
4616                         iter->seq.seq.len = save_len;
4617                         break;
4618                 }
4619
4620                 if (ret != TRACE_TYPE_NO_CONSUME)
4621                         trace_consume(iter);
4622                 rem -= count;
4623                 if (!trace_find_next_entry_inc(iter))   {
4624                         rem = 0;
4625                         iter->ent = NULL;
4626                         break;
4627                 }
4628         }
4629
4630         return rem;
4631 }
4632
4633 static ssize_t tracing_splice_read_pipe(struct file *filp,
4634                                         loff_t *ppos,
4635                                         struct pipe_inode_info *pipe,
4636                                         size_t len,
4637                                         unsigned int flags)
4638 {
4639         struct page *pages_def[PIPE_DEF_BUFFERS];
4640         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4641         struct trace_iterator *iter = filp->private_data;
4642         struct splice_pipe_desc spd = {
4643                 .pages          = pages_def,
4644                 .partial        = partial_def,
4645                 .nr_pages       = 0, /* This gets updated below. */
4646                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4647                 .flags          = flags,
4648                 .ops            = &tracing_pipe_buf_ops,
4649                 .spd_release    = tracing_spd_release_pipe,
4650         };
4651         ssize_t ret;
4652         size_t rem;
4653         unsigned int i;
4654
4655         if (splice_grow_spd(pipe, &spd))
4656                 return -ENOMEM;
4657
4658         mutex_lock(&iter->mutex);
4659
4660         if (iter->trace->splice_read) {
4661                 ret = iter->trace->splice_read(iter, filp,
4662                                                ppos, pipe, len, flags);
4663                 if (ret)
4664                         goto out_err;
4665         }
4666
4667         ret = tracing_wait_pipe(filp);
4668         if (ret <= 0)
4669                 goto out_err;
4670
4671         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4672                 ret = -EFAULT;
4673                 goto out_err;
4674         }
4675
4676         trace_event_read_lock();
4677         trace_access_lock(iter->cpu_file);
4678
4679         /* Fill as many pages as possible. */
4680         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4681                 spd.pages[i] = alloc_page(GFP_KERNEL);
4682                 if (!spd.pages[i])
4683                         break;
4684
4685                 rem = tracing_fill_pipe_page(rem, iter);
4686
4687                 /* Copy the data into the page, so we can start over. */
4688                 ret = trace_seq_to_buffer(&iter->seq,
4689                                           page_address(spd.pages[i]),
4690                                           trace_seq_used(&iter->seq));
4691                 if (ret < 0) {
4692                         __free_page(spd.pages[i]);
4693                         break;
4694                 }
4695                 spd.partial[i].offset = 0;
4696                 spd.partial[i].len = trace_seq_used(&iter->seq);
4697
4698                 trace_seq_init(&iter->seq);
4699         }
4700
4701         trace_access_unlock(iter->cpu_file);
4702         trace_event_read_unlock();
4703         mutex_unlock(&iter->mutex);
4704
4705         spd.nr_pages = i;
4706
4707         ret = splice_to_pipe(pipe, &spd);
4708 out:
4709         splice_shrink_spd(&spd);
4710         return ret;
4711
4712 out_err:
4713         mutex_unlock(&iter->mutex);
4714         goto out;
4715 }
4716
4717 static ssize_t
4718 tracing_entries_read(struct file *filp, char __user *ubuf,
4719                      size_t cnt, loff_t *ppos)
4720 {
4721         struct inode *inode = file_inode(filp);
4722         struct trace_array *tr = inode->i_private;
4723         int cpu = tracing_get_cpu(inode);
4724         char buf[64];
4725         int r = 0;
4726         ssize_t ret;
4727
4728         mutex_lock(&trace_types_lock);
4729
4730         if (cpu == RING_BUFFER_ALL_CPUS) {
4731                 int cpu, buf_size_same;
4732                 unsigned long size;
4733
4734                 size = 0;
4735                 buf_size_same = 1;
4736                 /* check if all cpu sizes are same */
4737                 for_each_tracing_cpu(cpu) {
4738                         /* fill in the size from first enabled cpu */
4739                         if (size == 0)
4740                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4741                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4742                                 buf_size_same = 0;
4743                                 break;
4744                         }
4745                 }
4746
4747                 if (buf_size_same) {
4748                         if (!ring_buffer_expanded)
4749                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4750                                             size >> 10,
4751                                             trace_buf_size >> 10);
4752                         else
4753                                 r = sprintf(buf, "%lu\n", size >> 10);
4754                 } else
4755                         r = sprintf(buf, "X\n");
4756         } else
4757                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4758
4759         mutex_unlock(&trace_types_lock);
4760
4761         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4762         return ret;
4763 }
4764
4765 static ssize_t
4766 tracing_entries_write(struct file *filp, const char __user *ubuf,
4767                       size_t cnt, loff_t *ppos)
4768 {
4769         struct inode *inode = file_inode(filp);
4770         struct trace_array *tr = inode->i_private;
4771         unsigned long val;
4772         int ret;
4773
4774         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4775         if (ret)
4776                 return ret;
4777
4778         /* must have at least 1 entry */
4779         if (!val)
4780                 return -EINVAL;
4781
4782         /* value is in KB */
4783         val <<= 10;
4784         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4785         if (ret < 0)
4786                 return ret;
4787
4788         *ppos += cnt;
4789
4790         return cnt;
4791 }
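
/*
 * Example (illustrative): buffer_size_kb is wired to the handlers above
 * and takes its value in kilobytes (the write path shifts by 10). For
 * instance, giving each CPU a 4 MB buffer:
 *
 *      # echo 4096 > buffer_size_kb
 *
 * The same write through a per_cpu/cpuN/buffer_size_kb file resizes
 * only that CPU's buffer.
 */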
4792
4793 static ssize_t
4794 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4795                                 size_t cnt, loff_t *ppos)
4796 {
4797         struct trace_array *tr = filp->private_data;
4798         char buf[64];
4799         int r, cpu;
4800         unsigned long size = 0, expanded_size = 0;
4801
4802         mutex_lock(&trace_types_lock);
4803         for_each_tracing_cpu(cpu) {
4804                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4805                 if (!ring_buffer_expanded)
4806                         expanded_size += trace_buf_size >> 10;
4807         }
4808         if (ring_buffer_expanded)
4809                 r = sprintf(buf, "%lu\n", size);
4810         else
4811                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4812         mutex_unlock(&trace_types_lock);
4813
4814         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4815 }
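/*
 * Example: "cat buffer_total_size_kb" prints the combined size of all
 * per-cpu buffers in KB, as "<total> (expanded: <total after expansion>)"
 * until the ring buffer has been expanded, and just "<total>" afterwards.
 */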
4816
4817 static ssize_t
4818 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4819                           size_t cnt, loff_t *ppos)
4820 {
4821         /*
4822          * There is no need to read what the user has written; this function
4823          * exists only so that using "echo" on the file does not return an error.
4824          */
4825
4826         *ppos += cnt;
4827
4828         return cnt;
4829 }
4830
4831 static int
4832 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4833 {
4834         struct trace_array *tr = inode->i_private;
4835
4836         /* Disable tracing? */
4837         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4838                 tracer_tracing_off(tr);
4839         /* resize the ring buffer to 0 */
4840         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4841
4842         trace_array_put(tr);
4843
4844         return 0;
4845 }
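/*
 * Example: any write to "free_buffer" is accepted, and closing the file
 * shrinks the ring buffer to zero (stopping tracing first when
 * TRACE_ITER_STOP_ON_FREE is set), so from a shell:
 *
 *	echo > free_buffer
 */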
4846
4847 static ssize_t
4848 tracing_mark_write(struct file *filp, const char __user *ubuf,
4849                                         size_t cnt, loff_t *fpos)
4850 {
4851         unsigned long addr = (unsigned long)ubuf;
4852         struct trace_array *tr = filp->private_data;
4853         struct ring_buffer_event *event;
4854         struct ring_buffer *buffer;
4855         struct print_entry *entry;
4856         unsigned long irq_flags;
4857         struct page *pages[2];
4858         void *map_page[2];
4859         int nr_pages = 1;
4860         ssize_t written;
4861         int offset;
4862         int size;
4863         int len;
4864         int ret;
4865         int i;
4866
4867         if (tracing_disabled)
4868                 return -EINVAL;
4869
4870         if (!(trace_flags & TRACE_ITER_MARKERS))
4871                 return -EINVAL;
4872
4873         if (cnt > TRACE_BUF_SIZE)
4874                 cnt = TRACE_BUF_SIZE;
4875
4876         /*
4877          * Userspace is injecting traces into the kernel trace buffer.
4878          * We want to be as non-intrusive as possible.
4879          * To do so, we do not want to allocate any special buffers
4880          * or take any locks, but instead write the userspace data
4881          * straight into the ring buffer.
4882          *
4883          * First we need to pin the userspace buffer into memory.
4884          * It is most likely already resident, since userspace just
4885          * referenced it, but there is no guarantee. By using get_user_pages_fast()
4886          * and kmap_atomic/kunmap_atomic() we can get access to the
4887          * pages directly. We then write the data directly into the
4888          * ring buffer.
4889          */
4890         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4891
4892         /* check if we cross pages */
4893         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4894                 nr_pages = 2;
4895
4896         offset = addr & (PAGE_SIZE - 1);
4897         addr &= PAGE_MASK;
4898
4899         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4900         if (ret < nr_pages) {
4901                 while (--ret >= 0)
4902                         put_page(pages[ret]);
4903                 written = -EFAULT;
4904                 goto out;
4905         }
4906
4907         for (i = 0; i < nr_pages; i++)
4908                 map_page[i] = kmap_atomic(pages[i]);
4909
4910         local_save_flags(irq_flags);
4911         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4912         buffer = tr->trace_buffer.buffer;
4913         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4914                                           irq_flags, preempt_count());
4915         if (!event) {
4916                 /* Ring buffer disabled, return as if not open for write */
4917                 written = -EBADF;
4918                 goto out_unlock;
4919         }
4920
4921         entry = ring_buffer_event_data(event);
4922         entry->ip = _THIS_IP_;
4923
4924         if (nr_pages == 2) {
4925                 len = PAGE_SIZE - offset;
4926                 memcpy(&entry->buf, map_page[0] + offset, len);
4927                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4928         } else
4929                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4930
4931         if (entry->buf[cnt - 1] != '\n') {
4932                 entry->buf[cnt] = '\n';
4933                 entry->buf[cnt + 1] = '\0';
4934         } else
4935                 entry->buf[cnt] = '\0';
4936
4937         __buffer_unlock_commit(buffer, event);
4938
4939         written = cnt;
4940
4941         *fpos += written;
4942
4943  out_unlock:
4944         for (i = 0; i < nr_pages; i++) {
4945                 kunmap_atomic(map_page[i]);
4946                 put_page(pages[i]);
4947         }
4948  out:
4949         return written;
4950 }
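/*
 * Example: user space can inject its own markers through the "trace_marker"
 * file handled above; the string shows up in the trace as a print entry.
 * Writes are capped at TRACE_BUF_SIZE bytes and require the
 * TRACE_ITER_MARKERS option to be enabled. From a shell, or from a small
 * program (path shown assumes tracefs mounted at /sys/kernel/tracing):
 *
 *	echo "hello from user space" > trace_marker
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "hello from user space\n", 22);
 */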
4951
4952 static int tracing_clock_show(struct seq_file *m, void *v)
4953 {
4954         struct trace_array *tr = m->private;
4955         int i;
4956
4957         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4958                 seq_printf(m,
4959                         "%s%s%s%s", i ? " " : "",
4960                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4961                         i == tr->clock_id ? "]" : "");
4962         seq_putc(m, '\n');
4963
4964         return 0;
4965 }
4966
4967 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4968 {
4969         int i;
4970
4971         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4972                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4973                         break;
4974         }
4975         if (i == ARRAY_SIZE(trace_clocks))
4976                 return -EINVAL;
4977
4978         mutex_lock(&trace_types_lock);
4979
4980         tr->clock_id = i;
4981
4982         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4983
4984         /*
4985          * New clock may not be consistent with the previous clock.
4986          * Reset the buffer so that it doesn't have incomparable timestamps.
4987          */
4988         tracing_reset_online_cpus(&tr->trace_buffer);
4989
4990 #ifdef CONFIG_TRACER_MAX_TRACE
4991         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4992                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4993         tracing_reset_online_cpus(&tr->max_buffer);
4994 #endif
4995
4996         mutex_unlock(&trace_types_lock);
4997
4998         return 0;
4999 }
5000
5001 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5002                                    size_t cnt, loff_t *fpos)
5003 {
5004         struct seq_file *m = filp->private_data;
5005         struct trace_array *tr = m->private;
5006         char buf[64];
5007         const char *clockstr;
5008         int ret;
5009
5010         if (cnt >= sizeof(buf))
5011                 return -EINVAL;
5012
5013         if (copy_from_user(&buf, ubuf, cnt))
5014                 return -EFAULT;
5015
5016         buf[cnt] = 0;
5017
5018         clockstr = strstrip(buf);
5019
5020         ret = tracing_set_clock(tr, clockstr);
5021         if (ret)
5022                 return ret;
5023
5024         *fpos += cnt;
5025
5026         return cnt;
5027 }
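/*
 * Example usage of the "trace_clock" file: reading lists the available
 * clocks with the active one in brackets, and writing selects a clock and
 * resets the buffer (old and new timestamps are not comparable), e.g.:
 *
 *	cat trace_clock		# e.g. "[local] global counter ..."
 *	echo global > trace_clock
 */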
5028
5029 static int tracing_clock_open(struct inode *inode, struct file *file)
5030 {
5031         struct trace_array *tr = inode->i_private;
5032         int ret;
5033
5034         if (tracing_disabled)
5035                 return -ENODEV;
5036
5037         if (trace_array_get(tr))
5038                 return -ENODEV;
5039
5040         ret = single_open(file, tracing_clock_show, inode->i_private);
5041         if (ret < 0)
5042                 trace_array_put(tr);
5043
5044         return ret;
5045 }
5046
5047 struct ftrace_buffer_info {
5048         struct trace_iterator   iter;
5049         void                    *spare;
5050         unsigned int            read;
5051 };
5052
5053 #ifdef CONFIG_TRACER_SNAPSHOT
5054 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5055 {
5056         struct trace_array *tr = inode->i_private;
5057         struct trace_iterator *iter;
5058         struct seq_file *m;
5059         int ret = 0;
5060
5061         if (trace_array_get(tr) < 0)
5062                 return -ENODEV;
5063
5064         if (file->f_mode & FMODE_READ) {
5065                 iter = __tracing_open(inode, file, true);
5066                 if (IS_ERR(iter))
5067                         ret = PTR_ERR(iter);
5068         } else {
5069                 /* Writes still need the seq_file to hold the private data */
5070                 ret = -ENOMEM;
5071                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5072                 if (!m)
5073                         goto out;
5074                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5075                 if (!iter) {
5076                         kfree(m);
5077                         goto out;
5078                 }
5079                 ret = 0;
5080
5081                 iter->tr = tr;
5082                 iter->trace_buffer = &tr->max_buffer;
5083                 iter->cpu_file = tracing_get_cpu(inode);
5084                 m->private = iter;
5085                 file->private_data = m;
5086         }
5087 out:
5088         if (ret < 0)
5089                 trace_array_put(tr);
5090
5091         return ret;
5092 }
5093
5094 static ssize_t
5095 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5096                        loff_t *ppos)
5097 {
5098         struct seq_file *m = filp->private_data;
5099         struct trace_iterator *iter = m->private;
5100         struct trace_array *tr = iter->tr;
5101         unsigned long val;
5102         int ret;
5103
5104         ret = tracing_update_buffers();
5105         if (ret < 0)
5106                 return ret;
5107
5108         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5109         if (ret)
5110                 return ret;
5111
5112         mutex_lock(&trace_types_lock);
5113
5114         if (tr->current_trace->use_max_tr) {
5115                 ret = -EBUSY;
5116                 goto out;
5117         }
5118
5119         switch (val) {
5120         case 0:
5121                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5122                         ret = -EINVAL;
5123                         break;
5124                 }
5125                 if (tr->allocated_snapshot)
5126                         free_snapshot(tr);
5127                 break;
5128         case 1:
5129 /* Only allow per-cpu swap if the ring buffer supports it */
5130 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5131                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5132                         ret = -EINVAL;
5133                         break;
5134                 }
5135 #endif
5136                 if (!tr->allocated_snapshot) {
5137                         ret = alloc_snapshot(tr);
5138                         if (ret < 0)
5139                                 break;
5140                 }
5141                 local_irq_disable();
5142                 /* Now, we're going to swap */
5143                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5144                         update_max_tr(tr, current, smp_processor_id());
5145                 else
5146                         update_max_tr_single(tr, current, iter->cpu_file);
5147                 local_irq_enable();
5148                 break;
5149         default:
5150                 if (tr->allocated_snapshot) {
5151                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5152                                 tracing_reset_online_cpus(&tr->max_buffer);
5153                         else
5154                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5155                 }
5156                 break;
5157         }
5158
5159         if (ret >= 0) {
5160                 *ppos += cnt;
5161                 ret = cnt;
5162         }
5163 out:
5164         mutex_unlock(&trace_types_lock);
5165         return ret;
5166 }
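/*
 * Example usage of the "snapshot" file, following the switch above
 * (available only with CONFIG_TRACER_SNAPSHOT):
 *
 *	echo 1 > snapshot	# allocate the snapshot buffer if needed and
 *				# swap it with the live buffer
 *	cat snapshot		# read the snapshotted trace
 *	echo 0 > snapshot	# free the snapshot buffer
 *	echo 2 > snapshot	# any other value clears the snapshot buffer
 *				# without freeing it
 */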
5167
5168 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5169 {
5170         struct seq_file *m = file->private_data;
5171         int ret;
5172
5173         ret = tracing_release(inode, file);
5174
5175         if (file->f_mode & FMODE_READ)
5176                 return ret;
5177
5178         /* If write only, the seq_file is just a stub */
5179         if (m)
5180                 kfree(m->private);
5181         kfree(m);
5182
5183         return 0;
5184 }
5185
5186 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5187 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5188                                     size_t count, loff_t *ppos);
5189 static int tracing_buffers_release(struct inode *inode, struct file *file);
5190 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5191                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5192
5193 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5194 {
5195         struct ftrace_buffer_info *info;
5196         int ret;
5197
5198         ret = tracing_buffers_open(inode, filp);
5199         if (ret < 0)
5200                 return ret;
5201
5202         info = filp->private_data;
5203
5204         if (info->iter.trace->use_max_tr) {
5205                 tracing_buffers_release(inode, filp);
5206                 return -EBUSY;
5207         }
5208
5209         info->iter.snapshot = true;
5210         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5211
5212         return ret;
5213 }
5214
5215 #endif /* CONFIG_TRACER_SNAPSHOT */
5216
5217
5218 static const struct file_operations tracing_thresh_fops = {
5219         .open           = tracing_open_generic,
5220         .read           = tracing_thresh_read,
5221         .write          = tracing_thresh_write,
5222         .llseek         = generic_file_llseek,
5223 };
5224
5225 static const struct file_operations tracing_max_lat_fops = {
5226         .open           = tracing_open_generic,
5227         .read           = tracing_max_lat_read,
5228         .write          = tracing_max_lat_write,
5229         .llseek         = generic_file_llseek,
5230 };
5231
5232 static const struct file_operations set_tracer_fops = {
5233         .open           = tracing_open_generic,
5234         .read           = tracing_set_trace_read,
5235         .write          = tracing_set_trace_write,
5236         .llseek         = generic_file_llseek,
5237 };
5238
5239 static const struct file_operations tracing_pipe_fops = {
5240         .open           = tracing_open_pipe,
5241         .poll           = tracing_poll_pipe,
5242         .read           = tracing_read_pipe,
5243         .splice_read    = tracing_splice_read_pipe,
5244         .release        = tracing_release_pipe,
5245         .llseek         = no_llseek,
5246 };
5247
5248 static const struct file_operations tracing_entries_fops = {
5249         .open           = tracing_open_generic_tr,
5250         .read           = tracing_entries_read,
5251         .write          = tracing_entries_write,
5252         .llseek         = generic_file_llseek,
5253         .release        = tracing_release_generic_tr,
5254 };
5255
5256 static const struct file_operations tracing_total_entries_fops = {
5257         .open           = tracing_open_generic_tr,
5258         .read           = tracing_total_entries_read,
5259         .llseek         = generic_file_llseek,
5260         .release        = tracing_release_generic_tr,
5261 };
5262
5263 static const struct file_operations tracing_free_buffer_fops = {
5264         .open           = tracing_open_generic_tr,
5265         .write          = tracing_free_buffer_write,
5266         .release        = tracing_free_buffer_release,
5267 };
5268
5269 static const struct file_operations tracing_mark_fops = {
5270         .open           = tracing_open_generic_tr,
5271         .write          = tracing_mark_write,
5272         .llseek         = generic_file_llseek,
5273         .release        = tracing_release_generic_tr,
5274 };
5275
5276 static const struct file_operations trace_clock_fops = {
5277         .open           = tracing_clock_open,
5278         .read           = seq_read,
5279         .llseek         = seq_lseek,
5280         .release        = tracing_single_release_tr,
5281         .write          = tracing_clock_write,
5282 };
5283
5284 #ifdef CONFIG_TRACER_SNAPSHOT
5285 static const struct file_operations snapshot_fops = {
5286         .open           = tracing_snapshot_open,
5287         .read           = seq_read,
5288         .write          = tracing_snapshot_write,
5289         .llseek         = tracing_lseek,
5290         .release        = tracing_snapshot_release,
5291 };
5292
5293 static const struct file_operations snapshot_raw_fops = {
5294         .open           = snapshot_raw_open,
5295         .read           = tracing_buffers_read,
5296         .release        = tracing_buffers_release,
5297         .splice_read    = tracing_buffers_splice_read,
5298         .llseek         = no_llseek,
5299 };
5300
5301 #endif /* CONFIG_TRACER_SNAPSHOT */
5302
5303 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5304 {
5305         struct trace_array *tr = inode->i_private;
5306         struct ftrace_buffer_info *info;
5307         int ret;
5308
5309         if (tracing_disabled)
5310                 return -ENODEV;
5311
5312         if (trace_array_get(tr) < 0)
5313                 return -ENODEV;
5314
5315         info = kzalloc(sizeof(*info), GFP_KERNEL);
5316         if (!info) {
5317                 trace_array_put(tr);
5318                 return -ENOMEM;
5319         }
5320
5321         mutex_lock(&trace_types_lock);
5322
5323         info->iter.tr           = tr;
5324         info->iter.cpu_file     = tracing_get_cpu(inode);
5325         info->iter.trace        = tr->current_trace;
5326         info->iter.trace_buffer = &tr->trace_buffer;
5327         info->spare             = NULL;
5328         /* Force reading ring buffer for first read */
5329         info->read              = (unsigned int)-1;
5330
5331         filp->private_data = info;
5332
5333         tr->current_trace->ref++;
5334
5335         mutex_unlock(&trace_types_lock);
5336
5337         ret = nonseekable_open(inode, filp);
5338         if (ret < 0)
5339                 trace_array_put(tr);
5340
5341         return ret;
5342 }
5343
5344 static unsigned int
5345 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5346 {
5347         struct ftrace_buffer_info *info = filp->private_data;
5348         struct trace_iterator *iter = &info->iter;
5349
5350         return trace_poll(iter, filp, poll_table);
5351 }
5352
5353 static ssize_t
5354 tracing_buffers_read(struct file *filp, char __user *ubuf,
5355                      size_t count, loff_t *ppos)
5356 {
5357         struct ftrace_buffer_info *info = filp->private_data;
5358         struct trace_iterator *iter = &info->iter;
5359         ssize_t ret;
5360         ssize_t size;
5361
5362         if (!count)
5363                 return 0;
5364
5365 #ifdef CONFIG_TRACER_MAX_TRACE
5366         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5367                 return -EBUSY;
5368 #endif
5369
5370         if (!info->spare)
5371                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5372                                                           iter->cpu_file);
5373         if (!info->spare)
5374                 return -ENOMEM;
5375
5376         /* Do we have previous read data to read? */
5377         if (info->read < PAGE_SIZE)
5378                 goto read;
5379
5380  again:
5381         trace_access_lock(iter->cpu_file);
5382         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5383                                     &info->spare,
5384                                     count,
5385                                     iter->cpu_file, 0);
5386         trace_access_unlock(iter->cpu_file);
5387
5388         if (ret < 0) {
5389                 if (trace_empty(iter)) {
5390                         if ((filp->f_flags & O_NONBLOCK))
5391                                 return -EAGAIN;
5392
5393                         ret = wait_on_pipe(iter, false);
5394                         if (ret)
5395                                 return ret;
5396
5397                         goto again;
5398                 }
5399                 return 0;
5400         }
5401
5402         info->read = 0;
5403  read:
5404         size = PAGE_SIZE - info->read;
5405         if (size > count)
5406                 size = count;
5407
5408         ret = copy_to_user(ubuf, info->spare + info->read, size);
5409         if (ret == size)
5410                 return -EFAULT;
5411
5412         size -= ret;
5413
5414         *ppos += size;
5415         info->read += size;
5416
5417         return size;
5418 }
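/*
 * Example: per_cpu/cpuN/trace_pipe_raw (bound to these handlers below)
 * returns raw, page-sized ring-buffer pages rather than formatted text,
 * which is what tools such as trace-cmd consume. A simple, blocking way to
 * grab one page from a shell on a 4K-page system might be:
 *
 *	dd if=per_cpu/cpu0/trace_pipe_raw bs=4096 count=1 of=page.bin
 */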
5419
5420 static int tracing_buffers_release(struct inode *inode, struct file *file)
5421 {
5422         struct ftrace_buffer_info *info = file->private_data;
5423         struct trace_iterator *iter = &info->iter;
5424
5425         mutex_lock(&trace_types_lock);
5426
5427         iter->tr->current_trace->ref--;
5428
5429         __trace_array_put(iter->tr);
5430
5431         if (info->spare)
5432                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5433         kfree(info);
5434
5435         mutex_unlock(&trace_types_lock);
5436
5437         return 0;
5438 }
5439
5440 struct buffer_ref {
5441         struct ring_buffer      *buffer;
5442         void                    *page;
5443         int                     ref;
5444 };
5445
5446 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5447                                     struct pipe_buffer *buf)
5448 {
5449         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5450
5451         if (--ref->ref)
5452                 return;
5453
5454         ring_buffer_free_read_page(ref->buffer, ref->page);
5455         kfree(ref);
5456         buf->private = 0;
5457 }
5458
5459 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5460                                 struct pipe_buffer *buf)
5461 {
5462         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5463
5464         ref->ref++;
5465 }
5466
5467 /* Pipe buffer operations for a buffer. */
5468 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5469         .can_merge              = 0,
5470         .confirm                = generic_pipe_buf_confirm,
5471         .release                = buffer_pipe_buf_release,
5472         .steal                  = generic_pipe_buf_steal,
5473         .get                    = buffer_pipe_buf_get,
5474 };
5475
5476 /*
5477  * Callback from splice_to_pipe(); releases any remaining pages at the
5478  * end of the spd in case we errored out while filling the pipe.
5479  */
5480 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5481 {
5482         struct buffer_ref *ref =
5483                 (struct buffer_ref *)spd->partial[i].private;
5484
5485         if (--ref->ref)
5486                 return;
5487
5488         ring_buffer_free_read_page(ref->buffer, ref->page);
5489         kfree(ref);
5490         spd->partial[i].private = 0;
5491 }
5492
5493 static ssize_t
5494 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5495                             struct pipe_inode_info *pipe, size_t len,
5496                             unsigned int flags)
5497 {
5498         struct ftrace_buffer_info *info = file->private_data;
5499         struct trace_iterator *iter = &info->iter;
5500         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5501         struct page *pages_def[PIPE_DEF_BUFFERS];
5502         struct splice_pipe_desc spd = {
5503                 .pages          = pages_def,
5504                 .partial        = partial_def,
5505                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5506                 .flags          = flags,
5507                 .ops            = &buffer_pipe_buf_ops,
5508                 .spd_release    = buffer_spd_release,
5509         };
5510         struct buffer_ref *ref;
5511         int entries, size, i;
5512         ssize_t ret = 0;
5513
5514 #ifdef CONFIG_TRACER_MAX_TRACE
5515         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5516                 return -EBUSY;
5517 #endif
5518
5519         if (splice_grow_spd(pipe, &spd))
5520                 return -ENOMEM;
5521
5522         if (*ppos & (PAGE_SIZE - 1))
5523                 return -EINVAL;
5524
5525         if (len & (PAGE_SIZE - 1)) {
5526                 if (len < PAGE_SIZE)
5527                         return -EINVAL;
5528                 len &= PAGE_MASK;
5529         }
5530
5531  again:
5532         trace_access_lock(iter->cpu_file);
5533         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5534
5535         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5536                 struct page *page;
5537                 int r;
5538
5539                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5540                 if (!ref) {
5541                         ret = -ENOMEM;
5542                         break;
5543                 }
5544
5545                 ref->ref = 1;
5546                 ref->buffer = iter->trace_buffer->buffer;
5547                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5548                 if (!ref->page) {
5549                         ret = -ENOMEM;
5550                         kfree(ref);
5551                         break;
5552                 }
5553
5554                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5555                                           len, iter->cpu_file, 1);
5556                 if (r < 0) {
5557                         ring_buffer_free_read_page(ref->buffer, ref->page);
5558                         kfree(ref);
5559                         break;
5560                 }
5561
5562                 /*
5563                  * Zero out any leftover data; this page is going
5564                  * to user land.
5565                  */
5566                 size = ring_buffer_page_len(ref->page);
5567                 if (size < PAGE_SIZE)
5568                         memset(ref->page + size, 0, PAGE_SIZE - size);
5569
5570                 page = virt_to_page(ref->page);
5571
5572                 spd.pages[i] = page;
5573                 spd.partial[i].len = PAGE_SIZE;
5574                 spd.partial[i].offset = 0;
5575                 spd.partial[i].private = (unsigned long)ref;
5576                 spd.nr_pages++;
5577                 *ppos += PAGE_SIZE;
5578
5579                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5580         }
5581
5582         trace_access_unlock(iter->cpu_file);
5583         spd.nr_pages = i;
5584
5585         /* did we read anything? */
5586         if (!spd.nr_pages) {
5587                 if (ret)
5588                         return ret;
5589
5590                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5591                         return -EAGAIN;
5592
5593                 ret = wait_on_pipe(iter, true);
5594                 if (ret)
5595                         return ret;
5596
5597                 goto again;
5598         }
5599
5600         ret = splice_to_pipe(pipe, &spd);
5601         splice_shrink_spd(&spd);
5602
5603         return ret;
5604 }
5605
5606 static const struct file_operations tracing_buffers_fops = {
5607         .open           = tracing_buffers_open,
5608         .read           = tracing_buffers_read,
5609         .poll           = tracing_buffers_poll,
5610         .release        = tracing_buffers_release,
5611         .splice_read    = tracing_buffers_splice_read,
5612         .llseek         = no_llseek,
5613 };
5614
5615 static ssize_t
5616 tracing_stats_read(struct file *filp, char __user *ubuf,
5617                    size_t count, loff_t *ppos)
5618 {
5619         struct inode *inode = file_inode(filp);
5620         struct trace_array *tr = inode->i_private;
5621         struct trace_buffer *trace_buf = &tr->trace_buffer;
5622         int cpu = tracing_get_cpu(inode);
5623         struct trace_seq *s;
5624         unsigned long cnt;
5625         unsigned long long t;
5626         unsigned long usec_rem;
5627
5628         s = kmalloc(sizeof(*s), GFP_KERNEL);
5629         if (!s)
5630                 return -ENOMEM;
5631
5632         trace_seq_init(s);
5633
5634         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5635         trace_seq_printf(s, "entries: %ld\n", cnt);
5636
5637         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5638         trace_seq_printf(s, "overrun: %ld\n", cnt);
5639
5640         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5641         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5642
5643         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5644         trace_seq_printf(s, "bytes: %ld\n", cnt);
5645
5646         if (trace_clocks[tr->clock_id].in_ns) {
5647                 /* local or global for trace_clock */
5648                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5649                 usec_rem = do_div(t, USEC_PER_SEC);
5650                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5651                                                                 t, usec_rem);
5652
5653                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5654                 usec_rem = do_div(t, USEC_PER_SEC);
5655                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5656         } else {
5657                 /* counter or tsc mode for trace_clock */
5658                 trace_seq_printf(s, "oldest event ts: %llu\n",
5659                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5660
5661                 trace_seq_printf(s, "now ts: %llu\n",
5662                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5663         }
5664
5665         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5666         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5667
5668         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5669         trace_seq_printf(s, "read events: %ld\n", cnt);
5670
5671         count = simple_read_from_buffer(ubuf, count, ppos,
5672                                         s->buffer, trace_seq_used(s));
5673
5674         kfree(s);
5675
5676         return count;
5677 }
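/*
 * Example: the per_cpu/cpuN/stats file produced above reads back as a small
 * block of counters, roughly:
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 5612
 *	oldest event ts:  2543.866340
 *	now ts:  2546.487484
 *	dropped events: 0
 *	read events: 42
 */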
5678
5679 static const struct file_operations tracing_stats_fops = {
5680         .open           = tracing_open_generic_tr,
5681         .read           = tracing_stats_read,
5682         .llseek         = generic_file_llseek,
5683         .release        = tracing_release_generic_tr,
5684 };
5685
5686 #ifdef CONFIG_DYNAMIC_FTRACE
5687
5688 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5689 {
5690         return 0;
5691 }
5692
5693 static ssize_t
5694 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5695                   size_t cnt, loff_t *ppos)
5696 {
5697         static char ftrace_dyn_info_buffer[1024];
5698         static DEFINE_MUTEX(dyn_info_mutex);
5699         unsigned long *p = filp->private_data;
5700         char *buf = ftrace_dyn_info_buffer;
5701         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5702         int r;
5703
5704         mutex_lock(&dyn_info_mutex);
5705         r = sprintf(buf, "%ld ", *p);
5706
5707         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5708         buf[r++] = '\n';
5709
5710         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5711
5712         mutex_unlock(&dyn_info_mutex);
5713
5714         return r;
5715 }
5716
5717 static const struct file_operations tracing_dyn_info_fops = {
5718         .open           = tracing_open_generic,
5719         .read           = tracing_read_dyn_info,
5720         .llseek         = generic_file_llseek,
5721 };
5722 #endif /* CONFIG_DYNAMIC_FTRACE */
5723
5724 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5725 static void
5726 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5727 {
5728         tracing_snapshot();
5729 }
5730
5731 static void
5732 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5733 {
5734         unsigned long *count = (long *)data;
5735
5736         if (!*count)
5737                 return;
5738
5739         if (*count != -1)
5740                 (*count)--;
5741
5742         tracing_snapshot();
5743 }
5744
5745 static int
5746 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5747                       struct ftrace_probe_ops *ops, void *data)
5748 {
5749         long count = (long)data;
5750
5751         seq_printf(m, "%ps:", (void *)ip);
5752
5753         seq_puts(m, "snapshot");
5754
5755         if (count == -1)
5756                 seq_puts(m, ":unlimited\n");
5757         else
5758                 seq_printf(m, ":count=%ld\n", count);
5759
5760         return 0;
5761 }
5762
5763 static struct ftrace_probe_ops snapshot_probe_ops = {
5764         .func                   = ftrace_snapshot,
5765         .print                  = ftrace_snapshot_print,
5766 };
5767
5768 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5769         .func                   = ftrace_count_snapshot,
5770         .print                  = ftrace_snapshot_print,
5771 };
5772
5773 static int
5774 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5775                                char *glob, char *cmd, char *param, int enable)
5776 {
5777         struct ftrace_probe_ops *ops;
5778         void *count = (void *)-1;
5779         char *number;
5780         int ret;
5781
5782         /* hash funcs only work with set_ftrace_filter */
5783         if (!enable)
5784                 return -EINVAL;
5785
5786         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5787
5788         if (glob[0] == '!') {
5789                 unregister_ftrace_function_probe_func(glob+1, ops);
5790                 return 0;
5791         }
5792
5793         if (!param)
5794                 goto out_reg;
5795
5796         number = strsep(&param, ":");
5797
5798         if (!strlen(number))
5799                 goto out_reg;
5800
5801         /*
5802          * We use the callback data field (which is a pointer)
5803          * as our counter.
5804          */
5805         ret = kstrtoul(number, 0, (unsigned long *)&count);
5806         if (ret)
5807                 return ret;
5808
5809  out_reg:
5810         ret = register_ftrace_function_probe(glob, ops, count);
5811
5812         if (ret >= 0)
5813                 alloc_snapshot(&global_trace);
5814
5815         return ret < 0 ? ret : 0;
5816 }
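/*
 * Example: the callback above implements the "snapshot" command of
 * set_ftrace_filter, e.g.:
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	# only the first 5 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter	# remove the probe
 */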
5817
5818 static struct ftrace_func_command ftrace_snapshot_cmd = {
5819         .name                   = "snapshot",
5820         .func                   = ftrace_trace_snapshot_callback,
5821 };
5822
5823 static __init int register_snapshot_cmd(void)
5824 {
5825         return register_ftrace_command(&ftrace_snapshot_cmd);
5826 }
5827 #else
5828 static inline __init int register_snapshot_cmd(void) { return 0; }
5829 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5830
5831 static struct dentry *tracing_get_dentry(struct trace_array *tr)
5832 {
5833         if (WARN_ON(!tr->dir))
5834                 return ERR_PTR(-ENODEV);
5835
5836         /* Top directory uses NULL as the parent */
5837         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5838                 return NULL;
5839
5840         /* All sub buffers have a descriptor */
5841         return tr->dir;
5842 }
5843
5844 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5845 {
5846         struct dentry *d_tracer;
5847
5848         if (tr->percpu_dir)
5849                 return tr->percpu_dir;
5850
5851         d_tracer = tracing_get_dentry(tr);
5852         if (IS_ERR(d_tracer))
5853                 return NULL;
5854
5855         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
5856
5857         WARN_ONCE(!tr->percpu_dir,
5858                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
5859
5860         return tr->percpu_dir;
5861 }
5862
5863 static struct dentry *
5864 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5865                       void *data, long cpu, const struct file_operations *fops)
5866 {
5867         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5868
5869         if (ret) /* See tracing_get_cpu() */
5870                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5871         return ret;
5872 }
5873
5874 static void
5875 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
5876 {
5877         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5878         struct dentry *d_cpu;
5879         char cpu_dir[30]; /* 30 characters should be more than enough */
5880
5881         if (!d_percpu)
5882                 return;
5883
5884         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5885         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
5886         if (!d_cpu) {
5887                 pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
5888                 return;
5889         }
5890
5891         /* per cpu trace_pipe */
5892         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5893                                 tr, cpu, &tracing_pipe_fops);
5894
5895         /* per cpu trace */
5896         trace_create_cpu_file("trace", 0644, d_cpu,
5897                                 tr, cpu, &tracing_fops);
5898
5899         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5900                                 tr, cpu, &tracing_buffers_fops);
5901
5902         trace_create_cpu_file("stats", 0444, d_cpu,
5903                                 tr, cpu, &tracing_stats_fops);
5904
5905         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5906                                 tr, cpu, &tracing_entries_fops);
5907
5908 #ifdef CONFIG_TRACER_SNAPSHOT
5909         trace_create_cpu_file("snapshot", 0644, d_cpu,
5910                                 tr, cpu, &snapshot_fops);
5911
5912         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5913                                 tr, cpu, &snapshot_raw_fops);
5914 #endif
5915 }
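/*
 * The function above populates one per_cpu/cpuN/ directory per tracing CPU,
 * e.g. for cpu0:
 *
 *	per_cpu/cpu0/trace		per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/trace_pipe		per_cpu/cpu0/snapshot (if configured)
 *	per_cpu/cpu0/trace_pipe_raw	per_cpu/cpu0/snapshot_raw
 *	per_cpu/cpu0/stats
 */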
5916
5917 #ifdef CONFIG_FTRACE_SELFTEST
5918 /* Let selftest have access to static functions in this file */
5919 #include "trace_selftest.c"
5920 #endif
5921
5922 struct trace_option_dentry {
5923         struct tracer_opt               *opt;
5924         struct tracer_flags             *flags;
5925         struct trace_array              *tr;
5926         struct dentry                   *entry;
5927 };
5928
5929 static ssize_t
5930 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5931                         loff_t *ppos)
5932 {
5933         struct trace_option_dentry *topt = filp->private_data;
5934         char *buf;
5935
5936         if (topt->flags->val & topt->opt->bit)
5937                 buf = "1\n";
5938         else
5939                 buf = "0\n";
5940
5941         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5942 }
5943
5944 static ssize_t
5945 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5946                          loff_t *ppos)
5947 {
5948         struct trace_option_dentry *topt = filp->private_data;
5949         unsigned long val;
5950         int ret;
5951
5952         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5953         if (ret)
5954                 return ret;
5955
5956         if (val != 0 && val != 1)
5957                 return -EINVAL;
5958
5959         if (!!(topt->flags->val & topt->opt->bit) != val) {
5960                 mutex_lock(&trace_types_lock);
5961                 ret = __set_tracer_option(topt->tr, topt->flags,
5962                                           topt->opt, !val);
5963                 mutex_unlock(&trace_types_lock);
5964                 if (ret)
5965                         return ret;
5966         }
5967
5968         *ppos += cnt;
5969
5970         return cnt;
5971 }
5972
5973
5974 static const struct file_operations trace_options_fops = {
5975         .open = tracing_open_generic,
5976         .read = trace_options_read,
5977         .write = trace_options_write,
5978         .llseek = generic_file_llseek,
5979 };
5980
5981 static ssize_t
5982 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5983                         loff_t *ppos)
5984 {
5985         long index = (long)filp->private_data;
5986         char *buf;
5987
5988         if (trace_flags & (1 << index))
5989                 buf = "1\n";
5990         else
5991                 buf = "0\n";
5992
5993         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5994 }
5995
5996 static ssize_t
5997 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5998                          loff_t *ppos)
5999 {
6000         struct trace_array *tr = &global_trace;
6001         long index = (long)filp->private_data;
6002         unsigned long val;
6003         int ret;
6004
6005         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6006         if (ret)
6007                 return ret;
6008
6009         if (val != 0 && val != 1)
6010                 return -EINVAL;
6011
6012         mutex_lock(&trace_types_lock);
6013         ret = set_tracer_flag(tr, 1 << index, val);
6014         mutex_unlock(&trace_types_lock);
6015
6016         if (ret < 0)
6017                 return ret;
6018
6019         *ppos += cnt;
6020
6021         return cnt;
6022 }
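/*
 * Example: the two sets of handlers above back the files in the "options"
 * directory. Tracer-specific files toggle bits in the current tracer's
 * flags, while the core files map one-to-one onto trace_flags bits, e.g.
 * (assuming the "overwrite" option, which maps to TRACE_ITER_OVERWRITE):
 *
 *	echo 0 > options/overwrite	# stop overwriting old events when full
 *	cat options/overwrite		# "0" or "1"
 */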
6023
6024 static const struct file_operations trace_options_core_fops = {
6025         .open = tracing_open_generic,
6026         .read = trace_options_core_read,
6027         .write = trace_options_core_write,
6028         .llseek = generic_file_llseek,
6029 };
6030
6031 struct dentry *trace_create_file(const char *name,
6032                                  umode_t mode,
6033                                  struct dentry *parent,
6034                                  void *data,
6035                                  const struct file_operations *fops)
6036 {
6037         struct dentry *ret;
6038
6039         ret = tracefs_create_file(name, mode, parent, data, fops);
6040         if (!ret)
6041                 pr_warning("Could not create tracefs '%s' entry\n", name);
6042
6043         return ret;
6044 }
6045
6046
6047 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6048 {
6049         struct dentry *d_tracer;
6050
6051         if (tr->options)
6052                 return tr->options;
6053
6054         d_tracer = tracing_get_dentry(tr);
6055         if (IS_ERR(d_tracer))
6056                 return NULL;
6057
6058         tr->options = tracefs_create_dir("options", d_tracer);
6059         if (!tr->options) {
6060                 pr_warning("Could not create tracefs directory 'options'\n");
6061                 return NULL;
6062         }
6063
6064         return tr->options;
6065 }
6066
6067 static void
6068 create_trace_option_file(struct trace_array *tr,
6069                          struct trace_option_dentry *topt,
6070                          struct tracer_flags *flags,
6071                          struct tracer_opt *opt)
6072 {
6073         struct dentry *t_options;
6074
6075         t_options = trace_options_init_dentry(tr);
6076         if (!t_options)
6077                 return;
6078
6079         topt->flags = flags;
6080         topt->opt = opt;
6081         topt->tr = tr;
6082
6083         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6084                                     &trace_options_fops);
6085
6086 }
6087
6088 static struct trace_option_dentry *
6089 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6090 {
6091         struct trace_option_dentry *topts;
6092         struct tracer_flags *flags;
6093         struct tracer_opt *opts;
6094         int cnt;
6095
6096         if (!tracer)
6097                 return NULL;
6098
6099         flags = tracer->flags;
6100
6101         if (!flags || !flags->opts)
6102                 return NULL;
6103
6104         opts = flags->opts;
6105
6106         for (cnt = 0; opts[cnt].name; cnt++)
6107                 ;
6108
6109         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6110         if (!topts)
6111                 return NULL;
6112
6113         for (cnt = 0; opts[cnt].name; cnt++)
6114                 create_trace_option_file(tr, &topts[cnt], flags,
6115                                          &opts[cnt]);
6116
6117         return topts;
6118 }
6119
6120 static void
6121 destroy_trace_option_files(struct trace_option_dentry *topts)
6122 {
6123         int cnt;
6124
6125         if (!topts)
6126                 return;
6127
6128         for (cnt = 0; topts[cnt].opt; cnt++)
6129                 tracefs_remove(topts[cnt].entry);
6130
6131         kfree(topts);
6132 }
6133
6134 static struct dentry *
6135 create_trace_option_core_file(struct trace_array *tr,
6136                               const char *option, long index)
6137 {
6138         struct dentry *t_options;
6139
6140         t_options = trace_options_init_dentry(tr);
6141         if (!t_options)
6142                 return NULL;
6143
6144         return trace_create_file(option, 0644, t_options, (void *)index,
6145                                     &trace_options_core_fops);
6146 }
6147
6148 static __init void create_trace_options_dir(struct trace_array *tr)
6149 {
6150         struct dentry *t_options;
6151         int i;
6152
6153         t_options = trace_options_init_dentry(tr);
6154         if (!t_options)
6155                 return;
6156
6157         for (i = 0; trace_options[i]; i++)
6158                 create_trace_option_core_file(tr, trace_options[i], i);
6159 }
6160
6161 static ssize_t
6162 rb_simple_read(struct file *filp, char __user *ubuf,
6163                size_t cnt, loff_t *ppos)
6164 {
6165         struct trace_array *tr = filp->private_data;
6166         char buf[64];
6167         int r;
6168
6169         r = tracer_tracing_is_on(tr);
6170         r = sprintf(buf, "%d\n", r);
6171
6172         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6173 }
6174
6175 static ssize_t
6176 rb_simple_write(struct file *filp, const char __user *ubuf,
6177                 size_t cnt, loff_t *ppos)
6178 {
6179         struct trace_array *tr = filp->private_data;
6180         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6181         unsigned long val;
6182         int ret;
6183
6184         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6185         if (ret)
6186                 return ret;
6187
6188         if (buffer) {
6189                 mutex_lock(&trace_types_lock);
6190                 if (val) {
6191                         tracer_tracing_on(tr);
6192                         if (tr->current_trace->start)
6193                                 tr->current_trace->start(tr);
6194                 } else {
6195                         tracer_tracing_off(tr);
6196                         if (tr->current_trace->stop)
6197                                 tr->current_trace->stop(tr);
6198                 }
6199                 mutex_unlock(&trace_types_lock);
6200         }
6201
6202         (*ppos)++;
6203
6204         return cnt;
6205 }
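/*
 * Example usage of the "tracing_on" file handled above: it switches
 * recording into the ring buffer on and off without changing the current
 * tracer, e.g.:
 *
 *	echo 0 > tracing_on	# stop recording (the tracer stays selected)
 *	cat tracing_on		# "0"
 *	echo 1 > tracing_on	# resume recording
 */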
6206
6207 static const struct file_operations rb_simple_fops = {
6208         .open           = tracing_open_generic_tr,
6209         .read           = rb_simple_read,
6210         .write          = rb_simple_write,
6211         .release        = tracing_release_generic_tr,
6212         .llseek         = default_llseek,
6213 };
6214
6215 struct dentry *trace_instance_dir;
6216
6217 static void
6218 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6219
6220 static int
6221 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6222 {
6223         enum ring_buffer_flags rb_flags;
6224
6225         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6226
6227         buf->tr = tr;
6228
6229         buf->buffer = ring_buffer_alloc(size, rb_flags);
6230         if (!buf->buffer)
6231                 return -ENOMEM;
6232
6233         buf->data = alloc_percpu(struct trace_array_cpu);
6234         if (!buf->data) {
6235                 ring_buffer_free(buf->buffer);
6236                 return -ENOMEM;
6237         }
6238
6239         /* Allocate the first page for all buffers */
6240         set_buffer_entries(&tr->trace_buffer,
6241                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6242
6243         return 0;
6244 }
6245
6246 static int allocate_trace_buffers(struct trace_array *tr, int size)
6247 {
6248         int ret;
6249
6250         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6251         if (ret)
6252                 return ret;
6253
6254 #ifdef CONFIG_TRACER_MAX_TRACE
6255         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6256                                     allocate_snapshot ? size : 1);
6257         if (WARN_ON(ret)) {
6258                 ring_buffer_free(tr->trace_buffer.buffer);
6259                 free_percpu(tr->trace_buffer.data);
6260                 return -ENOMEM;
6261         }
6262         tr->allocated_snapshot = allocate_snapshot;
6263
6264         /*
6265          * Only the top level trace array gets its snapshot allocated
6266          * from the kernel command line.
6267          */
6268         allocate_snapshot = false;
6269 #endif
6270         return 0;
6271 }
6272
6273 static void free_trace_buffer(struct trace_buffer *buf)
6274 {
6275         if (buf->buffer) {
6276                 ring_buffer_free(buf->buffer);
6277                 buf->buffer = NULL;
6278                 free_percpu(buf->data);
6279                 buf->data = NULL;
6280         }
6281 }
6282
6283 static void free_trace_buffers(struct trace_array *tr)
6284 {
6285         if (!tr)
6286                 return;
6287
6288         free_trace_buffer(&tr->trace_buffer);
6289
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291         free_trace_buffer(&tr->max_buffer);
6292 #endif
6293 }
6294
6295 static int instance_mkdir(const char *name)
6296 {
6297         struct trace_array *tr;
6298         int ret;
6299
6300         mutex_lock(&trace_types_lock);
6301
6302         ret = -EEXIST;
6303         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6304                 if (tr->name && strcmp(tr->name, name) == 0)
6305                         goto out_unlock;
6306         }
6307
6308         ret = -ENOMEM;
6309         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6310         if (!tr)
6311                 goto out_unlock;
6312
6313         tr->name = kstrdup(name, GFP_KERNEL);
6314         if (!tr->name)
6315                 goto out_free_tr;
6316
6317         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6318                 goto out_free_tr;
6319
6320         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6321
6322         raw_spin_lock_init(&tr->start_lock);
6323
6324         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6325
6326         tr->current_trace = &nop_trace;
6327
6328         INIT_LIST_HEAD(&tr->systems);
6329         INIT_LIST_HEAD(&tr->events);
6330
6331         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6332                 goto out_free_tr;
6333
6334         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6335         if (!tr->dir)
6336                 goto out_free_tr;
6337
6338         ret = event_trace_add_tracer(tr->dir, tr);
6339         if (ret) {
6340                 tracefs_remove_recursive(tr->dir);
6341                 goto out_free_tr;
6342         }
6343
6344         init_tracer_tracefs(tr, tr->dir);
6345
6346         list_add(&tr->list, &ftrace_trace_arrays);
6347
6348         mutex_unlock(&trace_types_lock);
6349
6350         return 0;
6351
6352  out_free_tr:
6353         free_trace_buffers(tr);
6354         free_cpumask_var(tr->tracing_cpumask);
6355         kfree(tr->name);
6356         kfree(tr);
6357
6358  out_unlock:
6359         mutex_unlock(&trace_types_lock);
6360
6361         return ret;
6362
6363 }
6364
6365 static int instance_rmdir(const char *name)
6366 {
6367         struct trace_array *tr;
6368         int found = 0;
6369         int ret;
6370
6371         mutex_lock(&trace_types_lock);
6372
6373         ret = -ENODEV;
6374         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6375                 if (tr->name && strcmp(tr->name, name) == 0) {
6376                         found = 1;
6377                         break;
6378                 }
6379         }
6380         if (!found)
6381                 goto out_unlock;
6382
6383         ret = -EBUSY;
6384         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6385                 goto out_unlock;
6386
6387         list_del(&tr->list);
6388
6389         tracing_set_nop(tr);
6390         event_trace_del_tracer(tr);
6391         ftrace_destroy_function_files(tr);
6392         debugfs_remove_recursive(tr->dir);
6393         free_trace_buffers(tr);
6394
6395         kfree(tr->name);
6396         kfree(tr);
6397
6398         ret = 0;
6399
6400  out_unlock:
6401         mutex_unlock(&trace_types_lock);
6402
6403         return ret;
6404 }
6405
6406 static __init void create_trace_instances(struct dentry *d_tracer)
6407 {
6408         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6409                                                          instance_mkdir,
6410                                                          instance_rmdir);
6411         if (WARN_ON(!trace_instance_dir))
6412                 return;
6413 }
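/*
 * Example: instance_mkdir()/instance_rmdir() above are registered as the
 * mkdir/rmdir callbacks of the "instances" directory, so separate trace
 * arrays can be created straight from the shell, e.g.:
 *
 *	mkdir instances/foo	# new buffer with its own events/ and per_cpu/
 *	rmdir instances/foo	# fails with -EBUSY while the instance is in use
 */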
6414
6415 static void
6416 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6417 {
6418         int cpu;
6419
6420         trace_create_file("available_tracers", 0444, d_tracer,
6421                         tr, &show_traces_fops);
6422
6423         trace_create_file("current_tracer", 0644, d_tracer,
6424                         tr, &set_tracer_fops);
6425
6426         trace_create_file("tracing_cpumask", 0644, d_tracer,
6427                           tr, &tracing_cpumask_fops);
6428
6429         trace_create_file("trace_options", 0644, d_tracer,
6430                           tr, &tracing_iter_fops);
6431
6432         trace_create_file("trace", 0644, d_tracer,
6433                           tr, &tracing_fops);
6434
6435         trace_create_file("trace_pipe", 0444, d_tracer,
6436                           tr, &tracing_pipe_fops);
6437
6438         trace_create_file("buffer_size_kb", 0644, d_tracer,
6439                           tr, &tracing_entries_fops);
6440
6441         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6442                           tr, &tracing_total_entries_fops);
6443
6444         trace_create_file("free_buffer", 0200, d_tracer,
6445                           tr, &tracing_free_buffer_fops);
6446
6447         trace_create_file("trace_marker", 0220, d_tracer,
6448                           tr, &tracing_mark_fops);
6449
6450         trace_create_file("trace_clock", 0644, d_tracer, tr,
6451                           &trace_clock_fops);
6452
6453         trace_create_file("tracing_on", 0644, d_tracer,
6454                           tr, &rb_simple_fops);
6455
6456 #ifdef CONFIG_TRACER_MAX_TRACE
6457         trace_create_file("tracing_max_latency", 0644, d_tracer,
6458                         &tr->max_latency, &tracing_max_lat_fops);
6459 #endif
6460
6461         if (ftrace_create_function_files(tr, d_tracer))
6462                 WARN(1, "Could not allocate function filter files");
6463
6464 #ifdef CONFIG_TRACER_SNAPSHOT
6465         trace_create_file("snapshot", 0644, d_tracer,
6466                           tr, &snapshot_fops);
6467 #endif
6468
6469         for_each_tracing_cpu(cpu)
6470                 tracing_init_tracefs_percpu(tr, cpu);
6471
6472 }
6473
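/*
 * trace_automount - callback for the debugfs automount point created
 * in tracing_init_dentry(). It mounts tracefs and takes an extra
 * reference so the legacy debugfs/tracing path keeps working.
 */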
6474 static struct vfsmount *trace_automount(void *ignore)
6475 {
6476         struct vfsmount *mnt;
6477         struct file_system_type *type;
6478
6479         /*
6480          * To maintain backward compatibility for tools that mount
6481          * debugfs to get to the tracing facility, tracefs is automatically
6482          * mounted to the debugfs/tracing directory.
6483          */
6484         type = get_fs_type("tracefs");
6485         if (!type)
6486                 return NULL;
6487         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6488         put_filesystem(type);
6489         if (IS_ERR(mnt))
6490                 return NULL;
6491         mntget(mnt);
6492
6493         return mnt;
6494 }
6495
6496 /**
6497  * tracing_init_dentry - initialize top level trace array
6498  *
6499  * This is called when creating files or directories in the tracing
6500  * directory. It is called via fs_initcall() by any of the boot-up code
6501  * and expects to return the dentry of the top level tracing directory.
6502  */
6503 struct dentry *tracing_init_dentry(void)
6504 {
6505         struct trace_array *tr = &global_trace;
6506
6507         /* The top level trace array uses NULL as parent */
6508         if (tr->dir)
6509                 return NULL;
6510
6511         if (WARN_ON(!debugfs_initialized()))
6512                 return ERR_PTR(-ENODEV);
6513
6514         /*
6515          * As there may still be users that expect the tracing
6516          * files to exist in debugfs/tracing, we must automount
6517          * the tracefs file system there, so older tools still
6518          * work with the newer kernel.
6519          */
6520         tr->dir = debugfs_create_automount("tracing", NULL,
6521                                            trace_automount, NULL);
6522         if (!tr->dir) {
6523                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6524                 return ERR_PTR(-ENOMEM);
6525         }
6526
6527         return NULL;
6528 }
6529
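/*
 * tracer_init_tracefs - fs_initcall that populates the top level
 * tracing directory: the global trace_array files, README,
 * saved_cmdlines, the "instances" directory and the tracer options.
 */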
6530 static __init int tracer_init_tracefs(void)
6531 {
6532         struct dentry *d_tracer;
6533
6534         trace_access_lock_init();
6535
6536         d_tracer = tracing_init_dentry();
6537         if (IS_ERR(d_tracer))
6538                 return 0;
6539
6540         init_tracer_tracefs(&global_trace, d_tracer);
6541
6542         trace_create_file("tracing_thresh", 0644, d_tracer,
6543                         &global_trace, &tracing_thresh_fops);
6544
6545         trace_create_file("README", 0444, d_tracer,
6546                         NULL, &tracing_readme_fops);
6547
6548         trace_create_file("saved_cmdlines", 0444, d_tracer,
6549                         NULL, &tracing_saved_cmdlines_fops);
6550
6551         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6552                           NULL, &tracing_saved_cmdlines_size_fops);
6553
6554 #ifdef CONFIG_DYNAMIC_FTRACE
6555         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6556                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6557 #endif
6558
6559         create_trace_instances(d_tracer);
6560
6561         create_trace_options_dir(&global_trace);
6562
6563         /* If the tracer was started via cmdline, create options for it here */
6564         if (global_trace.current_trace != &nop_trace)
6565                 update_tracer_options(&global_trace, global_trace.current_trace);
6566
6567         return 0;
6568 }
6569
6570 static int trace_panic_handler(struct notifier_block *this,
6571                                unsigned long event, void *unused)
6572 {
6573         if (ftrace_dump_on_oops)
6574                 ftrace_dump(ftrace_dump_on_oops);
6575         return NOTIFY_OK;
6576 }
6577
6578 static struct notifier_block trace_panic_notifier = {
6579         .notifier_call  = trace_panic_handler,
6580         .next           = NULL,
6581         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6582 };
6583
6584 static int trace_die_handler(struct notifier_block *self,
6585                              unsigned long val,
6586                              void *data)
6587 {
6588         switch (val) {
6589         case DIE_OOPS:
6590                 if (ftrace_dump_on_oops)
6591                         ftrace_dump(ftrace_dump_on_oops);
6592                 break;
6593         default:
6594                 break;
6595         }
6596         return NOTIFY_OK;
6597 }
6598
6599 static struct notifier_block trace_die_notifier = {
6600         .notifier_call = trace_die_handler,
6601         .priority = 200
6602 };
6603
6604 /*
6605  * printk is limited to a maximum of 1024 bytes; we really don't need it that big.
6606  * Nothing should be printing 1000 characters anyway.
6607  */
6608 #define TRACE_MAX_PRINT         1000
6609
6610 /*
6611  * Define here KERN_TRACE so that we have one place to modify
6612  * it if we decide to change what log level the ftrace dump
6613  * should be at.
6614  */
6615 #define KERN_TRACE              KERN_EMERG
6616
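/*
 * trace_printk_seq - print the contents of a trace_seq to the console
 * at KERN_TRACE level, clamped to TRACE_MAX_PRINT bytes, then
 * reinitialize the sequence for the next line.
 */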
6617 void
6618 trace_printk_seq(struct trace_seq *s)
6619 {
6620         /* Probably should print a warning here. */
6621         if (s->seq.len >= TRACE_MAX_PRINT)
6622                 s->seq.len = TRACE_MAX_PRINT;
6623
6624         /*
6625          * More paranoid code. Although the buffer size is set to
6626          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
6627          * an extra layer of protection.
6628          */
6629         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
6630                 s->seq.len = s->seq.size - 1;
6631
6632         /* should already be zero terminated, but we are paranoid. */
6633         s->buffer[s->seq.len] = 0;
6634
6635         printk(KERN_TRACE "%s", s->buffer);
6636
6637         trace_seq_init(s);
6638 }
6639
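/*
 * trace_init_global_iter - set @iter up to walk the global trace array
 * on all CPUs; used by ftrace_dump() below.
 */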
6640 void trace_init_global_iter(struct trace_iterator *iter)
6641 {
6642         iter->tr = &global_trace;
6643         iter->trace = iter->tr->current_trace;
6644         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6645         iter->trace_buffer = &global_trace.trace_buffer;
6646
6647         if (iter->trace && iter->trace->open)
6648                 iter->trace->open(iter);
6649
6650         /* Annotate start of buffers if we had overruns */
6651         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6652                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6653
6654         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6655         if (trace_clocks[iter->tr->clock_id].in_ns)
6656                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6657 }
6658
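/**
 * ftrace_dump - dump the ring buffer contents to the console
 * @oops_dump_mode: DUMP_ALL dumps every CPU buffer, DUMP_ORIG only the
 *	CPU that triggered the dump, DUMP_NONE skips dumping entirely.
 *
 * Tracing is turned off and recording is disabled on every CPU while
 * the buffered entries are printed at KERN_TRACE level.
 */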
6659 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6660 {
6661         /* use static because iter can be a bit big for the stack */
6662         static struct trace_iterator iter;
6663         static atomic_t dump_running;
6664         unsigned int old_userobj;
6665         unsigned long flags;
6666         int cnt = 0, cpu;
6667
6668         /* Only allow one dump user at a time. */
6669         if (atomic_inc_return(&dump_running) != 1) {
6670                 atomic_dec(&dump_running);
6671                 return;
6672         }
6673
6674         /*
6675          * Always turn off tracing when we dump.
6676          * We don't need to show trace output of what happens
6677          * between multiple crashes.
6678          *
6679          * If the user does a sysrq-z, then they can re-enable
6680          * tracing with echo 1 > tracing_on.
6681          */
6682         tracing_off();
6683
6684         local_irq_save(flags);
6685
6686         /* Simulate the iterator */
6687         trace_init_global_iter(&iter);
6688
6689         for_each_tracing_cpu(cpu) {
6690                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6691         }
6692
6693         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6694
6695         /* don't look at user memory in panic mode */
6696         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6697
6698         switch (oops_dump_mode) {
6699         case DUMP_ALL:
6700                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6701                 break;
6702         case DUMP_ORIG:
6703                 iter.cpu_file = raw_smp_processor_id();
6704                 break;
6705         case DUMP_NONE:
6706                 goto out_enable;
6707         default:
6708                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6709                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6710         }
6711
6712         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6713
6714         /* Did function tracer already get disabled? */
6715         if (ftrace_is_dead()) {
6716                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6717                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6718         }
6719
6720         /*
6721          * We need to stop all tracing on all CPUs to read
6722          * the next buffer. This is a bit expensive, but is
6723          * not done often. We print all that we can read,
6724          * and then release the locks again.
6725          */
6726
6727         while (!trace_empty(&iter)) {
6728
6729                 if (!cnt)
6730                         printk(KERN_TRACE "---------------------------------\n");
6731
6732                 cnt++;
6733
6734                 /* reset all but tr, trace, and overruns */
6735                 memset(&iter.seq, 0,
6736                        sizeof(struct trace_iterator) -
6737                        offsetof(struct trace_iterator, seq));
6738                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6739                 iter.pos = -1;
6740
6741                 if (trace_find_next_entry_inc(&iter) != NULL) {
6742                         int ret;
6743
6744                         ret = print_trace_line(&iter);
6745                         if (ret != TRACE_TYPE_NO_CONSUME)
6746                                 trace_consume(&iter);
6747                 }
6748                 touch_nmi_watchdog();
6749
6750                 trace_printk_seq(&iter.seq);
6751         }
6752
6753         if (!cnt)
6754                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6755         else
6756                 printk(KERN_TRACE "---------------------------------\n");
6757
6758  out_enable:
6759         trace_flags |= old_userobj;
6760
6761         for_each_tracing_cpu(cpu) {
6762                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6763         }
6764         atomic_dec(&dump_running);
6765         local_irq_restore(flags);
6766 }
6767 EXPORT_SYMBOL_GPL(ftrace_dump);
6768
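/*
 * tracer_alloc_buffers - boot-time setup of the global trace array:
 * allocate the cpumasks, the ring buffers and the saved-cmdlines map,
 * register the nop tracer and hook up the panic and die notifiers.
 */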
6769 __init static int tracer_alloc_buffers(void)
6770 {
6771         int ring_buf_size;
6772         int ret = -ENOMEM;
6773
6774         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6775                 goto out;
6776
6777         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6778                 goto out_free_buffer_mask;
6779
6780         /* Only allocate trace_printk buffers if a trace_printk exists */
6781         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6782                 /* Must be called before global_trace.buffer is allocated */
6783                 trace_printk_init_buffers();
6784
6785         /* To save memory, keep the ring buffer size to its minimum */
6786         if (ring_buffer_expanded)
6787                 ring_buf_size = trace_buf_size;
6788         else
6789                 ring_buf_size = 1;
6790
6791         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6792         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6793
6794         raw_spin_lock_init(&global_trace.start_lock);
6795
6796         /* Used for event triggers */
6797         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6798         if (!temp_buffer)
6799                 goto out_free_cpumask;
6800
6801         if (trace_create_savedcmd() < 0)
6802                 goto out_free_temp_buffer;
6803
6804         /* TODO: make the number of buffers hot pluggable with CPUs */
6805         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6806                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6807                 WARN_ON(1);
6808                 goto out_free_savedcmd;
6809         }
6810
6811         if (global_trace.buffer_disabled)
6812                 tracing_off();
6813
6814         if (trace_boot_clock) {
6815                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6816                 if (ret < 0)
6817                         pr_warning("Trace clock %s not defined, going back to default\n",
6818                                    trace_boot_clock);
6819         }
6820
6821         /*
6822          * register_tracer() might reference current_trace, so it
6823          * needs to be set before we register anything. This is
6824          * just a bootstrap of current_trace anyway.
6825          */
6826         global_trace.current_trace = &nop_trace;
6827
6828         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6829
6830         ftrace_init_global_array_ops(&global_trace);
6831
6832         register_tracer(&nop_trace);
6833
6834         /* All seems OK, enable tracing */
6835         tracing_disabled = 0;
6836
6837         atomic_notifier_chain_register(&panic_notifier_list,
6838                                        &trace_panic_notifier);
6839
6840         register_die_notifier(&trace_die_notifier);
6841
6842         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6843
6844         INIT_LIST_HEAD(&global_trace.systems);
6845         INIT_LIST_HEAD(&global_trace.events);
6846         list_add(&global_trace.list, &ftrace_trace_arrays);
6847
6848         while (trace_boot_options) {
6849                 char *option;
6850
6851                 option = strsep(&trace_boot_options, ",");
6852                 trace_set_options(&global_trace, option);
6853         }
6854
6855         register_snapshot_cmd();
6856
6857         return 0;
6858
6859 out_free_savedcmd:
6860         free_saved_cmdlines_buffer(savedcmd);
6861 out_free_temp_buffer:
6862         ring_buffer_free(temp_buffer);
6863 out_free_cpumask:
6864         free_cpumask_var(global_trace.tracing_cpumask);
6865 out_free_buffer_mask:
6866         free_cpumask_var(tracing_buffer_mask);
6867 out:
6868         return ret;
6869 }
6870
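/*
 * trace_init - early tracing setup: allocate the tracepoint print
 * iterator when tracepoint_printk is set, allocate the trace buffers
 * and initialize the trace events.
 */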
6871 void __init trace_init(void)
6872 {
6873         if (tracepoint_printk) {
6874                 tracepoint_print_iter =
6875                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
6876                 if (WARN_ON(!tracepoint_print_iter))
6877                         tracepoint_printk = 0;
6878         }
6879         tracer_alloc_buffers();
6880         trace_event_init();
6881 }
6882
6883 __init static int clear_boot_tracer(void)
6884 {
6885         /*
6886          * The default boot-up tracer name lives in an init section.
6887          * This function is called at late_initcall time. If the boot
6888          * tracer was never found, clear the pointer out to prevent a
6889          * later registration from accessing the buffer that is
6890          * about to be freed.
6891          */
6892         if (!default_bootup_tracer)
6893                 return 0;
6894
6895         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6896                default_bootup_tracer);
6897         default_bootup_tracer = NULL;
6898
6899         return 0;
6900 }
6901
6902 fs_initcall(tracer_init_tracefs);
6903 late_initcall(clear_boot_tracer);