GNU Linux-libre 4.9.337-gnu1
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk(), could
59  * occur at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 and is set back to zero only if the
93  * initialization of the tracer succeeds. That is the only place
94  * that clears it.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         /* Ignore the "tp_printk_stop_on_boot" param */
232         if (*str == '_')
233                 return 0;
234
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
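/*
 * Illustrative example (not part of this file): the boot parameters handled
 * above can be combined on the kernel command line, for instance (tracer and
 * option names depend on the kernel configuration):
 *
 *	ftrace=function trace_options=stacktrace trace_clock=global
 *	ftrace_dump_on_oops=orig_cpu traceoff_on_warning alloc_snapshot tp_printk
 *
 * "ftrace=" picks the boot-up tracer, "alloc_snapshot" requests the snapshot
 * buffer and pre-expands the ring buffer, and "tp_printk" mirrors tracepoint
 * output to printk via tracepoint_print_iter.
 */
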
241 unsigned long long ns2usecs(cycle_t nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
247
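/*
 * Worked example: ns2usecs() rounds to the nearest microsecond, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */
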
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         TRACE_ITER_EVENT_FORK
263
264 /*
265  * The global_trace is the descriptor that holds the tracing
266  * buffers for the live tracing. For each CPU, it contains
267  * a link list of pages that will store trace entries. The
268  * page descriptor of the pages in the memory is used to hold
269  * the link list by linking the lru item in the page descriptor
270  * to each of the pages in the buffer per CPU.
271  *
272  * For each active CPU there is a data field that holds the
273  * pages for the buffer for that CPU. Each CPU has the same number
274  * of pages allocated for its buffer.
275  */
276 static struct trace_array global_trace = {
277         .trace_flags = TRACE_DEFAULT_FLAGS,
278 };
279
280 LIST_HEAD(ftrace_trace_arrays);
281
282 int trace_array_get(struct trace_array *this_tr)
283 {
284         struct trace_array *tr;
285         int ret = -ENODEV;
286
287         mutex_lock(&trace_types_lock);
288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
289                 if (tr == this_tr) {
290                         tr->ref++;
291                         ret = 0;
292                         break;
293                 }
294         }
295         mutex_unlock(&trace_types_lock);
296
297         return ret;
298 }
299
300 static void __trace_array_put(struct trace_array *this_tr)
301 {
302         WARN_ON(!this_tr->ref);
303         this_tr->ref--;
304 }
305
306 void trace_array_put(struct trace_array *this_tr)
307 {
308         mutex_lock(&trace_types_lock);
309         __trace_array_put(this_tr);
310         mutex_unlock(&trace_types_lock);
311 }
312
313 int call_filter_check_discard(struct trace_event_call *call, void *rec,
314                               struct ring_buffer *buffer,
315                               struct ring_buffer_event *event)
316 {
317         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
318             !filter_match_preds(call->filter, rec)) {
319                 __trace_event_discard_commit(buffer, event);
320                 return 1;
321         }
322
323         return 0;
324 }
325
326 void trace_free_pid_list(struct trace_pid_list *pid_list)
327 {
328         vfree(pid_list->pids);
329         kfree(pid_list);
330 }
331
332 /**
333  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
334  * @filtered_pids: The list of pids to check
335  * @search_pid: The PID to find in @filtered_pids
336  *
337  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
338  */
339 bool
340 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
341 {
342         /*
343          * If pid_max changed after filtered_pids was created, we
344          * by default ignore all pids greater than the previous pid_max.
345          */
346         if (search_pid >= filtered_pids->pid_max)
347                 return false;
348
349         return test_bit(search_pid, filtered_pids->pids);
350 }
351
352 /**
353  * trace_ignore_this_task - should a task be ignored for tracing
354  * @filtered_pids: The list of pids to check
355  * @task: The task that should be ignored if not filtered
356  *
357  * Checks if @task should be traced or not from @filtered_pids.
358  * Returns true if @task should *NOT* be traced.
359  * Returns false if @task should be traced.
360  */
361 bool
362 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
363 {
364         /*
365          * Return false, because if filtered_pids does not exist,
366          * all pids are good to trace.
367          */
368         if (!filtered_pids)
369                 return false;
370
371         return !trace_find_filtered_pid(filtered_pids, task->pid);
372 }
373
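/*
 * Illustrative sketch (not part of this file): a tracing hook can use
 * trace_ignore_this_task() to honour a pid filter before recording an
 * event. The names below (my_pid_list, my_record_event) are hypothetical;
 * real callers also dereference the list under RCU.
 *
 *	static struct trace_pid_list *my_pid_list;
 *
 *	static void my_record_event(struct task_struct *task)
 *	{
 *		if (trace_ignore_this_task(my_pid_list, task))
 *			return;
 *		... record the event for @task ...
 *	}
 */
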
374 /**
375  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
376  * @pid_list: The list to modify
377  * @self: The current task for fork or NULL for exit
378  * @task: The task to add or remove
379  *
380  * If adding a task, if @self is defined, the task is only added if @self
381  * is also included in @pid_list. This happens on fork and tasks should
382  * only be added when the parent is listed. If @self is NULL, then the
383  * @task pid will be removed from the list, which would happen on exit
384  * of a task.
385  */
386 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
387                                   struct task_struct *self,
388                                   struct task_struct *task)
389 {
390         if (!pid_list)
391                 return;
392
393         /* For forks, we only add if the forking task is listed */
394         if (self) {
395                 if (!trace_find_filtered_pid(pid_list, self->pid))
396                         return;
397         }
398
399         /* Sorry, but we don't support pid_max changing after setting */
400         if (task->pid >= pid_list->pid_max)
401                 return;
402
403         /* "self" is set for forks, and NULL for exits */
404         if (self)
405                 set_bit(task->pid, pid_list->pids);
406         else
407                 clear_bit(task->pid, pid_list->pids);
408 }
409
410 /**
411  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
412  * @pid_list: The pid list to show
413  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
414  * @pos: The position of the file
415  *
416  * This is used by the seq_file "next" operation to iterate the pids
417  * listed in a trace_pid_list structure.
418  *
419  * Returns the pid+1 as we want to display pid of zero, but NULL would
420  * stop the iteration.
421  */
422 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
423 {
424         unsigned long pid = (unsigned long)v;
425
426         (*pos)++;
427
428         /* pid is already +1 of the actual previous bit */
429         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
430
431         /* Return pid + 1 to allow zero to be represented */
432         if (pid < pid_list->pid_max)
433                 return (void *)(pid + 1);
434
435         return NULL;
436 }
437
438 /**
439  * trace_pid_start - Used for seq_file to start reading pid lists
440  * @pid_list: The pid list to show
441  * @pos: The position of the file
442  *
443  * This is used by seq_file "start" operation to start the iteration
444  * of listing pids.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
450 {
451         unsigned long pid;
452         loff_t l = 0;
453
454         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
455         if (pid >= pid_list->pid_max)
456                 return NULL;
457
458         /* Return pid + 1 so that zero can be the exit value */
459         for (pid++; pid && l < *pos;
460              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
461                 ;
462         return (void *)pid;
463 }
464
465 /**
466  * trace_pid_show - show the current pid in seq_file processing
467  * @m: The seq_file structure to write into
468  * @v: A void pointer of the pid (+1) value to display
469  *
470  * Can be directly used by seq_file operations to display the current
471  * pid value.
472  */
473 int trace_pid_show(struct seq_file *m, void *v)
474 {
475         unsigned long pid = (unsigned long)v - 1;
476
477         seq_printf(m, "%lu\n", pid);
478         return 0;
479 }
480
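/*
 * Illustrative sketch (not part of this file): trace_pid_start(),
 * trace_pid_next() and trace_pid_show() are meant to back a seq_file
 * interface. A hypothetical file could wire them up roughly like this
 * (my_pid_list, the wrapper names and the stop handler are made up):
 *
 *	static void *my_pid_seq_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,
 *		.next	= my_pid_seq_next,
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */
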
481 /* 128 should be much more than enough */
482 #define PID_BUF_SIZE            127
483
484 int trace_pid_write(struct trace_pid_list *filtered_pids,
485                     struct trace_pid_list **new_pid_list,
486                     const char __user *ubuf, size_t cnt)
487 {
488         struct trace_pid_list *pid_list;
489         struct trace_parser parser;
490         unsigned long val;
491         int nr_pids = 0;
492         ssize_t read = 0;
493         ssize_t ret = 0;
494         loff_t pos;
495         pid_t pid;
496
497         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
498                 return -ENOMEM;
499
500         /*
501          * Always create a new array when the user writes new pids; the
502          * write is an all or nothing operation. If the operation fails,
503          * the current list is left unmodified and the new array is
504          * freed.
505          */
506         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
507         if (!pid_list) {
508                 trace_parser_put(&parser);
509                 return -ENOMEM;
510         }
511
512         pid_list->pid_max = READ_ONCE(pid_max);
513
514         /* Only truncating will shrink pid_max */
515         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
516                 pid_list->pid_max = filtered_pids->pid_max;
517
518         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
519         if (!pid_list->pids) {
520                 trace_parser_put(&parser);
521                 kfree(pid_list);
522                 return -ENOMEM;
523         }
524
525         if (filtered_pids) {
526                 /* copy the current bits to the new max */
527                 for_each_set_bit(pid, filtered_pids->pids,
528                                  filtered_pids->pid_max) {
529                         set_bit(pid, pid_list->pids);
530                         nr_pids++;
531                 }
532         }
533
534         while (cnt > 0) {
535
536                 pos = 0;
537
538                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
539                 if (ret < 0 || !trace_parser_loaded(&parser))
540                         break;
541
542                 read += ret;
543                 ubuf += ret;
544                 cnt -= ret;
545
546                 parser.buffer[parser.idx] = 0;
547
548                 ret = -EINVAL;
549                 if (kstrtoul(parser.buffer, 0, &val))
550                         break;
551                 if (val >= pid_list->pid_max)
552                         break;
553
554                 pid = (pid_t)val;
555
556                 set_bit(pid, pid_list->pids);
557                 nr_pids++;
558
559                 trace_parser_clear(&parser);
560                 ret = 0;
561         }
562         trace_parser_put(&parser);
563
564         if (ret < 0) {
565                 trace_free_pid_list(pid_list);
566                 return ret;
567         }
568
569         if (!nr_pids) {
570                 /* Cleared the list of pids */
571                 trace_free_pid_list(pid_list);
572                 read = ret;
573                 pid_list = NULL;
574         }
575
576         *new_pid_list = pid_list;
577
578         return read;
579 }
580
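/*
 * Illustrative sketch (not part of this file): a ->write() handler for a
 * pid filter file would typically feed the user buffer through
 * trace_pid_write() and only then publish the new list. The locking
 * details are omitted and the "my_filtered_pids" field is hypothetical:
 *
 *	ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *	rcu_assign_pointer(tr->my_filtered_pids, new_list);
 *	if (old_list) {
 *		synchronize_sched();
 *		trace_free_pid_list(old_list);
 *	}
 */
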
581 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
582 {
583         u64 ts;
584
585         /* Early boot up does not have a buffer yet */
586         if (!buf->buffer)
587                 return trace_clock_local();
588
589         ts = ring_buffer_time_stamp(buf->buffer, cpu);
590         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
591
592         return ts;
593 }
594
595 cycle_t ftrace_now(int cpu)
596 {
597         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
598 }
599
600 /**
601  * tracing_is_enabled - Show if global_trace has been enabled
602  *
603  * Shows whether the global trace is enabled or not. It uses the
604  * mirror flag "buffer_disabled" so it can be used in fast paths such
605  * as the irqsoff tracer. But it may be inaccurate due to races. If you
606  * need to know the accurate state, use tracing_is_on(), which is a
607  * little slower but accurate.
608  */
609 int tracing_is_enabled(void)
610 {
611         /*
612          * For quick access (irqsoff uses this in fast path), just
613          * return the mirror variable of the state of the ring buffer.
614          * It's a little racy, but we don't really care.
615          */
616         smp_rmb();
617         return !global_trace.buffer_disabled;
618 }
619
620 /*
621  * trace_buf_size is the size in bytes that is allocated
622  * for a buffer. Note, the number of bytes is always rounded
623  * to page size.
624  *
625  * This number is purposely set to a low value of 16384.
626  * If a dump on oops happens, it is much appreciated not to have
627  * to wait for all that output. In any case, this is configurable
628  * at both boot time and run time.
629  */
630 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
631
632 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
633
634 /* trace_types holds a link list of available tracers. */
635 static struct tracer            *trace_types __read_mostly;
636
637 /*
638  * trace_types_lock is used to protect the trace_types list.
639  */
640 DEFINE_MUTEX(trace_types_lock);
641
642 /*
643  * serialize the access of the ring buffer
644  *
645  * The ring buffer serializes readers, but that is only low level protection.
646  * The validity of the events (returned by ring_buffer_peek() etc.)
647  * is not protected by the ring buffer.
648  *
649  * The content of events may become garbage if we allow other processes
650  * to consume these events concurrently:
651  *   A) the page of the consumed events may become a normal page
652  *      (not a reader page) in the ring buffer, and this page will be
653  *      rewritten by the event producer.
654  *   B) the page of the consumed events may become a page for splice_read,
655  *      and this page will be returned to the system.
656  *
657  * These primitives allow multiple processes to access different cpu
658  * ring buffers concurrently.
659  *
660  * These primitives don't distinguish read-only and read-consume access.
661  * Multiple read-only accesses are also serialized.
662  */
663
664 #ifdef CONFIG_SMP
665 static DECLARE_RWSEM(all_cpu_access_lock);
666 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
667
668 static inline void trace_access_lock(int cpu)
669 {
670         if (cpu == RING_BUFFER_ALL_CPUS) {
671                 /* gain it for accessing the whole ring buffer. */
672                 down_write(&all_cpu_access_lock);
673         } else {
674                 /* gain it for accessing a cpu ring buffer. */
675
676                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
677                 down_read(&all_cpu_access_lock);
678
679                 /* Secondly block other access to this @cpu ring buffer. */
680                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
681         }
682 }
683
684 static inline void trace_access_unlock(int cpu)
685 {
686         if (cpu == RING_BUFFER_ALL_CPUS) {
687                 up_write(&all_cpu_access_lock);
688         } else {
689                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
690                 up_read(&all_cpu_access_lock);
691         }
692 }
693
694 static inline void trace_access_lock_init(void)
695 {
696         int cpu;
697
698         for_each_possible_cpu(cpu)
699                 mutex_init(&per_cpu(cpu_access_lock, cpu));
700 }
701
702 #else
703
704 static DEFINE_MUTEX(access_lock);
705
706 static inline void trace_access_lock(int cpu)
707 {
708         (void)cpu;
709         mutex_lock(&access_lock);
710 }
711
712 static inline void trace_access_unlock(int cpu)
713 {
714         (void)cpu;
715         mutex_unlock(&access_lock);
716 }
717
718 static inline void trace_access_lock_init(void)
719 {
720 }
721
722 #endif
723
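/*
 * Illustrative sketch (not part of this file): readers are expected to
 * bracket their access to a cpu buffer with these primitives:
 *
 *	trace_access_lock(cpu);
 *	... consume or peek at events of that cpu buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock exclusively, so a
 * whole-buffer reader excludes all per-cpu readers.
 */
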
724 #ifdef CONFIG_STACKTRACE
725 static void __ftrace_trace_stack(struct ring_buffer *buffer,
726                                  unsigned long flags,
727                                  int skip, int pc, struct pt_regs *regs);
728 static inline void ftrace_trace_stack(struct trace_array *tr,
729                                       struct ring_buffer *buffer,
730                                       unsigned long flags,
731                                       int skip, int pc, struct pt_regs *regs);
732
733 #else
734 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
735                                         unsigned long flags,
736                                         int skip, int pc, struct pt_regs *regs)
737 {
738 }
739 static inline void ftrace_trace_stack(struct trace_array *tr,
740                                       struct ring_buffer *buffer,
741                                       unsigned long flags,
742                                       int skip, int pc, struct pt_regs *regs)
743 {
744 }
745
746 #endif
747
748 static void tracer_tracing_on(struct trace_array *tr)
749 {
750         if (tr->trace_buffer.buffer)
751                 ring_buffer_record_on(tr->trace_buffer.buffer);
752         /*
753          * This flag is looked at when buffers haven't been allocated
754          * yet, or by some tracers (like irqsoff), that just want to
755          * know if the ring buffer has been disabled, but it can handle
756          * races where it gets disabled while we still do a record.
757          * As the check is in the fast path of the tracers, it is more
758          * important to be fast than accurate.
759          */
760         tr->buffer_disabled = 0;
761         /* Make the flag seen by readers */
762         smp_wmb();
763 }
764
765 /**
766  * tracing_on - enable tracing buffers
767  *
768  * This function enables tracing buffers that may have been
769  * disabled with tracing_off.
770  */
771 void tracing_on(void)
772 {
773         tracer_tracing_on(&global_trace);
774 }
775 EXPORT_SYMBOL_GPL(tracing_on);
776
777 /**
778  * __trace_puts - write a constant string into the trace buffer.
779  * @ip:    The address of the caller
780  * @str:   The constant string to write
781  * @size:  The size of the string.
782  */
783 int __trace_puts(unsigned long ip, const char *str, int size)
784 {
785         struct ring_buffer_event *event;
786         struct ring_buffer *buffer;
787         struct print_entry *entry;
788         unsigned long irq_flags;
789         int alloc;
790         int pc;
791
792         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
793                 return 0;
794
795         pc = preempt_count();
796
797         if (unlikely(tracing_selftest_running || tracing_disabled))
798                 return 0;
799
800         alloc = sizeof(*entry) + size + 2; /* possible \n added */
801
802         local_save_flags(irq_flags);
803         buffer = global_trace.trace_buffer.buffer;
804         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
805                                           irq_flags, pc);
806         if (!event)
807                 return 0;
808
809         entry = ring_buffer_event_data(event);
810         entry->ip = ip;
811
812         memcpy(&entry->buf, str, size);
813
814         /* Add a newline if necessary */
815         if (entry->buf[size - 1] != '\n') {
816                 entry->buf[size] = '\n';
817                 entry->buf[size + 1] = '\0';
818         } else
819                 entry->buf[size] = '\0';
820
821         __buffer_unlock_commit(buffer, event);
822         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
823
824         return size;
825 }
826 EXPORT_SYMBOL_GPL(__trace_puts);
827
828 /**
829  * __trace_bputs - write the pointer to a constant string into trace buffer
830  * @ip:    The address of the caller
831  * @str:   The constant string to write to the buffer to
832  */
833 int __trace_bputs(unsigned long ip, const char *str)
834 {
835         struct ring_buffer_event *event;
836         struct ring_buffer *buffer;
837         struct bputs_entry *entry;
838         unsigned long irq_flags;
839         int size = sizeof(struct bputs_entry);
840         int pc;
841
842         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
843                 return 0;
844
845         pc = preempt_count();
846
847         if (unlikely(tracing_selftest_running || tracing_disabled))
848                 return 0;
849
850         local_save_flags(irq_flags);
851         buffer = global_trace.trace_buffer.buffer;
852         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
853                                           irq_flags, pc);
854         if (!event)
855                 return 0;
856
857         entry = ring_buffer_event_data(event);
858         entry->ip                       = ip;
859         entry->str                      = str;
860
861         __buffer_unlock_commit(buffer, event);
862         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
863
864         return 1;
865 }
866 EXPORT_SYMBOL_GPL(__trace_bputs);
867
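/*
 * Illustrative note (not part of this file): __trace_puts() and
 * __trace_bputs() are normally reached through the trace_puts() macro,
 * which picks __trace_bputs() for string literals (only the pointer is
 * stored) and __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */
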
868 #ifdef CONFIG_TRACER_SNAPSHOT
869 /**
870  * tracing_snapshot - take a snapshot of the current buffer.
871  *
872  * This causes a swap between the snapshot buffer and the current live
873  * tracing buffer. You can use this to take snapshots of the live
874  * trace when some condition is triggered, but continue to trace.
875  *
876  * Note, make sure to allocate the snapshot with either
877  * a tracing_snapshot_alloc(), or by doing it manually
878  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
879  *
880  * If the snapshot buffer is not allocated, it will stop tracing.
881  * Basically making a permanent snapshot.
882  */
883 void tracing_snapshot(void)
884 {
885         struct trace_array *tr = &global_trace;
886         struct tracer *tracer = tr->current_trace;
887         unsigned long flags;
888
889         if (in_nmi()) {
890                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
891                 internal_trace_puts("*** snapshot is being ignored        ***\n");
892                 return;
893         }
894
895         if (!tr->allocated_snapshot) {
896                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
897                 internal_trace_puts("*** stopping trace here!   ***\n");
898                 tracing_off();
899                 return;
900         }
901
902         /* Note, snapshot can not be used when the tracer uses it */
903         if (tracer->use_max_tr) {
904                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
905                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
906                 return;
907         }
908
909         local_irq_save(flags);
910         update_max_tr(tr, current, smp_processor_id());
911         local_irq_restore(flags);
912 }
913 EXPORT_SYMBOL_GPL(tracing_snapshot);
914
915 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
916                                         struct trace_buffer *size_buf, int cpu_id);
917 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
918
919 static int alloc_snapshot(struct trace_array *tr)
920 {
921         int ret;
922
923         if (!tr->allocated_snapshot) {
924
925                 /* allocate spare buffer */
926                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
927                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
928                 if (ret < 0)
929                         return ret;
930
931                 tr->allocated_snapshot = true;
932         }
933
934         return 0;
935 }
936
937 static void free_snapshot(struct trace_array *tr)
938 {
939         /*
940          * We don't free the ring buffer; instead, we resize it because
941          * the max_tr ring buffer has some state (e.g. ring->clock) that
942          * we want to preserve.
943          */
944         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
945         set_buffer_entries(&tr->max_buffer, 1);
946         tracing_reset_online_cpus(&tr->max_buffer);
947         tr->allocated_snapshot = false;
948 }
949
950 /**
951  * tracing_alloc_snapshot - allocate snapshot buffer.
952  *
953  * This only allocates the snapshot buffer if it isn't already
954  * allocated - it doesn't also take a snapshot.
955  *
956  * This is meant to be used in cases where the snapshot buffer needs
957  * to be set up for events that can't sleep but need to be able to
958  * trigger a snapshot.
959  */
960 int tracing_alloc_snapshot(void)
961 {
962         struct trace_array *tr = &global_trace;
963         int ret;
964
965         ret = alloc_snapshot(tr);
966         WARN_ON(ret < 0);
967
968         return ret;
969 }
970 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
971
972 /**
973  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
974  *
975  * This is similar to tracing_snapshot(), but it will allocate the
976  * snapshot buffer if it isn't already allocated. Use this only
977  * where it is safe to sleep, as the allocation may sleep.
978  *
979  * This causes a swap between the snapshot buffer and the current live
980  * tracing buffer. You can use this to take snapshots of the live
981  * trace when some condition is triggered, but continue to trace.
982  */
983 void tracing_snapshot_alloc(void)
984 {
985         int ret;
986
987         ret = tracing_alloc_snapshot();
988         if (ret < 0)
989                 return;
990
991         tracing_snapshot();
992 }
993 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
994 #else
995 void tracing_snapshot(void)
996 {
997         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot);
1000 int tracing_alloc_snapshot(void)
1001 {
1002         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1003         return -ENODEV;
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1006 void tracing_snapshot_alloc(void)
1007 {
1008         /* Give warning */
1009         tracing_snapshot();
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1012 #endif /* CONFIG_TRACER_SNAPSHOT */
1013
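/*
 * Illustrative example (not part of this file, requires
 * CONFIG_TRACER_SNAPSHOT): a debugging hook can allocate the snapshot
 * buffer where sleeping is allowed and trigger the swap from a context
 * that cannot sleep:
 *
 *	ret = tracing_alloc_snapshot();		(at init time, may sleep)
 *	...
 *	if (suspicious_condition)
 *		tracing_snapshot();		(freeze the interesting trace)
 *
 * From user space the same swap is done with:
 *
 *	echo 1 > /sys/kernel/debug/tracing/snapshot
 */
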
1014 static void tracer_tracing_off(struct trace_array *tr)
1015 {
1016         if (tr->trace_buffer.buffer)
1017                 ring_buffer_record_off(tr->trace_buffer.buffer);
1018         /*
1019          * This flag is looked at when buffers haven't been allocated
1020          * yet, or by some tracers (like irqsoff), that just want to
1021          * know if the ring buffer has been disabled, but it can handle
1022          * races where it gets disabled while we still do a record.
1023          * As the check is in the fast path of the tracers, it is more
1024          * important to be fast than accurate.
1025          */
1026         tr->buffer_disabled = 1;
1027         /* Make the flag seen by readers */
1028         smp_wmb();
1029 }
1030
1031 /**
1032  * tracing_off - turn off tracing buffers
1033  *
1034  * This function stops the tracing buffers from recording data.
1035  * It does not disable any overhead the tracers themselves may
1036  * be causing. This function simply causes all recording to
1037  * the ring buffers to fail.
1038  */
1039 void tracing_off(void)
1040 {
1041         tracer_tracing_off(&global_trace);
1042 }
1043 EXPORT_SYMBOL_GPL(tracing_off);
1044
1045 void disable_trace_on_warning(void)
1046 {
1047         if (__disable_trace_on_warning)
1048                 tracing_off();
1049 }
1050
1051 /**
1052  * tracer_tracing_is_on - show the real state of the ring buffer
1053  * @tr: the trace array to check
1054  *
1055  * Shows the real state of the ring buffer, whether it is enabled or not.
1056  */
1057 int tracer_tracing_is_on(struct trace_array *tr)
1058 {
1059         if (tr->trace_buffer.buffer)
1060                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1061         return !tr->buffer_disabled;
1062 }
1063
1064 /**
1065  * tracing_is_on - show state of ring buffers enabled
1066  */
1067 int tracing_is_on(void)
1068 {
1069         return tracer_tracing_is_on(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_is_on);
1072
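/*
 * Illustrative sketch (not part of this file): tracing_on(), tracing_off()
 * and tracing_is_on() are exported so a module can fence a region of
 * interest, e.g.:
 *
 *	if (detected_bad_state)
 *		tracing_off();		(freeze the buffer for post-mortem)
 *
 * Only recording into the ring buffers is stopped; the tracers themselves
 * keep running, as noted in the tracing_off() comment above.
 */
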
1073 static int __init set_buf_size(char *str)
1074 {
1075         unsigned long buf_size;
1076
1077         if (!str)
1078                 return 0;
1079         buf_size = memparse(str, &str);
1080         /*
1081          * nr_entries can not be zero and the startup
1082          * tests require some buffer space. Therefore
1083          * ensure we have at least 4096 bytes of buffer.
1084          */
1085         trace_buf_size = max(4096UL, buf_size);
1086         return 1;
1087 }
1088 __setup("trace_buf_size=", set_buf_size);
1089
1090 static int __init set_tracing_thresh(char *str)
1091 {
1092         unsigned long threshold;
1093         int ret;
1094
1095         if (!str)
1096                 return 0;
1097         ret = kstrtoul(str, 0, &threshold);
1098         if (ret < 0)
1099                 return 0;
1100         tracing_thresh = threshold * 1000;
1101         return 1;
1102 }
1103 __setup("tracing_thresh=", set_tracing_thresh);
1104
1105 unsigned long nsecs_to_usecs(unsigned long nsecs)
1106 {
1107         return nsecs / 1000;
1108 }
1109
1110 /*
1111  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1112  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1113  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1114  * of strings in the order that the enums were defined.
1115  */
1116 #undef C
1117 #define C(a, b) b
1118
1119 /* These must match the bit positions in trace_iterator_flags */
1120 static const char *trace_options[] = {
1121         TRACE_FLAGS
1122         NULL
1123 };
1124
1125 static struct {
1126         u64 (*func)(void);
1127         const char *name;
1128         int in_ns;              /* is this clock in nanoseconds? */
1129 } trace_clocks[] = {
1130         { trace_clock_local,            "local",        1 },
1131         { trace_clock_global,           "global",       1 },
1132         { trace_clock_counter,          "counter",      0 },
1133         { trace_clock_jiffies,          "uptime",       0 },
1134         { trace_clock,                  "perf",         1 },
1135         { ktime_get_mono_fast_ns,       "mono",         1 },
1136         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1137         ARCH_TRACE_CLOCKS
1138 };
1139
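/*
 * Illustrative example (not part of this file): the timestamp clock is
 * chosen from the table above either at boot time with "trace_clock=global"
 * (see set_trace_boot_clock()) or at run time with:
 *
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 */
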
1140 /*
1141  * trace_parser_get_init - gets the buffer for trace parser
1142  */
1143 int trace_parser_get_init(struct trace_parser *parser, int size)
1144 {
1145         memset(parser, 0, sizeof(*parser));
1146
1147         parser->buffer = kmalloc(size, GFP_KERNEL);
1148         if (!parser->buffer)
1149                 return 1;
1150
1151         parser->size = size;
1152         return 0;
1153 }
1154
1155 /*
1156  * trace_parser_put - frees the buffer for trace parser
1157  */
1158 void trace_parser_put(struct trace_parser *parser)
1159 {
1160         kfree(parser->buffer);
1161 }
1162
1163 /*
1164  * trace_get_user - reads the user input string separated by space
1165  * (matched by isspace(ch))
1166  *
1167  * For each string found the 'struct trace_parser' is updated,
1168  * and the function returns.
1169  *
1170  * Returns number of bytes read.
1171  *
1172  * See kernel/trace/trace.h for 'struct trace_parser' details.
1173  */
1174 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1175         size_t cnt, loff_t *ppos)
1176 {
1177         char ch;
1178         size_t read = 0;
1179         ssize_t ret;
1180
1181         if (!*ppos)
1182                 trace_parser_clear(parser);
1183
1184         ret = get_user(ch, ubuf++);
1185         if (ret)
1186                 goto out;
1187
1188         read++;
1189         cnt--;
1190
1191         /*
1192          * If the parser did not finish with the last write, continue
1193          * reading the user input without skipping leading spaces.
1194          */
1195         if (!parser->cont) {
1196                 /* skip white space */
1197                 while (cnt && isspace(ch)) {
1198                         ret = get_user(ch, ubuf++);
1199                         if (ret)
1200                                 goto out;
1201                         read++;
1202                         cnt--;
1203                 }
1204
1205                 /* only spaces were written */
1206                 if (isspace(ch)) {
1207                         *ppos += read;
1208                         ret = read;
1209                         goto out;
1210                 }
1211
1212                 parser->idx = 0;
1213         }
1214
1215         /* read the non-space input */
1216         while (cnt && !isspace(ch)) {
1217                 if (parser->idx < parser->size - 1)
1218                         parser->buffer[parser->idx++] = ch;
1219                 else {
1220                         ret = -EINVAL;
1221                         goto out;
1222                 }
1223                 ret = get_user(ch, ubuf++);
1224                 if (ret)
1225                         goto out;
1226                 read++;
1227                 cnt--;
1228         }
1229
1230         /* We either got finished input or we have to wait for another call. */
1231         if (isspace(ch)) {
1232                 parser->buffer[parser->idx] = 0;
1233                 parser->cont = false;
1234         } else if (parser->idx < parser->size - 1) {
1235                 parser->cont = true;
1236                 parser->buffer[parser->idx++] = ch;
1237         } else {
1238                 ret = -EINVAL;
1239                 goto out;
1240         }
1241
1242         *ppos += read;
1243         ret = read;
1244
1245 out:
1246         return ret;
1247 }
1248
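/*
 * Illustrative sketch (not part of this file): callers such as
 * trace_pid_write() above consume one whitespace-delimited token per call
 * to trace_get_user() in a loop like:
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... parser.buffer now holds one token ...
 *		trace_parser_clear(&parser);
 *	}
 *
 * A token split across two write() calls is glued back together via
 * parser->cont.
 */
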
1249 /* TODO add a seq_buf_to_buffer() */
1250 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1251 {
1252         int len;
1253
1254         if (trace_seq_used(s) <= s->seq.readpos)
1255                 return -EBUSY;
1256
1257         len = trace_seq_used(s) - s->seq.readpos;
1258         if (cnt > len)
1259                 cnt = len;
1260         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1261
1262         s->seq.readpos += cnt;
1263         return cnt;
1264 }
1265
1266 unsigned long __read_mostly     tracing_thresh;
1267
1268 #ifdef CONFIG_TRACER_MAX_TRACE
1269 /*
1270  * Copy the new maximum trace into the separate maximum-trace
1271  * structure. (this way the maximum trace is permanently saved,
1272  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1273  */
1274 static void
1275 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1276 {
1277         struct trace_buffer *trace_buf = &tr->trace_buffer;
1278         struct trace_buffer *max_buf = &tr->max_buffer;
1279         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1280         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1281
1282         max_buf->cpu = cpu;
1283         max_buf->time_start = data->preempt_timestamp;
1284
1285         max_data->saved_latency = tr->max_latency;
1286         max_data->critical_start = data->critical_start;
1287         max_data->critical_end = data->critical_end;
1288
1289         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1290         max_data->pid = tsk->pid;
1291         /*
1292          * If tsk == current, then use current_uid(), as that does not use
1293          * RCU. The irq tracer can be called out of RCU scope.
1294          */
1295         if (tsk == current)
1296                 max_data->uid = current_uid();
1297         else
1298                 max_data->uid = task_uid(tsk);
1299
1300         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1301         max_data->policy = tsk->policy;
1302         max_data->rt_priority = tsk->rt_priority;
1303
1304         /* record this task's comm */
1305         tracing_record_cmdline(tsk);
1306 }
1307
1308 /**
1309  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1310  * @tr: tracer
1311  * @tsk: the task with the latency
1312  * @cpu: The cpu that initiated the trace.
1313  *
1314  * Flip the buffers between the @tr and the max_tr and record information
1315  * about which task was the cause of this latency.
1316  */
1317 void
1318 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct ring_buffer *buf;
1321
1322         if (tr->stop_count)
1323                 return;
1324
1325         WARN_ON_ONCE(!irqs_disabled());
1326
1327         if (!tr->allocated_snapshot) {
1328                 /* Only the nop tracer should hit this when disabling */
1329                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1330                 return;
1331         }
1332
1333         arch_spin_lock(&tr->max_lock);
1334
1335         /* Inherit the recordable setting from trace_buffer */
1336         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1337                 ring_buffer_record_on(tr->max_buffer.buffer);
1338         else
1339                 ring_buffer_record_off(tr->max_buffer.buffer);
1340
1341         buf = tr->trace_buffer.buffer;
1342         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1343         tr->max_buffer.buffer = buf;
1344
1345         __update_max_tr(tr, tsk, cpu);
1346         arch_spin_unlock(&tr->max_lock);
1347 }
1348
1349 /**
1350  * update_max_tr_single - only copy one trace over, and reset the rest
1351  * @tr: tracer
1352  * @tsk: task with the latency
1353  * @cpu: the cpu of the buffer to copy.
1354  *
1355  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1356  */
1357 void
1358 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1359 {
1360         int ret;
1361
1362         if (tr->stop_count)
1363                 return;
1364
1365         WARN_ON_ONCE(!irqs_disabled());
1366         if (!tr->allocated_snapshot) {
1367                 /* Only the nop tracer should hit this when disabling */
1368                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1369                 return;
1370         }
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1375
1376         if (ret == -EBUSY) {
1377                 /*
1378                  * We failed to swap the buffer due to a commit taking
1379                  * place on this CPU. We fail to record, but we reset
1380                  * the max trace buffer (no one writes directly to it)
1381                  * and flag that it failed.
1382                  */
1383                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1384                         "Failed to swap buffers due to commit in progress\n");
1385         }
1386
1387         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1388
1389         __update_max_tr(tr, tsk, cpu);
1390         arch_spin_unlock(&tr->max_lock);
1391 }
1392 #endif /* CONFIG_TRACER_MAX_TRACE */
1393
1394 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1395 {
1396         /* Iterators are static, they should be filled or empty */
1397         if (trace_buffer_iter(iter, iter->cpu_file))
1398                 return 0;
1399
1400         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1401                                 full);
1402 }
1403
1404 #ifdef CONFIG_FTRACE_STARTUP_TEST
1405 static int run_tracer_selftest(struct tracer *type)
1406 {
1407         struct trace_array *tr = &global_trace;
1408         struct tracer *saved_tracer = tr->current_trace;
1409         int ret;
1410
1411         if (!type->selftest || tracing_selftest_disabled)
1412                 return 0;
1413
1414         /*
1415          * Run a selftest on this tracer.
1416          * Here we reset the trace buffer, and set the current
1417          * tracer to be this tracer. The tracer can then run some
1418          * internal tracing to verify that everything is in order.
1419          * If we fail, we do not register this tracer.
1420          */
1421         tracing_reset_online_cpus(&tr->trace_buffer);
1422
1423         tr->current_trace = type;
1424
1425 #ifdef CONFIG_TRACER_MAX_TRACE
1426         if (type->use_max_tr) {
1427                 /* If we expanded the buffers, make sure the max is expanded too */
1428                 if (ring_buffer_expanded)
1429                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1430                                            RING_BUFFER_ALL_CPUS);
1431                 tr->allocated_snapshot = true;
1432         }
1433 #endif
1434
1435         /* the test is responsible for initializing and enabling */
1436         pr_info("Testing tracer %s: ", type->name);
1437         ret = type->selftest(type, tr);
1438         /* the test is responsible for resetting too */
1439         tr->current_trace = saved_tracer;
1440         if (ret) {
1441                 printk(KERN_CONT "FAILED!\n");
1442                 /* Add the warning after printing 'FAILED' */
1443                 WARN_ON(1);
1444                 return -1;
1445         }
1446         /* Only reset on passing, to avoid touching corrupted buffers */
1447         tracing_reset_online_cpus(&tr->trace_buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         if (type->use_max_tr) {
1451                 tr->allocated_snapshot = false;
1452
1453                 /* Shrink the max buffer again */
1454                 if (ring_buffer_expanded)
1455                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1456                                            RING_BUFFER_ALL_CPUS);
1457         }
1458 #endif
1459
1460         printk(KERN_CONT "PASSED\n");
1461         return 0;
1462 }
1463 #else
1464 static inline int run_tracer_selftest(struct tracer *type)
1465 {
1466         return 0;
1467 }
1468 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1469
1470 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1471
1472 static void __init apply_trace_boot_options(void);
1473
1474 /**
1475  * register_tracer - register a tracer with the ftrace system.
1476  * @type - the plugin for the tracer
1477  *
1478  * Register a new plugin tracer.
1479  */
1480 int __init register_tracer(struct tracer *type)
1481 {
1482         struct tracer *t;
1483         int ret = 0;
1484
1485         if (!type->name) {
1486                 pr_info("Tracer must have a name\n");
1487                 return -1;
1488         }
1489
1490         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1491                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1492                 return -1;
1493         }
1494
1495         mutex_lock(&trace_types_lock);
1496
1497         tracing_selftest_running = true;
1498
1499         for (t = trace_types; t; t = t->next) {
1500                 if (strcmp(type->name, t->name) == 0) {
1501                         /* already found */
1502                         pr_info("Tracer %s already registered\n",
1503                                 type->name);
1504                         ret = -1;
1505                         goto out;
1506                 }
1507         }
1508
1509         if (!type->set_flag)
1510                 type->set_flag = &dummy_set_flag;
1511         if (!type->flags) {
1512                 /* allocate a dummy tracer_flags */
1513                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1514                 if (!type->flags) {
1515                         ret = -ENOMEM;
1516                         goto out;
1517                 }
1518                 type->flags->val = 0;
1519                 type->flags->opts = dummy_tracer_opt;
1520         } else
1521                 if (!type->flags->opts)
1522                         type->flags->opts = dummy_tracer_opt;
1523
1524         /* store the tracer for __set_tracer_option */
1525         type->flags->trace = type;
1526
1527         ret = run_tracer_selftest(type);
1528         if (ret < 0)
1529                 goto out;
1530
1531         type->next = trace_types;
1532         trace_types = type;
1533         add_tracer_options(&global_trace, type);
1534
1535  out:
1536         tracing_selftest_running = false;
1537         mutex_unlock(&trace_types_lock);
1538
1539         if (ret || !default_bootup_tracer)
1540                 goto out_unlock;
1541
1542         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1543                 goto out_unlock;
1544
1545         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1546         /* Do we want this tracer to start on bootup? */
1547         tracing_set_tracer(&global_trace, type->name);
1548         default_bootup_tracer = NULL;
1549
1550         apply_trace_boot_options();
1551
1552         /* disable other selftests, since this will break them */
1553         tracing_selftest_disabled = true;
1554 #ifdef CONFIG_FTRACE_STARTUP_TEST
1555         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1556                type->name);
1557 #endif
1558
1559  out_unlock:
1560         return ret;
1561 }
1562
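/*
 * Illustrative sketch (not part of this file): a built-in tracer registers
 * itself from an initcall, roughly like this (all names are hypothetical):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */
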
1563 void tracing_reset(struct trace_buffer *buf, int cpu)
1564 {
1565         struct ring_buffer *buffer = buf->buffer;
1566
1567         if (!buffer)
1568                 return;
1569
1570         ring_buffer_record_disable(buffer);
1571
1572         /* Make sure all commits have finished */
1573         synchronize_sched();
1574         ring_buffer_reset_cpu(buffer, cpu);
1575
1576         ring_buffer_record_enable(buffer);
1577 }
1578
1579 void tracing_reset_online_cpus(struct trace_buffer *buf)
1580 {
1581         struct ring_buffer *buffer = buf->buffer;
1582         int cpu;
1583
1584         if (!buffer)
1585                 return;
1586
1587         ring_buffer_record_disable(buffer);
1588
1589         /* Make sure all commits have finished */
1590         synchronize_sched();
1591
1592         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1593
1594         for_each_online_cpu(cpu)
1595                 ring_buffer_reset_cpu(buffer, cpu);
1596
1597         ring_buffer_record_enable(buffer);
1598 }
1599
1600 /* Must have trace_types_lock held */
1601 void tracing_reset_all_online_cpus(void)
1602 {
1603         struct trace_array *tr;
1604
1605         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1606                 tracing_reset_online_cpus(&tr->trace_buffer);
1607 #ifdef CONFIG_TRACER_MAX_TRACE
1608                 tracing_reset_online_cpus(&tr->max_buffer);
1609 #endif
1610         }
1611 }
1612
1613 #define SAVED_CMDLINES_DEFAULT 128
1614 #define NO_CMDLINE_MAP UINT_MAX
1615 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1616 struct saved_cmdlines_buffer {
1617         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1618         unsigned *map_cmdline_to_pid;
1619         unsigned cmdline_num;
1620         int cmdline_idx;
1621         char *saved_cmdlines;
1622 };
1623 static struct saved_cmdlines_buffer *savedcmd;
1624
1625 static inline char *get_saved_cmdlines(int idx)
1626 {
1627         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1628 }
1629
1630 static inline void set_cmdline(int idx, const char *cmdline)
1631 {
1632         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1633 }
1634
1635 static int allocate_cmdlines_buffer(unsigned int val,
1636                                     struct saved_cmdlines_buffer *s)
1637 {
1638         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1639                                         GFP_KERNEL);
1640         if (!s->map_cmdline_to_pid)
1641                 return -ENOMEM;
1642
1643         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1644         if (!s->saved_cmdlines) {
1645                 kfree(s->map_cmdline_to_pid);
1646                 return -ENOMEM;
1647         }
1648
1649         s->cmdline_idx = 0;
1650         s->cmdline_num = val;
1651         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1652                sizeof(s->map_pid_to_cmdline));
1653         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1654                val * sizeof(*s->map_cmdline_to_pid));
1655
1656         return 0;
1657 }
1658
1659 static int trace_create_savedcmd(void)
1660 {
1661         int ret;
1662
1663         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1664         if (!savedcmd)
1665                 return -ENOMEM;
1666
1667         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1668         if (ret < 0) {
1669                 kfree(savedcmd);
1670                 savedcmd = NULL;
1671                 return -ENOMEM;
1672         }
1673
1674         return 0;
1675 }
1676
1677 int is_tracing_stopped(void)
1678 {
1679         return global_trace.stop_count;
1680 }
1681
1682 /**
1683  * tracing_start - quick start of the tracer
1684  *
1685  * If tracing is enabled but was stopped by tracing_stop,
1686  * this will start the tracer back up.
1687  */
1688 void tracing_start(void)
1689 {
1690         struct ring_buffer *buffer;
1691         unsigned long flags;
1692
1693         if (tracing_disabled)
1694                 return;
1695
1696         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1697         if (--global_trace.stop_count) {
1698                 if (global_trace.stop_count < 0) {
1699                         /* Someone screwed up their debugging */
1700                         WARN_ON_ONCE(1);
1701                         global_trace.stop_count = 0;
1702                 }
1703                 goto out;
1704         }
1705
1706         /* Prevent the buffers from switching */
1707         arch_spin_lock(&global_trace.max_lock);
1708
1709         buffer = global_trace.trace_buffer.buffer;
1710         if (buffer)
1711                 ring_buffer_record_enable(buffer);
1712
1713 #ifdef CONFIG_TRACER_MAX_TRACE
1714         buffer = global_trace.max_buffer.buffer;
1715         if (buffer)
1716                 ring_buffer_record_enable(buffer);
1717 #endif
1718
1719         arch_spin_unlock(&global_trace.max_lock);
1720
1721  out:
1722         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1723 }
1724
1725 static void tracing_start_tr(struct trace_array *tr)
1726 {
1727         struct ring_buffer *buffer;
1728         unsigned long flags;
1729
1730         if (tracing_disabled)
1731                 return;
1732
1733         /* If global, we need to also start the max tracer */
1734         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1735                 return tracing_start();
1736
1737         raw_spin_lock_irqsave(&tr->start_lock, flags);
1738
1739         if (--tr->stop_count) {
1740                 if (tr->stop_count < 0) {
1741                         /* Someone screwed up their debugging */
1742                         WARN_ON_ONCE(1);
1743                         tr->stop_count = 0;
1744                 }
1745                 goto out;
1746         }
1747
1748         buffer = tr->trace_buffer.buffer;
1749         if (buffer)
1750                 ring_buffer_record_enable(buffer);
1751
1752  out:
1753         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1754 }
1755
1756 /**
1757  * tracing_stop - quick stop of the tracer
1758  *
1759  * Lightweight way to stop tracing. Use in conjunction with
1760  * tracing_start.
1761  */
1762 void tracing_stop(void)
1763 {
1764         struct ring_buffer *buffer;
1765         unsigned long flags;
1766
1767         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1768         if (global_trace.stop_count++)
1769                 goto out;
1770
1771         /* Prevent the buffers from switching */
1772         arch_spin_lock(&global_trace.max_lock);
1773
1774         buffer = global_trace.trace_buffer.buffer;
1775         if (buffer)
1776                 ring_buffer_record_disable(buffer);
1777
1778 #ifdef CONFIG_TRACER_MAX_TRACE
1779         buffer = global_trace.max_buffer.buffer;
1780         if (buffer)
1781                 ring_buffer_record_disable(buffer);
1782 #endif
1783
1784         arch_spin_unlock(&global_trace.max_lock);
1785
1786  out:
1787         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1788 }
1789
1790 static void tracing_stop_tr(struct trace_array *tr)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         /* If global, we need to also stop the max tracer */
1796         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1797                 return tracing_stop();
1798
1799         raw_spin_lock_irqsave(&tr->start_lock, flags);
1800         if (tr->stop_count++)
1801                 goto out;
1802
1803         buffer = tr->trace_buffer.buffer;
1804         if (buffer)
1805                 ring_buffer_record_disable(buffer);
1806
1807  out:
1808         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1809 }
1810
1811 void trace_stop_cmdline_recording(void);
1812
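/*
 * Save the comm of @tsk in the saved cmdlines cache so that trace
 * output can later map a pid back to a task name.  Returns 1 on
 * success and 0 if trace_cmdline_lock could not be taken.
 */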
1813 static int trace_save_cmdline(struct task_struct *tsk)
1814 {
1815         unsigned tpid, idx;
1816
1817         /* treat recording of idle task as a success */
1818         if (!tsk->pid)
1819                 return 1;
1820
1821         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1822
1823         /*
1824          * It's not the end of the world if we don't get
1825          * the lock, but we also don't want to spin
1826          * nor do we want to disable interrupts,
1827          * so if we miss here, then better luck next time.
1828          */
1829         if (!arch_spin_trylock(&trace_cmdline_lock))
1830                 return 0;
1831
1832         idx = savedcmd->map_pid_to_cmdline[tpid];
1833         if (idx == NO_CMDLINE_MAP) {
1834                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1835
1836                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1837                 savedcmd->cmdline_idx = idx;
1838         }
1839
1840         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1841         set_cmdline(idx, tsk->comm);
1842
1843         arch_spin_unlock(&trace_cmdline_lock);
1844
1845         return 1;
1846 }
1847
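/*
 * Look up the saved comm for @pid.  Returns "<idle>" for pid 0,
 * "<XXX>" for a negative pid, and "<...>" when no (or only a stale)
 * mapping is cached.  Caller must hold trace_cmdline_lock.
 */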
1848 static void __trace_find_cmdline(int pid, char comm[])
1849 {
1850         unsigned map;
1851         int tpid;
1852
1853         if (!pid) {
1854                 strcpy(comm, "<idle>");
1855                 return;
1856         }
1857
1858         if (WARN_ON_ONCE(pid < 0)) {
1859                 strcpy(comm, "<XXX>");
1860                 return;
1861         }
1862
1863         tpid = pid & (PID_MAX_DEFAULT - 1);
1864         map = savedcmd->map_pid_to_cmdline[tpid];
1865         if (map != NO_CMDLINE_MAP) {
1866                 tpid = savedcmd->map_cmdline_to_pid[map];
1867                 if (tpid == pid) {
1868                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1869                         return;
1870                 }
1871         }
1872         strcpy(comm, "<...>");
1873 }
1874
1875 void trace_find_cmdline(int pid, char comm[])
1876 {
1877         preempt_disable();
1878         arch_spin_lock(&trace_cmdline_lock);
1879
1880         __trace_find_cmdline(pid, comm);
1881
1882         arch_spin_unlock(&trace_cmdline_lock);
1883         preempt_enable();
1884 }
1885
1886 void tracing_record_cmdline(struct task_struct *tsk)
1887 {
1888         if (!__this_cpu_read(trace_cmdline_save))
1889                 return;
1890
1891         if (trace_save_cmdline(tsk))
1892                 __this_cpu_write(trace_cmdline_save, false);
1893 }
1894
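/*
 * Fill in the fields common to all trace entries: the pid of the
 * current task, the preemption count, and the irq/softirq/NMI and
 * need-resched state derived from @flags and @pc.
 */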
1895 void
1896 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1897                              int pc)
1898 {
1899         struct task_struct *tsk = current;
1900
1901         entry->preempt_count            = pc & 0xff;
1902         entry->pid                      = (tsk) ? tsk->pid : 0;
1903         entry->flags =
1904 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1905                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1906 #else
1907                 TRACE_FLAG_IRQS_NOSUPPORT |
1908 #endif
1909                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1910                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1911                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1912                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1913                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1914 }
1915 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1916
1917 static __always_inline void
1918 trace_event_setup(struct ring_buffer_event *event,
1919                   int type, unsigned long flags, int pc)
1920 {
1921         struct trace_entry *ent = ring_buffer_event_data(event);
1922
1923         tracing_generic_entry_update(ent, flags, pc);
1924         ent->type = type;
1925 }
1926
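/*
 * Reserve space for an event of @len bytes on @buffer and initialize
 * its common fields.  Returns NULL if the event could not be
 * reserved (e.g. recording is disabled).
 */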
1927 struct ring_buffer_event *
1928 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1929                           int type,
1930                           unsigned long len,
1931                           unsigned long flags, int pc)
1932 {
1933         struct ring_buffer_event *event;
1934
1935         event = ring_buffer_lock_reserve(buffer, len);
1936         if (event != NULL)
1937                 trace_event_setup(event, type, flags, pc);
1938
1939         return event;
1940 }
1941
1942 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1943 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1944 static int trace_buffered_event_ref;
1945
1946 /**
1947  * trace_buffered_event_enable - enable buffering events
1948  *
1949  * When events are being filtered, it is quicker to use a temporary
1950  * buffer to write the event data into if there's a likely chance
1951  * that it will not be committed. The discard of the ring buffer
1952  * is not as fast as committing, and is much slower than copying
1953  * a commit.
1954  *
1955  * When an event is to be filtered, per-CPU buffers are allocated to
1956  * write the event data into. If the event is then filtered and
1957  * discarded it is simply dropped; otherwise the entire data is
1958  * committed in one shot.
1959  */
1960 void trace_buffered_event_enable(void)
1961 {
1962         struct ring_buffer_event *event;
1963         struct page *page;
1964         int cpu;
1965
1966         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1967
1968         if (trace_buffered_event_ref++)
1969                 return;
1970
1971         for_each_tracing_cpu(cpu) {
1972                 page = alloc_pages_node(cpu_to_node(cpu),
1973                                         GFP_KERNEL | __GFP_NORETRY, 0);
1974                 if (!page)
1975                         goto failed;
1976
1977                 event = page_address(page);
1978                 memset(event, 0, sizeof(*event));
1979
1980                 per_cpu(trace_buffered_event, cpu) = event;
1981
1982                 preempt_disable();
1983                 if (cpu == smp_processor_id() &&
1984                     this_cpu_read(trace_buffered_event) !=
1985                     per_cpu(trace_buffered_event, cpu))
1986                         WARN_ON_ONCE(1);
1987                 preempt_enable();
1988         }
1989
1990         return;
1991  failed:
1992         trace_buffered_event_disable();
1993 }
1994
1995 static void enable_trace_buffered_event(void *data)
1996 {
1997         /* Probably not needed, but do it anyway */
1998         smp_rmb();
1999         this_cpu_dec(trace_buffered_event_cnt);
2000 }
2001
2002 static void disable_trace_buffered_event(void *data)
2003 {
2004         this_cpu_inc(trace_buffered_event_cnt);
2005 }
2006
2007 /**
2008  * trace_buffered_event_disable - disable buffering events
2009  *
2010  * When a filter is removed, it is faster to not use the buffered
2011  * events, and to commit directly into the ring buffer. Free up
2012  * the temp buffers when there are no more users. This requires
2013  * special synchronization with current events.
2014  */
2015 void trace_buffered_event_disable(void)
2016 {
2017         int cpu;
2018
2019         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2020
2021         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2022                 return;
2023
2024         if (--trace_buffered_event_ref)
2025                 return;
2026
2027         preempt_disable();
2028         /* For each CPU, set the buffer as used. */
2029         smp_call_function_many(tracing_buffer_mask,
2030                                disable_trace_buffered_event, NULL, 1);
2031         preempt_enable();
2032
2033         /* Wait for all current users to finish */
2034         synchronize_sched();
2035
2036         for_each_tracing_cpu(cpu) {
2037                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2038                 per_cpu(trace_buffered_event, cpu) = NULL;
2039         }
2040         /*
2041          * Make sure trace_buffered_event is NULL before clearing
2042          * trace_buffered_event_cnt.
2043          */
2044         smp_wmb();
2045
2046         preempt_disable();
2047         /* Do the work on each cpu */
2048         smp_call_function_many(tracing_buffer_mask,
2049                                enable_trace_buffered_event, NULL, 1);
2050         preempt_enable();
2051 }
2052
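/*
 * Commit an event: if @event lives in the per-CPU buffered event it
 * is copied into the ring buffer in a single write and the buffered
 * event is released; otherwise the reserved event is committed in
 * place.  Also marks that the current comm should be saved.
 */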
2053 void
2054 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2055 {
2056         __this_cpu_write(trace_cmdline_save, true);
2057
2058         /* If this is the temp buffer, we need to commit fully */
2059         if (this_cpu_read(trace_buffered_event) == event) {
2060                 /* Length is in event->array[0] */
2061                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2062                 /* Release the temp buffer */
2063                 this_cpu_dec(trace_buffered_event_cnt);
2064         } else
2065                 ring_buffer_unlock_commit(buffer, event);
2066 }
2067
2068 static struct ring_buffer *temp_buffer;
2069
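/*
 * Reserve an event for a trace event file.  If the event is being
 * filtered or is soft disabled, the per-CPU buffered event is tried
 * first so that a discarded event stays cheap; otherwise a normal
 * ring buffer reserve is done, falling back to temp_buffer when a
 * trigger still needs to see the event data.
 */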
2070 struct ring_buffer_event *
2071 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2072                           struct trace_event_file *trace_file,
2073                           int type, unsigned long len,
2074                           unsigned long flags, int pc)
2075 {
2076         struct ring_buffer_event *entry;
2077         int val;
2078
2079         *current_rb = trace_file->tr->trace_buffer.buffer;
2080
2081         if ((trace_file->flags &
2082              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2083             (entry = this_cpu_read(trace_buffered_event))) {
2084                 /* Try to use the per cpu buffer first */
2085                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2086                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2087                         trace_event_setup(entry, type, flags, pc);
2088                         entry->array[0] = len;
2089                         return entry;
2090                 }
2091                 this_cpu_dec(trace_buffered_event_cnt);
2092         }
2093
2094         entry = trace_buffer_lock_reserve(*current_rb,
2095                                          type, len, flags, pc);
2096         /*
2097          * If tracing is off, but we have triggers enabled
2098          * we still need to look at the event data. Use the temp_buffer
2099          * to store the trace event for the trigger to use. It's recursion
2100          * safe and will not be recorded anywhere.
2101          */
2102         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2103                 *current_rb = temp_buffer;
2104                 entry = trace_buffer_lock_reserve(*current_rb,
2105                                                   type, len, flags, pc);
2106         }
2107         return entry;
2108 }
2109 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2110
2111 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2112                                      struct ring_buffer *buffer,
2113                                      struct ring_buffer_event *event,
2114                                      unsigned long flags, int pc,
2115                                      struct pt_regs *regs)
2116 {
2117         __buffer_unlock_commit(buffer, event);
2118
2119         /*
2120          * If regs is not set, then skip the following callers:
2121          *   trace_buffer_unlock_commit_regs
2122          *   event_trigger_unlock_commit
2123          *   trace_event_buffer_commit
2124          *   trace_event_raw_event_sched_switch
2125          * Note, we can still get here via blktrace, wakeup tracer
2126          * and mmiotrace, but that's ok if they lose a function or
2127          * two. They are not that meaningful.
2128          */
2129         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2130         ftrace_trace_userstack(tr, buffer, flags, pc);
2131 }
2132
2133 void
2134 trace_function(struct trace_array *tr,
2135                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2136                int pc)
2137 {
2138         struct trace_event_call *call = &event_function;
2139         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2140         struct ring_buffer_event *event;
2141         struct ftrace_entry *entry;
2142
2143         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2144                                           flags, pc);
2145         if (!event)
2146                 return;
2147         entry   = ring_buffer_event_data(event);
2148         entry->ip                       = ip;
2149         entry->parent_ip                = parent_ip;
2150
2151         if (!call_filter_check_discard(call, entry, buffer, event))
2152                 __buffer_unlock_commit(buffer, event);
2153 }
2154
2155 #ifdef CONFIG_STACKTRACE
2156
2157 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2158 struct ftrace_stack {
2159         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2160 };
2161
2162 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2163 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2164
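/*
 * Record a kernel stack trace into @buffer as a TRACE_STACK event.
 * The per-CPU scratch stack is used to size the entry when it is
 * free; nested users (interrupts, NMIs) save a smaller fixed-size
 * trace directly into the reserved event instead.
 */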
2165 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2166                                  unsigned long flags,
2167                                  int skip, int pc, struct pt_regs *regs)
2168 {
2169         struct trace_event_call *call = &event_kernel_stack;
2170         struct ring_buffer_event *event;
2171         struct stack_entry *entry;
2172         struct stack_trace trace;
2173         int use_stack;
2174         int size = FTRACE_STACK_ENTRIES;
2175
2176         trace.nr_entries        = 0;
2177         trace.skip              = skip;
2178
2179         /*
2180          * Add two, for this function and the call to save_stack_trace().
2181          * If regs is set, then these functions will not be in the way.
2182          */
2183         if (!regs)
2184                 trace.skip += 2;
2185
2186         /*
2187          * Since events can happen in NMIs there's no safe way to
2188          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2189          * or NMI comes in, it will just have to use the default
2190          * FTRACE_STACK_ENTRIES sized stack that is saved directly into the event.
2191          */
2192         preempt_disable_notrace();
2193
2194         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2195         /*
2196          * We don't need any atomic variables, just a barrier.
2197          * If an interrupt comes in, we don't care, because it would
2198          * have exited and put the counter back to what we want.
2199          * We just need a barrier to keep gcc from moving things
2200          * around.
2201          */
2202         barrier();
2203         if (use_stack == 1) {
2204                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2205                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2206
2207                 if (regs)
2208                         save_stack_trace_regs(regs, &trace);
2209                 else
2210                         save_stack_trace(&trace);
2211
2212                 if (trace.nr_entries > size)
2213                         size = trace.nr_entries;
2214         } else
2215                 /* From now on, use_stack is a boolean */
2216                 use_stack = 0;
2217
2218         size *= sizeof(unsigned long);
2219
2220         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2221                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2222                                     flags, pc);
2223         if (!event)
2224                 goto out;
2225         entry = ring_buffer_event_data(event);
2226
2227         memset(&entry->caller, 0, size);
2228
2229         if (use_stack)
2230                 memcpy(&entry->caller, trace.entries,
2231                        trace.nr_entries * sizeof(unsigned long));
2232         else {
2233                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2234                 trace.entries           = entry->caller;
2235                 if (regs)
2236                         save_stack_trace_regs(regs, &trace);
2237                 else
2238                         save_stack_trace(&trace);
2239         }
2240
2241         entry->size = trace.nr_entries;
2242
2243         if (!call_filter_check_discard(call, entry, buffer, event))
2244                 __buffer_unlock_commit(buffer, event);
2245
2246  out:
2247         /* Again, don't let gcc optimize things here */
2248         barrier();
2249         __this_cpu_dec(ftrace_stack_reserve);
2250         preempt_enable_notrace();
2251
2252 }
2253
2254 static inline void ftrace_trace_stack(struct trace_array *tr,
2255                                       struct ring_buffer *buffer,
2256                                       unsigned long flags,
2257                                       int skip, int pc, struct pt_regs *regs)
2258 {
2259         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2260                 return;
2261
2262         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2263 }
2264
2265 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2266                    int pc)
2267 {
2268         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2269 }
2270
2271 /**
2272  * trace_dump_stack - record a stack back trace in the trace buffer
2273  * @skip: Number of functions to skip (helper handlers)
2274  */
2275 void trace_dump_stack(int skip)
2276 {
2277         unsigned long flags;
2278
2279         if (tracing_disabled || tracing_selftest_running)
2280                 return;
2281
2282         local_save_flags(flags);
2283
2284         /*
2285          * Skip 3 more, which seems to get us to the caller
2286          * of this function.
2287          */
2288         skip += 3;
2289         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2290                              flags, skip, preempt_count(), NULL);
2291 }
2292
2293 static DEFINE_PER_CPU(int, user_stack_count);
2294
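/*
 * Record the current task's user-space stack as a TRACE_USER_STACK
 * event.  Skipped in NMI context, since walking the user stack may
 * fault, and when the userstacktrace trace option is off.
 */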
2295 void
2296 ftrace_trace_userstack(struct trace_array *tr,
2297                        struct ring_buffer *buffer, unsigned long flags, int pc)
2298 {
2299         struct trace_event_call *call = &event_user_stack;
2300         struct ring_buffer_event *event;
2301         struct userstack_entry *entry;
2302         struct stack_trace trace;
2303
2304         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2305                 return;
2306
2307         /*
2308          * NMIs cannot handle page faults, even with fixups.
2309          * Saving the user stack can (and often does) fault.
2310          */
2311         if (unlikely(in_nmi()))
2312                 return;
2313
2314         /*
2315          * prevent recursion, since the user stack tracing may
2316          * trigger other kernel events.
2317          */
2318         preempt_disable();
2319         if (__this_cpu_read(user_stack_count))
2320                 goto out;
2321
2322         __this_cpu_inc(user_stack_count);
2323
2324         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2325                                           sizeof(*entry), flags, pc);
2326         if (!event)
2327                 goto out_drop_count;
2328         entry   = ring_buffer_event_data(event);
2329
2330         entry->tgid             = current->tgid;
2331         memset(&entry->caller, 0, sizeof(entry->caller));
2332
2333         trace.nr_entries        = 0;
2334         trace.max_entries       = FTRACE_STACK_ENTRIES;
2335         trace.skip              = 0;
2336         trace.entries           = entry->caller;
2337
2338         save_stack_trace_user(&trace);
2339         if (!call_filter_check_discard(call, entry, buffer, event))
2340                 __buffer_unlock_commit(buffer, event);
2341
2342  out_drop_count:
2343         __this_cpu_dec(user_stack_count);
2344  out:
2345         preempt_enable();
2346 }
2347
2348 #ifdef UNUSED
2349 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2350 {
2351         ftrace_trace_userstack(tr, flags, preempt_count());
2352 }
2353 #endif /* UNUSED */
2354
2355 #endif /* CONFIG_STACKTRACE */
2356
2357 /* created for use with alloc_percpu */
2358 struct trace_buffer_struct {
2359         int nesting;
2360         char buffer[4][TRACE_BUF_SIZE];
2361 };
2362
2363 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2364
2365 /*
2366  * This allows for lockless recording.  If we're nested too deeply, then
2367  * this returns NULL.
2368  */
2369 static char *get_trace_buf(void)
2370 {
2371         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2372
2373         if (!trace_percpu_buffer || buffer->nesting >= 4)
2374                 return NULL;
2375
2376         buffer->nesting++;
2377
2378         /* Interrupts must see nesting incremented before we use the buffer */
2379         barrier();
2380         return &buffer->buffer[buffer->nesting - 1][0];
2381 }
2382
2383 static void put_trace_buf(void)
2384 {
2385         /* Don't let the decrement of nesting leak before this */
2386         barrier();
2387         this_cpu_dec(trace_percpu_buffer->nesting);
2388 }
2389
2390 static int alloc_percpu_trace_buffer(void)
2391 {
2392         struct trace_buffer_struct __percpu *buffers;
2393
2394         buffers = alloc_percpu(struct trace_buffer_struct);
2395         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2396                 return -ENOMEM;
2397
2398         trace_percpu_buffer = buffers;
2399         return 0;
2400 }
2401
2402 static int buffers_allocated;
2403
2404 void trace_printk_init_buffers(void)
2405 {
2406         if (buffers_allocated)
2407                 return;
2408
2409         if (alloc_percpu_trace_buffer())
2410                 return;
2411
2412         /* trace_printk() is for debug use only. Don't use it in production. */
2413
2414         pr_warn("\n");
2415         pr_warn("**********************************************************\n");
2416         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417         pr_warn("**                                                      **\n");
2418         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2419         pr_warn("**                                                      **\n");
2420         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2421         pr_warn("** unsafe for production use.                           **\n");
2422         pr_warn("**                                                      **\n");
2423         pr_warn("** If you see this message and you are not debugging    **\n");
2424         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2425         pr_warn("**                                                      **\n");
2426         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2427         pr_warn("**********************************************************\n");
2428
2429         /* Expand the buffers to their set size */
2430         tracing_update_buffers();
2431
2432         buffers_allocated = 1;
2433
2434         /*
2435          * trace_printk_init_buffers() can be called by modules.
2436          * If that happens, then we need to start cmdline recording
2437          * directly here. If the global_trace.buffer is already
2438          * allocated here, then this was called by module code.
2439          */
2440         if (global_trace.trace_buffer.buffer)
2441                 tracing_start_cmdline_record();
2442 }
2443
2444 void trace_printk_start_comm(void)
2445 {
2446         /* Start tracing comms if trace printk is set */
2447         if (!buffers_allocated)
2448                 return;
2449         tracing_start_cmdline_record();
2450 }
2451
2452 static void trace_printk_start_stop_comm(int enabled)
2453 {
2454         if (!buffers_allocated)
2455                 return;
2456
2457         if (enabled)
2458                 tracing_start_cmdline_record();
2459         else
2460                 tracing_stop_cmdline_record();
2461 }
2462
2463 /**
2464  * trace_vbprintk - write binary msg to tracing buffer
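 * @ip: The instruction pointer that this is called from
 * @fmt: The binary format string to write to the buffer
 * @args: Arguments for @fmt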
2465  *
2466  */
2467 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2468 {
2469         struct trace_event_call *call = &event_bprint;
2470         struct ring_buffer_event *event;
2471         struct ring_buffer *buffer;
2472         struct trace_array *tr = &global_trace;
2473         struct bprint_entry *entry;
2474         unsigned long flags;
2475         char *tbuffer;
2476         int len = 0, size, pc;
2477
2478         if (unlikely(tracing_selftest_running || tracing_disabled))
2479                 return 0;
2480
2481         /* Don't pollute graph traces with trace_vprintk internals */
2482         pause_graph_tracing();
2483
2484         pc = preempt_count();
2485         preempt_disable_notrace();
2486
2487         tbuffer = get_trace_buf();
2488         if (!tbuffer) {
2489                 len = 0;
2490                 goto out_nobuffer;
2491         }
2492
2493         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2494
2495         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2496                 goto out;
2497
2498         local_save_flags(flags);
2499         size = sizeof(*entry) + sizeof(u32) * len;
2500         buffer = tr->trace_buffer.buffer;
2501         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2502                                           flags, pc);
2503         if (!event)
2504                 goto out;
2505         entry = ring_buffer_event_data(event);
2506         entry->ip                       = ip;
2507         entry->fmt                      = fmt;
2508
2509         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2510         if (!call_filter_check_discard(call, entry, buffer, event)) {
2511                 __buffer_unlock_commit(buffer, event);
2512                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2513         }
2514
2515 out:
2516         put_trace_buf();
2517
2518 out_nobuffer:
2519         preempt_enable_notrace();
2520         unpause_graph_tracing();
2521
2522         return len;
2523 }
2524 EXPORT_SYMBOL_GPL(trace_vbprintk);
2525
2526 __printf(3, 0)
2527 static int
2528 __trace_array_vprintk(struct ring_buffer *buffer,
2529                       unsigned long ip, const char *fmt, va_list args)
2530 {
2531         struct trace_event_call *call = &event_print;
2532         struct ring_buffer_event *event;
2533         int len = 0, size, pc;
2534         struct print_entry *entry;
2535         unsigned long flags;
2536         char *tbuffer;
2537
2538         if (tracing_disabled || tracing_selftest_running)
2539                 return 0;
2540
2541         /* Don't pollute graph traces with trace_vprintk internals */
2542         pause_graph_tracing();
2543
2544         pc = preempt_count();
2545         preempt_disable_notrace();
2546
2548         tbuffer = get_trace_buf();
2549         if (!tbuffer) {
2550                 len = 0;
2551                 goto out_nobuffer;
2552         }
2553
2554         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2555
2556         local_save_flags(flags);
2557         size = sizeof(*entry) + len + 1;
2558         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2559                                           flags, pc);
2560         if (!event)
2561                 goto out;
2562         entry = ring_buffer_event_data(event);
2563         entry->ip = ip;
2564
2565         memcpy(&entry->buf, tbuffer, len + 1);
2566         if (!call_filter_check_discard(call, entry, buffer, event)) {
2567                 __buffer_unlock_commit(buffer, event);
2568                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2569         }
2570
2571 out:
2572         put_trace_buf();
2573
2574 out_nobuffer:
2575         preempt_enable_notrace();
2576         unpause_graph_tracing();
2577
2578         return len;
2579 }
2580
2581 __printf(3, 0)
2582 int trace_array_vprintk(struct trace_array *tr,
2583                         unsigned long ip, const char *fmt, va_list args)
2584 {
2585         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2586 }
2587
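/*
 * trace_array_printk - printk() into the buffer of a trace instance
 *
 * Writes the formatted message into the ring buffer of @tr instead
 * of the global trace buffer.  Returns the number of bytes written,
 * 0 if the printk trace option is off, or -ENOENT if @tr is NULL.
 */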
2588 __printf(3, 0)
2589 int trace_array_printk(struct trace_array *tr,
2590                        unsigned long ip, const char *fmt, ...)
2591 {
2592         int ret;
2593         va_list ap;
2594
2595         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2596                 return 0;
2597
2598         if (!tr)
2599                 return -ENOENT;
2600
2601         va_start(ap, fmt);
2602         ret = trace_array_vprintk(tr, ip, fmt, ap);
2603         va_end(ap);
2604         return ret;
2605 }
2606
2607 __printf(3, 4)
2608 int trace_array_printk_buf(struct ring_buffer *buffer,
2609                            unsigned long ip, const char *fmt, ...)
2610 {
2611         int ret;
2612         va_list ap;
2613
2614         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2615                 return 0;
2616
2617         va_start(ap, fmt);
2618         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2619         va_end(ap);
2620         return ret;
2621 }
2622
2623 __printf(2, 0)
2624 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2625 {
2626         return trace_array_vprintk(&global_trace, ip, fmt, args);
2627 }
2628 EXPORT_SYMBOL_GPL(trace_vprintk);
2629
2630 static void trace_iterator_increment(struct trace_iterator *iter)
2631 {
2632         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2633
2634         iter->idx++;
2635         if (buf_iter)
2636                 ring_buffer_read(buf_iter, NULL);
2637 }
2638
2639 static struct trace_entry *
2640 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2641                 unsigned long *lost_events)
2642 {
2643         struct ring_buffer_event *event;
2644         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2645
2646         if (buf_iter)
2647                 event = ring_buffer_iter_peek(buf_iter, ts);
2648         else
2649                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2650                                          lost_events);
2651
2652         if (event) {
2653                 iter->ent_size = ring_buffer_event_length(event);
2654                 return ring_buffer_event_data(event);
2655         }
2656         iter->ent_size = 0;
2657         return NULL;
2658 }
2659
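/*
 * Peek at the next entry on each tracing CPU (or only on
 * iter->cpu_file for a per-CPU trace file) and return the one with
 * the oldest timestamp, along with its cpu, timestamp and lost
 * event count.
 */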
2660 static struct trace_entry *
2661 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2662                   unsigned long *missing_events, u64 *ent_ts)
2663 {
2664         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2665         struct trace_entry *ent, *next = NULL;
2666         unsigned long lost_events = 0, next_lost = 0;
2667         int cpu_file = iter->cpu_file;
2668         u64 next_ts = 0, ts;
2669         int next_cpu = -1;
2670         int next_size = 0;
2671         int cpu;
2672
2673         /*
2674          * If we are in a per_cpu trace file, don't bother iterating over
2675          * all CPUs; peek at that CPU directly.
2676          */
2677         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2678                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2679                         return NULL;
2680                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2681                 if (ent_cpu)
2682                         *ent_cpu = cpu_file;
2683
2684                 return ent;
2685         }
2686
2687         for_each_tracing_cpu(cpu) {
2688
2689                 if (ring_buffer_empty_cpu(buffer, cpu))
2690                         continue;
2691
2692                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2693
2694                 /*
2695                  * Pick the entry with the smallest timestamp:
2696                  */
2697                 if (ent && (!next || ts < next_ts)) {
2698                         next = ent;
2699                         next_cpu = cpu;
2700                         next_ts = ts;
2701                         next_lost = lost_events;
2702                         next_size = iter->ent_size;
2703                 }
2704         }
2705
2706         iter->ent_size = next_size;
2707
2708         if (ent_cpu)
2709                 *ent_cpu = next_cpu;
2710
2711         if (ent_ts)
2712                 *ent_ts = next_ts;
2713
2714         if (missing_events)
2715                 *missing_events = next_lost;
2716
2717         return next;
2718 }
2719
2720 /* Find the next real entry, without updating the iterator itself */
2721 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2722                                           int *ent_cpu, u64 *ent_ts)
2723 {
2724         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2725 }
2726
2727 /* Find the next real entry, and increment the iterator to the next entry */
2728 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2729 {
2730         iter->ent = __find_next_entry(iter, &iter->cpu,
2731                                       &iter->lost_events, &iter->ts);
2732
2733         if (iter->ent)
2734                 trace_iterator_increment(iter);
2735
2736         return iter->ent ? iter : NULL;
2737 }
2738
2739 static void trace_consume(struct trace_iterator *iter)
2740 {
2741         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2742                             &iter->lost_events);
2743 }
2744
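/*
 * seq_file ->next() callback for the trace files: step the iterator
 * forward, in timestamp order, until it reaches position *pos.
 */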
2745 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2746 {
2747         struct trace_iterator *iter = m->private;
2748         int i = (int)*pos;
2749         void *ent;
2750
2751         WARN_ON_ONCE(iter->leftover);
2752
2753         (*pos)++;
2754
2755         /* can't go backwards */
2756         if (iter->idx > i)
2757                 return NULL;
2758
2759         if (iter->idx < 0)
2760                 ent = trace_find_next_entry_inc(iter);
2761         else
2762                 ent = iter;
2763
2764         while (ent && iter->idx < i)
2765                 ent = trace_find_next_entry_inc(iter);
2766
2767         iter->pos = *pos;
2768
2769         return ent;
2770 }
2771
2772 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2773 {
2774         struct ring_buffer_event *event;
2775         struct ring_buffer_iter *buf_iter;
2776         unsigned long entries = 0;
2777         u64 ts;
2778
2779         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2780
2781         buf_iter = trace_buffer_iter(iter, cpu);
2782         if (!buf_iter)
2783                 return;
2784
2785         ring_buffer_iter_reset(buf_iter);
2786
2787         /*
2788          * With the max latency tracers we can have the case that
2789          * a reset never took place on a cpu. This is evident from
2790          * the timestamp being before the start of the buffer.
2791          */
2792         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2793                 if (ts >= iter->trace_buffer->time_start)
2794                         break;
2795                 entries++;
2796                 ring_buffer_read(buf_iter, NULL);
2797         }
2798
2799         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2800 }
2801
2802 /*
2803  * The current tracer is copied to avoid taking a global lock
2804  * all around.
2805  */
2806 static void *s_start(struct seq_file *m, loff_t *pos)
2807 {
2808         struct trace_iterator *iter = m->private;
2809         struct trace_array *tr = iter->tr;
2810         int cpu_file = iter->cpu_file;
2811         void *p = NULL;
2812         loff_t l = 0;
2813         int cpu;
2814
2815         /*
2816          * Copy the tracer to avoid using a global lock all around.
2817          * iter->trace is a copy of current_trace, so the pointer to the
2818          * name may be compared instead of using strcmp(), as iter->trace->name
2819          * will point to the same string as current_trace->name.
2820          */
2821         mutex_lock(&trace_types_lock);
2822         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2823                 *iter->trace = *tr->current_trace;
2824         mutex_unlock(&trace_types_lock);
2825
2826 #ifdef CONFIG_TRACER_MAX_TRACE
2827         if (iter->snapshot && iter->trace->use_max_tr)
2828                 return ERR_PTR(-EBUSY);
2829 #endif
2830
2831         if (*pos != iter->pos) {
2832                 iter->ent = NULL;
2833                 iter->cpu = 0;
2834                 iter->idx = -1;
2835
2836                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2837                         for_each_tracing_cpu(cpu)
2838                                 tracing_iter_reset(iter, cpu);
2839                 } else
2840                         tracing_iter_reset(iter, cpu_file);
2841
2842                 iter->leftover = 0;
2843                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2844                         ;
2845
2846         } else {
2847                 /*
2848                  * If we overflowed the seq_file before, then we want
2849                  * to just reuse the trace_seq buffer again.
2850                  */
2851                 if (iter->leftover)
2852                         p = iter;
2853                 else {
2854                         l = *pos - 1;
2855                         p = s_next(m, p, &l);
2856                 }
2857         }
2858
2859         trace_event_read_lock();
2860         trace_access_lock(cpu_file);
2861         return p;
2862 }
2863
2864 static void s_stop(struct seq_file *m, void *p)
2865 {
2866         struct trace_iterator *iter = m->private;
2867
2868 #ifdef CONFIG_TRACER_MAX_TRACE
2869         if (iter->snapshot && iter->trace->use_max_tr)
2870                 return;
2871 #endif
2872
2873         trace_access_unlock(iter->cpu_file);
2874         trace_event_read_unlock();
2875 }
2876
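/*
 * Count the entries the trace will show (*entries) and the total
 * number written to @buf (*total), which also includes entries lost
 * to ring buffer overruns.
 */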
2877 static void
2878 get_total_entries(struct trace_buffer *buf,
2879                   unsigned long *total, unsigned long *entries)
2880 {
2881         unsigned long count;
2882         int cpu;
2883
2884         *total = 0;
2885         *entries = 0;
2886
2887         for_each_tracing_cpu(cpu) {
2888                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2889                 /*
2890                  * If this buffer has skipped entries, then we hold all
2891                  * entries for the trace and we need to ignore the
2892                  * ones before the time stamp.
2893                  */
2894                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2895                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2896                         /* total is the same as the entries */
2897                         *total += count;
2898                 } else
2899                         *total += count +
2900                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2901                 *entries += count;
2902         }
2903 }
2904
2905 static void print_lat_help_header(struct seq_file *m)
2906 {
2907         seq_puts(m, "#                  _------=> CPU#            \n"
2908                     "#                 / _-----=> irqs-off        \n"
2909                     "#                | / _----=> need-resched    \n"
2910                     "#                || / _---=> hardirq/softirq \n"
2911                     "#                ||| / _--=> preempt-depth   \n"
2912                     "#                |||| /     delay            \n"
2913                     "#  cmd     pid   ||||| time  |   caller      \n"
2914                     "#     \\   /      |||||  \\    |   /         \n");
2915 }
2916
2917 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2918 {
2919         unsigned long total;
2920         unsigned long entries;
2921
2922         get_total_entries(buf, &total, &entries);
2923         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2924                    entries, total, num_online_cpus());
2925         seq_puts(m, "#\n");
2926 }
2927
2928 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2929 {
2930         print_event_info(buf, m);
2931         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2932                     "#              | |       |          |         |\n");
2933 }
2934
2935 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2936 {
2937         print_event_info(buf, m);
2938         seq_puts(m, "#                              _-----=> irqs-off\n"
2939                     "#                             / _----=> need-resched\n"
2940                     "#                            | / _---=> hardirq/softirq\n"
2941                     "#                            || / _--=> preempt-depth\n"
2942                     "#                            ||| /     delay\n"
2943                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2944                     "#              | |       |   ||||       |         |\n");
2945 }
2946
2947 void
2948 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2949 {
2950         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2951         struct trace_buffer *buf = iter->trace_buffer;
2952         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2953         struct tracer *type = iter->trace;
2954         unsigned long entries;
2955         unsigned long total;
2956         const char *name = "preemption";
2957
2958         name = type->name;
2959
2960         get_total_entries(buf, &total, &entries);
2961
2962         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2963                    name, UTS_RELEASE);
2964         seq_puts(m, "# -----------------------------------"
2965                  "---------------------------------\n");
2966         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2967                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2968                    nsecs_to_usecs(data->saved_latency),
2969                    entries,
2970                    total,
2971                    buf->cpu,
2972 #if defined(CONFIG_PREEMPT_NONE)
2973                    "server",
2974 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2975                    "desktop",
2976 #elif defined(CONFIG_PREEMPT)
2977                    "preempt",
2978 #else
2979                    "unknown",
2980 #endif
2981                    /* These are reserved for later use */
2982                    0, 0, 0, 0);
2983 #ifdef CONFIG_SMP
2984         seq_printf(m, " #P:%d)\n", num_online_cpus());
2985 #else
2986         seq_puts(m, ")\n");
2987 #endif
2988         seq_puts(m, "#    -----------------\n");
2989         seq_printf(m, "#    | task: %.16s-%d "
2990                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2991                    data->comm, data->pid,
2992                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2993                    data->policy, data->rt_priority);
2994         seq_puts(m, "#    -----------------\n");
2995
2996         if (data->critical_start) {
2997                 seq_puts(m, "#  => started at: ");
2998                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2999                 trace_print_seq(m, &iter->seq);
3000                 seq_puts(m, "\n#  => ended at:   ");
3001                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3002                 trace_print_seq(m, &iter->seq);
3003                 seq_puts(m, "\n#\n");
3004         }
3005
3006         seq_puts(m, "#\n");
3007 }
3008
3009 static void test_cpu_buff_start(struct trace_iterator *iter)
3010 {
3011         struct trace_seq *s = &iter->seq;
3012         struct trace_array *tr = iter->tr;
3013
3014         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3015                 return;
3016
3017         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3018                 return;
3019
3020         if (cpumask_available(iter->started) &&
3021             cpumask_test_cpu(iter->cpu, iter->started))
3022                 return;
3023
3024         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3025                 return;
3026
3027         if (cpumask_available(iter->started))
3028                 cpumask_set_cpu(iter->cpu, iter->started);
3029
3030         /* Don't print started cpu buffer for the first entry of the trace */
3031         if (iter->idx > 1)
3032                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3033                                 iter->cpu);
3034 }
3035
3036 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3037 {
3038         struct trace_array *tr = iter->tr;
3039         struct trace_seq *s = &iter->seq;
3040         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3041         struct trace_entry *entry;
3042         struct trace_event *event;
3043
3044         entry = iter->ent;
3045
3046         test_cpu_buff_start(iter);
3047
3048         event = ftrace_find_event(entry->type);
3049
3050         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3051                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3052                         trace_print_lat_context(iter);
3053                 else
3054                         trace_print_context(iter);
3055         }
3056
3057         if (trace_seq_has_overflowed(s))
3058                 return TRACE_TYPE_PARTIAL_LINE;
3059
3060         if (event)
3061                 return event->funcs->trace(iter, sym_flags, event);
3062
3063         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3064
3065         return trace_handle_return(s);
3066 }
3067
3068 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3069 {
3070         struct trace_array *tr = iter->tr;
3071         struct trace_seq *s = &iter->seq;
3072         struct trace_entry *entry;
3073         struct trace_event *event;
3074
3075         entry = iter->ent;
3076
3077         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3078                 trace_seq_printf(s, "%d %d %llu ",
3079                                  entry->pid, iter->cpu, iter->ts);
3080
3081         if (trace_seq_has_overflowed(s))
3082                 return TRACE_TYPE_PARTIAL_LINE;
3083
3084         event = ftrace_find_event(entry->type);
3085         if (event)
3086                 return event->funcs->raw(iter, 0, event);
3087
3088         trace_seq_printf(s, "%d ?\n", entry->type);
3089
3090         return trace_handle_return(s);
3091 }
3092
3093 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3094 {
3095         struct trace_array *tr = iter->tr;
3096         struct trace_seq *s = &iter->seq;
3097         unsigned char newline = '\n';
3098         struct trace_entry *entry;
3099         struct trace_event *event;
3100
3101         entry = iter->ent;
3102
3103         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3104                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3105                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3106                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3107                 if (trace_seq_has_overflowed(s))
3108                         return TRACE_TYPE_PARTIAL_LINE;
3109         }
3110
3111         event = ftrace_find_event(entry->type);
3112         if (event) {
3113                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3114                 if (ret != TRACE_TYPE_HANDLED)
3115                         return ret;
3116         }
3117
3118         SEQ_PUT_FIELD(s, newline);
3119
3120         return trace_handle_return(s);
3121 }
3122
3123 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3124 {
3125         struct trace_array *tr = iter->tr;
3126         struct trace_seq *s = &iter->seq;
3127         struct trace_entry *entry;
3128         struct trace_event *event;
3129
3130         entry = iter->ent;
3131
3132         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3133                 SEQ_PUT_FIELD(s, entry->pid);
3134                 SEQ_PUT_FIELD(s, iter->cpu);
3135                 SEQ_PUT_FIELD(s, iter->ts);
3136                 if (trace_seq_has_overflowed(s))
3137                         return TRACE_TYPE_PARTIAL_LINE;
3138         }
3139
3140         event = ftrace_find_event(entry->type);
3141         return event ? event->funcs->binary(iter, 0, event) :
3142                 TRACE_TYPE_HANDLED;
3143 }
3144
3145 int trace_empty(struct trace_iterator *iter)
3146 {
3147         struct ring_buffer_iter *buf_iter;
3148         int cpu;
3149
3150         /* If we are looking at one CPU buffer, only check that one */
3151         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3152                 cpu = iter->cpu_file;
3153                 buf_iter = trace_buffer_iter(iter, cpu);
3154                 if (buf_iter) {
3155                         if (!ring_buffer_iter_empty(buf_iter))
3156                                 return 0;
3157                 } else {
3158                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3159                                 return 0;
3160                 }
3161                 return 1;
3162         }
3163
3164         for_each_tracing_cpu(cpu) {
3165                 buf_iter = trace_buffer_iter(iter, cpu);
3166                 if (buf_iter) {
3167                         if (!ring_buffer_iter_empty(buf_iter))
3168                                 return 0;
3169                 } else {
3170                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3171                                 return 0;
3172                 }
3173         }
3174
3175         return 1;
3176 }
3177
3178 /*  Called with trace_event_read_lock() held. */
3179 enum print_line_t print_trace_line(struct trace_iterator *iter)
3180 {
3181         struct trace_array *tr = iter->tr;
3182         unsigned long trace_flags = tr->trace_flags;
3183         enum print_line_t ret;
3184
3185         if (iter->lost_events) {
3186                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3187                                  iter->cpu, iter->lost_events);
3188                 if (trace_seq_has_overflowed(&iter->seq))
3189                         return TRACE_TYPE_PARTIAL_LINE;
3190         }
3191
3192         if (iter->trace && iter->trace->print_line) {
3193                 ret = iter->trace->print_line(iter);
3194                 if (ret != TRACE_TYPE_UNHANDLED)
3195                         return ret;
3196         }
3197
3198         if (iter->ent->type == TRACE_BPUTS &&
3199                         trace_flags & TRACE_ITER_PRINTK &&
3200                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3201                 return trace_print_bputs_msg_only(iter);
3202
3203         if (iter->ent->type == TRACE_BPRINT &&
3204                         trace_flags & TRACE_ITER_PRINTK &&
3205                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3206                 return trace_print_bprintk_msg_only(iter);
3207
3208         if (iter->ent->type == TRACE_PRINT &&
3209                         trace_flags & TRACE_ITER_PRINTK &&
3210                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3211                 return trace_print_printk_msg_only(iter);
3212
3213         if (trace_flags & TRACE_ITER_BIN)
3214                 return print_bin_fmt(iter);
3215
3216         if (trace_flags & TRACE_ITER_HEX)
3217                 return print_hex_fmt(iter);
3218
3219         if (trace_flags & TRACE_ITER_RAW)
3220                 return print_raw_fmt(iter);
3221
3222         return print_trace_fmt(iter);
3223 }
3224
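/*
 * trace_latency_header - print the latency-format header.
 * Prints nothing when the buffers are empty; otherwise emits the trace
 * header and, unless the verbose option is set, the short column legend.
 */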
3225 void trace_latency_header(struct seq_file *m)
3226 {
3227         struct trace_iterator *iter = m->private;
3228         struct trace_array *tr = iter->tr;
3229
3230         /* print nothing if the buffers are empty */
3231         if (trace_empty(iter))
3232                 return;
3233
3234         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3235                 print_trace_header(m, iter);
3236
3237         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3238                 print_lat_help_header(m);
3239 }
3240
3241 void trace_default_header(struct seq_file *m)
3242 {
3243         struct trace_iterator *iter = m->private;
3244         struct trace_array *tr = iter->tr;
3245         unsigned long trace_flags = tr->trace_flags;
3246
3247         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3248                 return;
3249
3250         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3251                 /* print nothing if the buffers are empty */
3252                 if (trace_empty(iter))
3253                         return;
3254                 print_trace_header(m, iter);
3255                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3256                         print_lat_help_header(m);
3257         } else {
3258                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3259                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3260                                 print_func_help_header_irq(iter->trace_buffer, m);
3261                         else
3262                                 print_func_help_header(iter->trace_buffer, m);
3263                 }
3264         }
3265 }
3266
3267 static void test_ftrace_alive(struct seq_file *m)
3268 {
3269         if (!ftrace_is_dead())
3270                 return;
3271         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3272                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3273 }
3274
3275 #ifdef CONFIG_TRACER_MAX_TRACE
3276 static void show_snapshot_main_help(struct seq_file *m)
3277 {
3278         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3279                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3280                     "#                      Takes a snapshot of the main buffer.\n"
3281                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3282                     "#                      (Doesn't have to be '2', works with any number that\n"
3283                     "#                       is not a '0' or '1')\n");
3284 }
3285
3286 static void show_snapshot_percpu_help(struct seq_file *m)
3287 {
3288         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3289 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3290         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3291                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3292 #else
3293         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3294                     "#                     Must use main snapshot file to allocate.\n");
3295 #endif
3296         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3297                     "#                      (Doesn't have to be '2', works with any number that\n"
3298                     "#                       is not a '0' or '1')\n");
3299 }
3300
3301 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3302 {
3303         if (iter->tr->allocated_snapshot)
3304                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3305         else
3306                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3307
3308         seq_puts(m, "# Snapshot commands:\n");
3309         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3310                 show_snapshot_main_help(m);
3311         else
3312                 show_snapshot_percpu_help(m);
3313 }
3314 #else
3315 /* Should never be called */
3316 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3317 #endif
3318
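/*
 * s_show - seq_file ->show() callback for the trace iterator.
 * At the start of the buffer (no current entry) the headers or snapshot
 * help are printed; otherwise any leftover data from a previous overflow
 * is flushed and the next trace line is formatted into the seq_file.
 */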
3319 static int s_show(struct seq_file *m, void *v)
3320 {
3321         struct trace_iterator *iter = v;
3322         int ret;
3323
3324         if (iter->ent == NULL) {
3325                 if (iter->tr) {
3326                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3327                         seq_puts(m, "#\n");
3328                         test_ftrace_alive(m);
3329                 }
3330                 if (iter->snapshot && trace_empty(iter))
3331                         print_snapshot_help(m, iter);
3332                 else if (iter->trace && iter->trace->print_header)
3333                         iter->trace->print_header(m);
3334                 else
3335                         trace_default_header(m);
3336
3337         } else if (iter->leftover) {
3338                 /*
3339                  * If we filled the seq_file buffer earlier, we
3340                  * want to just show it now.
3341                  */
3342                 ret = trace_print_seq(m, &iter->seq);
3343
3344                 /* ret should this time be zero, but you never know */
3345                 iter->leftover = ret;
3346
3347         } else {
3348                 print_trace_line(iter);
3349                 ret = trace_print_seq(m, &iter->seq);
3350                 /*
3351                  * If we overflow the seq_file buffer, then it will
3352                  * ask us for this data again at start up.
3353                  * Use that instead.
3354                  *  ret is 0 if seq_file write succeeded.
3355                  *        -1 otherwise.
3356                  */
3357                 iter->leftover = ret;
3358         }
3359
3360         return 0;
3361 }
3362
3363 /*
3364  * Should be used after trace_array_get(); trace_types_lock
3365  * ensures that i_cdev was already initialized.
3366  */
3367 static inline int tracing_get_cpu(struct inode *inode)
3368 {
3369         if (inode->i_cdev) /* See trace_create_cpu_file() */
3370                 return (long)inode->i_cdev - 1;
3371         return RING_BUFFER_ALL_CPUS;
3372 }
3373
3374 static const struct seq_operations tracer_seq_ops = {
3375         .start          = s_start,
3376         .next           = s_next,
3377         .stop           = s_stop,
3378         .show           = s_show,
3379 };
3380
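/*
 * __tracing_open - set up a trace_iterator for reading a trace buffer.
 * Takes a private copy of the current tracer, allocates per-CPU ring
 * buffer iterators and, unless the snapshot buffer is being opened,
 * stops tracing while the buffer is being read.
 */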
3381 static struct trace_iterator *
3382 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3383 {
3384         struct trace_array *tr = inode->i_private;
3385         struct trace_iterator *iter;
3386         int cpu;
3387
3388         if (tracing_disabled)
3389                 return ERR_PTR(-ENODEV);
3390
3391         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3392         if (!iter)
3393                 return ERR_PTR(-ENOMEM);
3394
3395         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3396                                     GFP_KERNEL);
3397         if (!iter->buffer_iter)
3398                 goto release;
3399
3400         /*
3401          * We make a copy of the current tracer to avoid concurrent
3402          * changes on it while we are reading.
3403          */
3404         mutex_lock(&trace_types_lock);
3405         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3406         if (!iter->trace)
3407                 goto fail;
3408
3409         *iter->trace = *tr->current_trace;
3410
3411         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3412                 goto fail;
3413
3414         iter->tr = tr;
3415
3416 #ifdef CONFIG_TRACER_MAX_TRACE
3417         /* Currently only the top directory has a snapshot */
3418         if (tr->current_trace->print_max || snapshot)
3419                 iter->trace_buffer = &tr->max_buffer;
3420         else
3421 #endif
3422                 iter->trace_buffer = &tr->trace_buffer;
3423         iter->snapshot = snapshot;
3424         iter->pos = -1;
3425         iter->cpu_file = tracing_get_cpu(inode);
3426         mutex_init(&iter->mutex);
3427
3428         /* Notify the tracer early; before we stop tracing. */
3429         if (iter->trace && iter->trace->open)
3430                 iter->trace->open(iter);
3431
3432         /* Annotate start of buffers if we had overruns */
3433         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3434                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3435
3436         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3437         if (trace_clocks[tr->clock_id].in_ns)
3438                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3439
3440         /* stop the trace while dumping if we are not opening "snapshot" */
3441         if (!iter->snapshot)
3442                 tracing_stop_tr(tr);
3443
3444         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3445                 for_each_tracing_cpu(cpu) {
3446                         iter->buffer_iter[cpu] =
3447                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3448                                                          cpu, GFP_KERNEL);
3449                 }
3450                 ring_buffer_read_prepare_sync();
3451                 for_each_tracing_cpu(cpu) {
3452                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3453                         tracing_iter_reset(iter, cpu);
3454                 }
3455         } else {
3456                 cpu = iter->cpu_file;
3457                 iter->buffer_iter[cpu] =
3458                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3459                                                  cpu, GFP_KERNEL);
3460                 ring_buffer_read_prepare_sync();
3461                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3462                 tracing_iter_reset(iter, cpu);
3463         }
3464
3465         mutex_unlock(&trace_types_lock);
3466
3467         return iter;
3468
3469  fail:
3470         mutex_unlock(&trace_types_lock);
3471         kfree(iter->trace);
3472         kfree(iter->buffer_iter);
3473 release:
3474         seq_release_private(inode, file);
3475         return ERR_PTR(-ENOMEM);
3476 }
3477
3478 int tracing_open_generic(struct inode *inode, struct file *filp)
3479 {
3480         if (tracing_disabled)
3481                 return -ENODEV;
3482
3483         filp->private_data = inode->i_private;
3484         return 0;
3485 }
3486
3487 bool tracing_is_disabled(void)
3488 {
3489         return tracing_disabled;
3490 }
3491
3492 /*
3493  * Open and update trace_array ref count.
3494  * Must have the current trace_array passed to it.
3495  */
3496 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3497 {
3498         struct trace_array *tr = inode->i_private;
3499
3500         if (tracing_disabled)
3501                 return -ENODEV;
3502
3503         if (trace_array_get(tr) < 0)
3504                 return -ENODEV;
3505
3506         filp->private_data = inode->i_private;
3507
3508         return 0;
3509 }
3510
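/*
 * Release handler paired with __tracing_open(): finish the per-CPU ring
 * buffer iterators, restart tracing if it was stopped on open, and free
 * the iterator.
 */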
3511 static int tracing_release(struct inode *inode, struct file *file)
3512 {
3513         struct trace_array *tr = inode->i_private;
3514         struct seq_file *m = file->private_data;
3515         struct trace_iterator *iter;
3516         int cpu;
3517
3518         if (!(file->f_mode & FMODE_READ)) {
3519                 trace_array_put(tr);
3520                 return 0;
3521         }
3522
3523         /* Writes do not use seq_file */
3524         iter = m->private;
3525         mutex_lock(&trace_types_lock);
3526
3527         for_each_tracing_cpu(cpu) {
3528                 if (iter->buffer_iter[cpu])
3529                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3530         }
3531
3532         if (iter->trace && iter->trace->close)
3533                 iter->trace->close(iter);
3534
3535         if (!iter->snapshot)
3536                 /* reenable tracing if it was previously enabled */
3537                 tracing_start_tr(tr);
3538
3539         __trace_array_put(tr);
3540
3541         mutex_unlock(&trace_types_lock);
3542
3543         mutex_destroy(&iter->mutex);
3544         free_cpumask_var(iter->started);
3545         kfree(iter->trace);
3546         kfree(iter->buffer_iter);
3547         seq_release_private(inode, file);
3548
3549         return 0;
3550 }
3551
3552 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3553 {
3554         struct trace_array *tr = inode->i_private;
3555
3556         trace_array_put(tr);
3557         return 0;
3558 }
3559
3560 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3561 {
3562         struct trace_array *tr = inode->i_private;
3563
3564         trace_array_put(tr);
3565
3566         return single_release(inode, file);
3567 }
3568
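/*
 * Open handler for the "trace" file.  Opening for write with O_TRUNC
 * clears the buffer (one CPU or all of them); opening for read builds a
 * full trace iterator via __tracing_open().
 */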
3569 static int tracing_open(struct inode *inode, struct file *file)
3570 {
3571         struct trace_array *tr = inode->i_private;
3572         struct trace_iterator *iter;
3573         int ret = 0;
3574
3575         if (trace_array_get(tr) < 0)
3576                 return -ENODEV;
3577
3578         /* If this file was open for write, then erase contents */
3579         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3580                 int cpu = tracing_get_cpu(inode);
3581                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3582
3583 #ifdef CONFIG_TRACER_MAX_TRACE
3584                 if (tr->current_trace->print_max)
3585                         trace_buf = &tr->max_buffer;
3586 #endif
3587
3588                 if (cpu == RING_BUFFER_ALL_CPUS)
3589                         tracing_reset_online_cpus(trace_buf);
3590                 else
3591                         tracing_reset(trace_buf, cpu);
3592         }
3593
3594         if (file->f_mode & FMODE_READ) {
3595                 iter = __tracing_open(inode, file, false);
3596                 if (IS_ERR(iter))
3597                         ret = PTR_ERR(iter);
3598                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3599                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3600         }
3601
3602         if (ret < 0)
3603                 trace_array_put(tr);
3604
3605         return ret;
3606 }
3607
3608 /*
3609  * Some tracers are not suitable for instance buffers.
3610  * A tracer is always available for the global array (toplevel),
3611  * or for an instance if it explicitly states that it allows instances.
3612  */
3613 static bool
3614 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3615 {
3616         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3617 }
3618
3619 /* Find the next tracer that this trace array may use */
3620 static struct tracer *
3621 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3622 {
3623         while (t && !trace_ok_for_array(t, tr))
3624                 t = t->next;
3625
3626         return t;
3627 }
3628
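/*
 * seq_file iterator over the registered tracers, filtered by
 * trace_ok_for_array().  This backs the "available_tracers" file.
 */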
3629 static void *
3630 t_next(struct seq_file *m, void *v, loff_t *pos)
3631 {
3632         struct trace_array *tr = m->private;
3633         struct tracer *t = v;
3634
3635         (*pos)++;
3636
3637         if (t)
3638                 t = get_tracer_for_array(tr, t->next);
3639
3640         return t;
3641 }
3642
3643 static void *t_start(struct seq_file *m, loff_t *pos)
3644 {
3645         struct trace_array *tr = m->private;
3646         struct tracer *t;
3647         loff_t l = 0;
3648
3649         mutex_lock(&trace_types_lock);
3650
3651         t = get_tracer_for_array(tr, trace_types);
3652         for (; t && l < *pos; t = t_next(m, t, &l))
3653                 ;
3654
3655         return t;
3656 }
3657
3658 static void t_stop(struct seq_file *m, void *p)
3659 {
3660         mutex_unlock(&trace_types_lock);
3661 }
3662
3663 static int t_show(struct seq_file *m, void *v)
3664 {
3665         struct tracer *t = v;
3666
3667         if (!t)
3668                 return 0;
3669
3670         seq_puts(m, t->name);
3671         if (t->next)
3672                 seq_putc(m, ' ');
3673         else
3674                 seq_putc(m, '\n');
3675
3676         return 0;
3677 }
3678
3679 static const struct seq_operations show_traces_seq_ops = {
3680         .start          = t_start,
3681         .next           = t_next,
3682         .stop           = t_stop,
3683         .show           = t_show,
3684 };
3685
3686 static int show_traces_open(struct inode *inode, struct file *file)
3687 {
3688         struct trace_array *tr = inode->i_private;
3689         struct seq_file *m;
3690         int ret;
3691
3692         if (tracing_disabled)
3693                 return -ENODEV;
3694
3695         if (trace_array_get(tr) < 0)
3696                 return -ENODEV;
3697
3698         ret = seq_open(file, &show_traces_seq_ops);
3699         if (ret) {
3700                 trace_array_put(tr);
3701                 return ret;
3702         }
3703
3704         m = file->private_data;
3705         m->private = tr;
3706
3707         return 0;
3708 }
3709
3710 static int show_traces_release(struct inode *inode, struct file *file)
3711 {
3712         struct trace_array *tr = inode->i_private;
3713
3714         trace_array_put(tr);
3715         return seq_release(inode, file);
3716 }
3717
3718 static ssize_t
3719 tracing_write_stub(struct file *filp, const char __user *ubuf,
3720                    size_t count, loff_t *ppos)
3721 {
3722         return count;
3723 }
3724
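/*
 * Seeking is only meaningful for readers, which go through seq_file;
 * writers simply have their file position reset to zero.
 */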
3725 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3726 {
3727         int ret;
3728
3729         if (file->f_mode & FMODE_READ)
3730                 ret = seq_lseek(file, offset, whence);
3731         else
3732                 file->f_pos = ret = 0;
3733
3734         return ret;
3735 }
3736
3737 static const struct file_operations tracing_fops = {
3738         .open           = tracing_open,
3739         .read           = seq_read,
3740         .write          = tracing_write_stub,
3741         .llseek         = tracing_lseek,
3742         .release        = tracing_release,
3743 };
3744
3745 static const struct file_operations show_traces_fops = {
3746         .open           = show_traces_open,
3747         .read           = seq_read,
3748         .llseek         = seq_lseek,
3749         .release        = show_traces_release,
3750 };
3751
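/*
 * Read handler for "tracing_cpumask": print the mask of CPUs that are
 * currently being traced as a hex bitmap string.
 */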
3752 static ssize_t
3753 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3754                      size_t count, loff_t *ppos)
3755 {
3756         struct trace_array *tr = file_inode(filp)->i_private;
3757         char *mask_str;
3758         int len;
3759
3760         len = snprintf(NULL, 0, "%*pb\n",
3761                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3762         mask_str = kmalloc(len, GFP_KERNEL);
3763         if (!mask_str)
3764                 return -ENOMEM;
3765
3766         len = snprintf(mask_str, len, "%*pb\n",
3767                        cpumask_pr_args(tr->tracing_cpumask));
3768         if (len >= count) {
3769                 count = -EINVAL;
3770                 goto out_err;
3771         }
3772         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3773
3774 out_err:
3775         kfree(mask_str);
3776
3777         return count;
3778 }
3779
3780 static ssize_t
3781 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3782                       size_t count, loff_t *ppos)
3783 {
3784         struct trace_array *tr = file_inode(filp)->i_private;
3785         cpumask_var_t tracing_cpumask_new;
3786         int err, cpu;
3787
3788         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3789                 return -ENOMEM;
3790
3791         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3792         if (err)
3793                 goto err_unlock;
3794
3795         local_irq_disable();
3796         arch_spin_lock(&tr->max_lock);
3797         for_each_tracing_cpu(cpu) {
3798                 /*
3799                  * Increase/decrease the disabled counter if we are
3800                  * about to flip a bit in the cpumask:
3801                  */
3802                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3803                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3804                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3805                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3806                 }
3807                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3808                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3809                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3810                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3811                 }
3812         }
3813         arch_spin_unlock(&tr->max_lock);
3814         local_irq_enable();
3815
3816         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3817         free_cpumask_var(tracing_cpumask_new);
3818
3819         return count;
3820
3821 err_unlock:
3822         free_cpumask_var(tracing_cpumask_new);
3823
3824         return err;
3825 }
3826
3827 static const struct file_operations tracing_cpumask_fops = {
3828         .open           = tracing_open_generic_tr,
3829         .read           = tracing_cpumask_read,
3830         .write          = tracing_cpumask_write,
3831         .release        = tracing_release_generic_tr,
3832         .llseek         = generic_file_llseek,
3833 };
3834
3835 static int tracing_trace_options_show(struct seq_file *m, void *v)
3836 {
3837         struct tracer_opt *trace_opts;
3838         struct trace_array *tr = m->private;
3839         u32 tracer_flags;
3840         int i;
3841
3842         mutex_lock(&trace_types_lock);
3843         tracer_flags = tr->current_trace->flags->val;
3844         trace_opts = tr->current_trace->flags->opts;
3845
3846         for (i = 0; trace_options[i]; i++) {
3847                 if (tr->trace_flags & (1 << i))
3848                         seq_printf(m, "%s\n", trace_options[i]);
3849                 else
3850                         seq_printf(m, "no%s\n", trace_options[i]);
3851         }
3852
3853         for (i = 0; trace_opts[i].name; i++) {
3854                 if (tracer_flags & trace_opts[i].bit)
3855                         seq_printf(m, "%s\n", trace_opts[i].name);
3856                 else
3857                         seq_printf(m, "no%s\n", trace_opts[i].name);
3858         }
3859         mutex_unlock(&trace_types_lock);
3860
3861         return 0;
3862 }
3863
3864 static int __set_tracer_option(struct trace_array *tr,
3865                                struct tracer_flags *tracer_flags,
3866                                struct tracer_opt *opts, int neg)
3867 {
3868         struct tracer *trace = tracer_flags->trace;
3869         int ret;
3870
3871         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3872         if (ret)
3873                 return ret;
3874
3875         if (neg)
3876                 tracer_flags->val &= ~opts->bit;
3877         else
3878                 tracer_flags->val |= opts->bit;
3879         return 0;
3880 }
3881
3882 /* Try to assign a tracer specific option */
3883 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3884 {
3885         struct tracer *trace = tr->current_trace;
3886         struct tracer_flags *tracer_flags = trace->flags;
3887         struct tracer_opt *opts = NULL;
3888         int i;
3889
3890         for (i = 0; tracer_flags->opts[i].name; i++) {
3891                 opts = &tracer_flags->opts[i];
3892
3893                 if (strcmp(cmp, opts->name) == 0)
3894                         return __set_tracer_option(tr, trace->flags, opts, neg);
3895         }
3896
3897         return -EINVAL;
3898 }
3899
3900 /* Some tracers require overwrite to stay enabled */
3901 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3902 {
3903         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3904                 return -1;
3905
3906         return 0;
3907 }
3908
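/*
 * set_tracer_flag - set or clear one generic trace option.
 * The current tracer may veto the change; on success the side effects of
 * the option (cmdline recording, fork following, ring-buffer overwrite
 * mode, trace_printk) are applied as well.
 */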
3909 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3910 {
3911         /* do nothing if flag is already set */
3912         if (!!(tr->trace_flags & mask) == !!enabled)
3913                 return 0;
3914
3915         /* Give the tracer a chance to approve the change */
3916         if (tr->current_trace->flag_changed)
3917                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3918                         return -EINVAL;
3919
3920         if (enabled)
3921                 tr->trace_flags |= mask;
3922         else
3923                 tr->trace_flags &= ~mask;
3924
3925         if (mask == TRACE_ITER_RECORD_CMD)
3926                 trace_event_enable_cmd_record(enabled);
3927
3928         if (mask == TRACE_ITER_EVENT_FORK)
3929                 trace_event_follow_fork(tr, enabled);
3930
3931         if (mask == TRACE_ITER_OVERWRITE) {
3932                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3933 #ifdef CONFIG_TRACER_MAX_TRACE
3934                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3935 #endif
3936         }
3937
3938         if (mask == TRACE_ITER_PRINTK) {
3939                 trace_printk_start_stop_comm(enabled);
3940                 trace_printk_control(enabled);
3941         }
3942
3943         return 0;
3944 }
3945
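/*
 * trace_set_options - apply a single option string.  A leading "no"
 * clears the option instead of setting it.  The generic trace options
 * are tried first, then the current tracer's private options.
 */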
3946 static int trace_set_options(struct trace_array *tr, char *option)
3947 {
3948         char *cmp;
3949         int neg = 0;
3950         int ret = -ENODEV;
3951         int i;
3952         size_t orig_len = strlen(option);
3953
3954         cmp = strstrip(option);
3955
3956         if (strncmp(cmp, "no", 2) == 0) {
3957                 neg = 1;
3958                 cmp += 2;
3959         }
3960
3961         mutex_lock(&trace_types_lock);
3962
3963         for (i = 0; trace_options[i]; i++) {
3964                 if (strcmp(cmp, trace_options[i]) == 0) {
3965                         ret = set_tracer_flag(tr, 1 << i, !neg);
3966                         break;
3967                 }
3968         }
3969
3970         /* If no option could be set, test the specific tracer options */
3971         if (!trace_options[i])
3972                 ret = set_tracer_option(tr, cmp, neg);
3973
3974         mutex_unlock(&trace_types_lock);
3975
3976         /*
3977          * If the first trailing whitespace is replaced with '\0' by strstrip,
3978          * turn it back into a space.
3979          */
3980         if (orig_len > strlen(option))
3981                 option[strlen(option)] = ' ';
3982
3983         return ret;
3984 }
3985
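/*
 * Apply the trace options collected from the kernel command line at
 * boot, one comma-separated option at a time.
 */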
3986 static void __init apply_trace_boot_options(void)
3987 {
3988         char *buf = trace_boot_options_buf;
3989         char *option;
3990
3991         while (true) {
3992                 option = strsep(&buf, ",");
3993
3994                 if (!option)
3995                         break;
3996
3997                 if (*option)
3998                         trace_set_options(&global_trace, option);
3999
4000                 /* Put back the comma to allow this to be called again */
4001                 if (buf)
4002                         *(buf - 1) = ',';
4003         }
4004 }
4005
4006 static ssize_t
4007 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4008                         size_t cnt, loff_t *ppos)
4009 {
4010         struct seq_file *m = filp->private_data;
4011         struct trace_array *tr = m->private;
4012         char buf[64];
4013         int ret;
4014
4015         if (cnt >= sizeof(buf))
4016                 return -EINVAL;
4017
4018         if (copy_from_user(buf, ubuf, cnt))
4019                 return -EFAULT;
4020
4021         buf[cnt] = 0;
4022
4023         ret = trace_set_options(tr, buf);
4024         if (ret < 0)
4025                 return ret;
4026
4027         *ppos += cnt;
4028
4029         return cnt;
4030 }
4031
4032 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4033 {
4034         struct trace_array *tr = inode->i_private;
4035         int ret;
4036
4037         if (tracing_disabled)
4038                 return -ENODEV;
4039
4040         if (trace_array_get(tr) < 0)
4041                 return -ENODEV;
4042
4043         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4044         if (ret < 0)
4045                 trace_array_put(tr);
4046
4047         return ret;
4048 }
4049
4050 static const struct file_operations tracing_iter_fops = {
4051         .open           = tracing_trace_options_open,
4052         .read           = seq_read,
4053         .llseek         = seq_lseek,
4054         .release        = tracing_single_release_tr,
4055         .write          = tracing_trace_options_write,
4056 };
4057
4058 static const char readme_msg[] =
4059         "tracing mini-HOWTO:\n\n"
4060         "# echo 0 > tracing_on : quick way to disable tracing\n"
4061         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4062         " Important files:\n"
4063         "  trace\t\t\t- The static contents of the buffer\n"
4064         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4065         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4066         "  current_tracer\t- function and latency tracers\n"
4067         "  available_tracers\t- list of configured tracers for current_tracer\n"
4068         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4069         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4070         "  trace_clock\t\t- change the clock used to order events\n"
4071         "       local:   Per cpu clock but may not be synced across CPUs\n"
4072         "      global:   Synced across CPUs but slows tracing down.\n"
4073         "     counter:   Not a clock, but just an increment\n"
4074         "      uptime:   Jiffy counter from time of boot\n"
4075         "        perf:   Same clock that perf events use\n"
4076 #ifdef CONFIG_X86_64
4077         "     x86-tsc:   TSC cycle counter\n"
4078 #endif
4079         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4080         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4081         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4082         "\t\t\t  Remove sub-buffer with rmdir\n"
4083         "  trace_options\t\t- Set format or modify how tracing happens\n"
4084         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4085         "\t\t\t  option name\n"
4086         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4087 #ifdef CONFIG_DYNAMIC_FTRACE
4088         "\n  available_filter_functions - list of functions that can be filtered on\n"
4089         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4090         "\t\t\t  functions\n"
4091         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4092         "\t     modules: Can select a group via module\n"
4093         "\t      Format: :mod:<module-name>\n"
4094         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4095         "\t    triggers: a command to perform when function is hit\n"
4096         "\t      Format: <function>:<trigger>[:count]\n"
4097         "\t     trigger: traceon, traceoff\n"
4098         "\t\t      enable_event:<system>:<event>\n"
4099         "\t\t      disable_event:<system>:<event>\n"
4100 #ifdef CONFIG_STACKTRACE
4101         "\t\t      stacktrace\n"
4102 #endif
4103 #ifdef CONFIG_TRACER_SNAPSHOT
4104         "\t\t      snapshot\n"
4105 #endif
4106         "\t\t      dump\n"
4107         "\t\t      cpudump\n"
4108         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4109         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4110         "\t     The first one will disable tracing every time do_fault is hit\n"
4111         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4112         "\t       The first time do_trap is hit and it disables tracing, the\n"
4113         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4114         "\t       the counter will not decrement. It only decrements when the\n"
4115         "\t       trigger did work\n"
4116         "\t     To remove trigger without count:\n"
4117         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4118         "\t     To remove trigger with a count:\n"
4119         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4120         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4121         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4122         "\t    modules: Can select a group via module command :mod:\n"
4123         "\t    Does not accept triggers\n"
4124 #endif /* CONFIG_DYNAMIC_FTRACE */
4125 #ifdef CONFIG_FUNCTION_TRACER
4126         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4127         "\t\t    (function)\n"
4128 #endif
4129 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4130         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4131         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4132         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4133 #endif
4134 #ifdef CONFIG_TRACER_SNAPSHOT
4135         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4136         "\t\t\t  snapshot buffer. Read the contents for more\n"
4137         "\t\t\t  information\n"
4138 #endif
4139 #ifdef CONFIG_STACK_TRACER
4140         "  stack_trace\t\t- Shows the max stack trace when active\n"
4141         "  stack_max_size\t- Shows current max stack size that was traced\n"
4142         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4143         "\t\t\t  new trace)\n"
4144 #ifdef CONFIG_DYNAMIC_FTRACE
4145         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4146         "\t\t\t  traces\n"
4147 #endif
4148 #endif /* CONFIG_STACK_TRACER */
4149 #ifdef CONFIG_KPROBE_EVENT
4150         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4151         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4152 #endif
4153 #ifdef CONFIG_UPROBE_EVENT
4154         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4155         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4156 #endif
4157 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4158         "\t  accepts: event-definitions (one definition per line)\n"
4159         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4160         "\t           -:[<group>/]<event>\n"
4161 #ifdef CONFIG_KPROBE_EVENT
4162         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4163 #endif
4164 #ifdef CONFIG_UPROBE_EVENT
4165         "\t    place: <path>:<offset>\n"
4166 #endif
4167         "\t     args: <name>=fetcharg[:type]\n"
4168         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4169         "\t           $stack<index>, $stack, $retval, $comm\n"
4170         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4171         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4172 #endif
4173         "  events/\t\t- Directory containing all trace event subsystems:\n"
4174         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4175         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4176         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4177         "\t\t\t  events\n"
4178         "      filter\t\t- If set, only events passing filter are traced\n"
4179         "  events/<system>/<event>/\t- Directory containing control files for\n"
4180         "\t\t\t  <event>:\n"
4181         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4182         "      filter\t\t- If set, only events passing filter are traced\n"
4183         "      trigger\t\t- If set, a command to perform when event is hit\n"
4184         "\t    Format: <trigger>[:count][if <filter>]\n"
4185         "\t   trigger: traceon, traceoff\n"
4186         "\t            enable_event:<system>:<event>\n"
4187         "\t            disable_event:<system>:<event>\n"
4188 #ifdef CONFIG_HIST_TRIGGERS
4189         "\t            enable_hist:<system>:<event>\n"
4190         "\t            disable_hist:<system>:<event>\n"
4191 #endif
4192 #ifdef CONFIG_STACKTRACE
4193         "\t\t    stacktrace\n"
4194 #endif
4195 #ifdef CONFIG_TRACER_SNAPSHOT
4196         "\t\t    snapshot\n"
4197 #endif
4198 #ifdef CONFIG_HIST_TRIGGERS
4199         "\t\t    hist (see below)\n"
4200 #endif
4201         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4202         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4203         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4204         "\t                  events/block/block_unplug/trigger\n"
4205         "\t   The first disables tracing every time block_unplug is hit.\n"
4206         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4207         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4208         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4209         "\t   Like function triggers, the counter is only decremented if it\n"
4210         "\t    enabled or disabled tracing.\n"
4211         "\t   To remove a trigger without a count:\n"
4212         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4213         "\t   To remove a trigger with a count:\n"
4214         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4215         "\t   Filters can be ignored when removing a trigger.\n"
4216 #ifdef CONFIG_HIST_TRIGGERS
4217         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4218         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4219         "\t            [:values=<field1[,field2,...]>]\n"
4220         "\t            [:sort=<field1[,field2,...]>]\n"
4221         "\t            [:size=#entries]\n"
4222         "\t            [:pause][:continue][:clear]\n"
4223         "\t            [:name=histname1]\n"
4224         "\t            [if <filter>]\n\n"
4225         "\t    When a matching event is hit, an entry is added to a hash\n"
4226         "\t    table using the key(s) and value(s) named, and the value of a\n"
4227         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4228         "\t    correspond to fields in the event's format description.  Keys\n"
4229         "\t    can be any field, or the special string 'stacktrace'.\n"
4230         "\t    Compound keys consisting of up to two fields can be specified\n"
4231         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4232         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4233         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4234         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4235         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4236         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4237         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4238         "\t    its histogram data will be shared with other triggers of the\n"
4239         "\t    same name, and trigger hits will update this common data.\n\n"
4240         "\t    Reading the 'hist' file for the event will dump the hash\n"
4241         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4242         "\t    triggers attached to an event, there will be a table for each\n"
4243         "\t    trigger in the output.  The table displayed for a named\n"
4244         "\t    trigger will be the same as any other instance having the\n"
4245         "\t    same name.  The default format used to display a given field\n"
4246         "\t    can be modified by appending any of the following modifiers\n"
4247         "\t    to the field name, as applicable:\n\n"
4248         "\t            .hex        display a number as a hex value\n"
4249         "\t            .sym        display an address as a symbol\n"
4250         "\t            .sym-offset display an address as a symbol and offset\n"
4251         "\t            .execname   display a common_pid as a program name\n"
4252         "\t            .syscall    display a syscall id as a syscall name\n"
4253         "\t            .log2       display log2 value rather than raw number\n\n"
4254         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4255         "\t    trigger or to start a hist trigger but not log any events\n"
4256         "\t    until told to do so.  'continue' can be used to start or\n"
4257         "\t    restart a paused hist trigger.\n\n"
4258         "\t    The 'clear' parameter will clear the contents of a running\n"
4259         "\t    hist trigger and leave its current paused/active state\n"
4260         "\t    unchanged.\n\n"
4261         "\t    The enable_hist and disable_hist triggers can be used to\n"
4262         "\t    have one event conditionally start and stop another event's\n"
4263         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4264         "\t    the enable_event and disable_event triggers.\n"
4265 #endif
4266 ;
4267
4268 static ssize_t
4269 tracing_readme_read(struct file *filp, char __user *ubuf,
4270                        size_t cnt, loff_t *ppos)
4271 {
4272         return simple_read_from_buffer(ubuf, cnt, ppos,
4273                                         readme_msg, strlen(readme_msg));
4274 }
4275
4276 static const struct file_operations tracing_readme_fops = {
4277         .open           = tracing_open_generic,
4278         .read           = tracing_readme_read,
4279         .llseek         = generic_file_llseek,
4280 };
4281
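/*
 * seq_file iterator over the saved pid -> comm mappings, skipping unused
 * slots.  This backs the "saved_cmdlines" file.
 */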
4282 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4283 {
4284         unsigned int *ptr = v;
4285
4286         if (*pos || m->count)
4287                 ptr++;
4288
4289         (*pos)++;
4290
4291         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4292              ptr++) {
4293                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4294                         continue;
4295
4296                 return ptr;
4297         }
4298
4299         return NULL;
4300 }
4301
4302 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4303 {
4304         void *v;
4305         loff_t l = 0;
4306
4307         preempt_disable();
4308         arch_spin_lock(&trace_cmdline_lock);
4309
4310         v = &savedcmd->map_cmdline_to_pid[0];
4311         while (l <= *pos) {
4312                 v = saved_cmdlines_next(m, v, &l);
4313                 if (!v)
4314                         return NULL;
4315         }
4316
4317         return v;
4318 }
4319
4320 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4321 {
4322         arch_spin_unlock(&trace_cmdline_lock);
4323         preempt_enable();
4324 }
4325
4326 static int saved_cmdlines_show(struct seq_file *m, void *v)
4327 {
4328         char buf[TASK_COMM_LEN];
4329         unsigned int *pid = v;
4330
4331         __trace_find_cmdline(*pid, buf);
4332         seq_printf(m, "%d %s\n", *pid, buf);
4333         return 0;
4334 }
4335
4336 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4337         .start          = saved_cmdlines_start,
4338         .next           = saved_cmdlines_next,
4339         .stop           = saved_cmdlines_stop,
4340         .show           = saved_cmdlines_show,
4341 };
4342
4343 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4344 {
4345         if (tracing_disabled)
4346                 return -ENODEV;
4347
4348         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4349 }
4350
4351 static const struct file_operations tracing_saved_cmdlines_fops = {
4352         .open           = tracing_saved_cmdlines_open,
4353         .read           = seq_read,
4354         .llseek         = seq_lseek,
4355         .release        = seq_release,
4356 };
4357
4358 static ssize_t
4359 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4360                                  size_t cnt, loff_t *ppos)
4361 {
4362         char buf[64];
4363         int r;
4364
4365         arch_spin_lock(&trace_cmdline_lock);
4366         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4367         arch_spin_unlock(&trace_cmdline_lock);
4368
4369         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4370 }
4371
4372 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4373 {
4374         kfree(s->saved_cmdlines);
4375         kfree(s->map_cmdline_to_pid);
4376         kfree(s);
4377 }
4378
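/*
 * Allocate a new saved_cmdlines buffer with room for @val entries and
 * swap it in under the cmdline lock, then free the old one.
 */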
4379 static int tracing_resize_saved_cmdlines(unsigned int val)
4380 {
4381         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4382
4383         s = kmalloc(sizeof(*s), GFP_KERNEL);
4384         if (!s)
4385                 return -ENOMEM;
4386
4387         if (allocate_cmdlines_buffer(val, s) < 0) {
4388                 kfree(s);
4389                 return -ENOMEM;
4390         }
4391
4392         arch_spin_lock(&trace_cmdline_lock);
4393         savedcmd_temp = savedcmd;
4394         savedcmd = s;
4395         arch_spin_unlock(&trace_cmdline_lock);
4396         free_saved_cmdlines_buffer(savedcmd_temp);
4397
4398         return 0;
4399 }
4400
4401 static ssize_t
4402 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4403                                   size_t cnt, loff_t *ppos)
4404 {
4405         unsigned long val;
4406         int ret;
4407
4408         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4409         if (ret)
4410                 return ret;
4411
4412         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4413         if (!val || val > PID_MAX_DEFAULT)
4414                 return -EINVAL;
4415
4416         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4417         if (ret < 0)
4418                 return ret;
4419
4420         *ppos += cnt;
4421
4422         return cnt;
4423 }
4424
4425 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4426         .open           = tracing_open_generic,
4427         .read           = tracing_saved_cmdlines_size_read,
4428         .write          = tracing_saved_cmdlines_size_write,
4429 };
4430
4431 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
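/*
 * The saved enum maps are kept as a list of arrays.  Each array begins
 * with a head item (module and length) and ends with a tail item whose
 * ->next points at the following array, if any (see
 * trace_insert_enum_map_file() below).  update_enum_map() lets the
 * seq_file iterator step across array boundaries transparently.
 */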
4432 static union trace_enum_map_item *
4433 update_enum_map(union trace_enum_map_item *ptr)
4434 {
4435         if (!ptr->map.enum_string) {
4436                 if (ptr->tail.next) {
4437                         ptr = ptr->tail.next;
4438                         /* Set ptr to the next real item (skip head) */
4439                         ptr++;
4440                 } else
4441                         return NULL;
4442         }
4443         return ptr;
4444 }
4445
4446 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4447 {
4448         union trace_enum_map_item *ptr = v;
4449
4450         /*
4451          * Paranoid! If ptr points to end, we don't want to increment past it.
4452          * This really should never happen.
4453          */
4454         ptr = update_enum_map(ptr);
4455         if (WARN_ON_ONCE(!ptr))
4456                 return NULL;
4457
4458         ptr++;
4459
4460         (*pos)++;
4461
4462         ptr = update_enum_map(ptr);
4463
4464         return ptr;
4465 }
4466
4467 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4468 {
4469         union trace_enum_map_item *v;
4470         loff_t l = 0;
4471
4472         mutex_lock(&trace_enum_mutex);
4473
4474         v = trace_enum_maps;
4475         if (v)
4476                 v++;
4477
4478         while (v && l < *pos) {
4479                 v = enum_map_next(m, v, &l);
4480         }
4481
4482         return v;
4483 }
4484
4485 static void enum_map_stop(struct seq_file *m, void *v)
4486 {
4487         mutex_unlock(&trace_enum_mutex);
4488 }
4489
4490 static int enum_map_show(struct seq_file *m, void *v)
4491 {
4492         union trace_enum_map_item *ptr = v;
4493
4494         seq_printf(m, "%s %ld (%s)\n",
4495                    ptr->map.enum_string, ptr->map.enum_value,
4496                    ptr->map.system);
4497
4498         return 0;
4499 }
4500
4501 static const struct seq_operations tracing_enum_map_seq_ops = {
4502         .start          = enum_map_start,
4503         .next           = enum_map_next,
4504         .stop           = enum_map_stop,
4505         .show           = enum_map_show,
4506 };
4507
4508 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4509 {
4510         if (tracing_disabled)
4511                 return -ENODEV;
4512
4513         return seq_open(filp, &tracing_enum_map_seq_ops);
4514 }
4515
4516 static const struct file_operations tracing_enum_map_fops = {
4517         .open           = tracing_enum_map_open,
4518         .read           = seq_read,
4519         .llseek         = seq_lseek,
4520         .release        = seq_release,
4521 };
4522
4523 static inline union trace_enum_map_item *
4524 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4525 {
4526         /* Return tail of array given the head */
4527         return ptr + ptr->head.length + 1;
4528 }
4529
4530 static void
4531 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4532                            int len)
4533 {
4534         struct trace_enum_map **stop;
4535         struct trace_enum_map **map;
4536         union trace_enum_map_item *map_array;
4537         union trace_enum_map_item *ptr;
4538
4539         stop = start + len;
4540
4541         /*
4542          * The trace_enum_maps contains the map plus a head and tail item,
4543          * where the head holds the module and length of array, and the
4544          * tail holds a pointer to the next list.
4545          */
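        /*
         * Layout sketch (hypothetical array with len == 2):
         *
         *   [ head: mod, length=2 ][ map 0 ][ map 1 ][ tail: next ]
         *
         * trace_enum_jmp_to_tail() skips the head plus 'length' map items
         * to land on the tail.
         */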
4546         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4547         if (!map_array) {
4548                 pr_warn("Unable to allocate trace enum mapping\n");
4549                 return;
4550         }
4551
4552         mutex_lock(&trace_enum_mutex);
4553
4554         if (!trace_enum_maps)
4555                 trace_enum_maps = map_array;
4556         else {
4557                 ptr = trace_enum_maps;
4558                 for (;;) {
4559                         ptr = trace_enum_jmp_to_tail(ptr);
4560                         if (!ptr->tail.next)
4561                                 break;
4562                         ptr = ptr->tail.next;
4563
4564                 }
4565                 ptr->tail.next = map_array;
4566         }
4567         map_array->head.mod = mod;
4568         map_array->head.length = len;
4569         map_array++;
4570
4571         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4572                 map_array->map = **map;
4573                 map_array++;
4574         }
4575         memset(map_array, 0, sizeof(*map_array));
4576
4577         mutex_unlock(&trace_enum_mutex);
4578 }
4579
4580 static void trace_create_enum_file(struct dentry *d_tracer)
4581 {
4582         trace_create_file("enum_map", 0444, d_tracer,
4583                           NULL, &tracing_enum_map_fops);
4584 }
4585
4586 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4587 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4588 static inline void trace_insert_enum_map_file(struct module *mod,
4589                               struct trace_enum_map **start, int len) { }
4590 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4591
4592 static void trace_insert_enum_map(struct module *mod,
4593                                   struct trace_enum_map **start, int len)
4594 {
4595         struct trace_enum_map **map;
4596
4597         if (len <= 0)
4598                 return;
4599
4600         map = start;
4601
4602         trace_event_enum_update(map, len);
4603
4604         trace_insert_enum_map_file(mod, start, len);
4605 }
4606
4607 static ssize_t
4608 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4609                        size_t cnt, loff_t *ppos)
4610 {
4611         struct trace_array *tr = filp->private_data;
4612         char buf[MAX_TRACER_SIZE+2];
4613         int r;
4614
4615         mutex_lock(&trace_types_lock);
4616         r = sprintf(buf, "%s\n", tr->current_trace->name);
4617         mutex_unlock(&trace_types_lock);
4618
4619         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4620 }
4621
4622 int tracer_init(struct tracer *t, struct trace_array *tr)
4623 {
4624         tracing_reset_online_cpus(&tr->trace_buffer);
4625         return t->init(tr);
4626 }
4627
4628 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4629 {
4630         int cpu;
4631
4632         for_each_tracing_cpu(cpu)
4633                 per_cpu_ptr(buf->data, cpu)->entries = val;
4634 }
4635
4636 #ifdef CONFIG_TRACER_MAX_TRACE
4637 /* resize @trace_buf to match the per-cpu entry counts of @size_buf */
4638 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4639                                         struct trace_buffer *size_buf, int cpu_id)
4640 {
4641         int cpu, ret = 0;
4642
4643         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4644                 for_each_tracing_cpu(cpu) {
4645                         ret = ring_buffer_resize(trace_buf->buffer,
4646                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4647                         if (ret < 0)
4648                                 break;
4649                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4650                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4651                 }
4652         } else {
4653                 ret = ring_buffer_resize(trace_buf->buffer,
4654                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4655                 if (ret == 0)
4656                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4657                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4658         }
4659
4660         return ret;
4661 }
4662 #endif /* CONFIG_TRACER_MAX_TRACE */
4663
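/*
 * Resize the main trace buffer and, when the current tracer of the
 * top-level instance uses the max/snapshot buffer, keep that buffer the
 * same size.  If the resize fails part way, the buffers are put back to
 * a consistent size or, failing even that, tracing is disabled.
 */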
4664 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4665                                         unsigned long size, int cpu)
4666 {
4667         int ret;
4668
4669         /*
4670          * If kernel or user changes the size of the ring buffer
4671          * we use the size that was given, and we can forget about
4672          * expanding it later.
4673          */
4674         ring_buffer_expanded = true;
4675
4676         /* May be called before buffers are initialized */
4677         if (!tr->trace_buffer.buffer)
4678                 return 0;
4679
4680         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4681         if (ret < 0)
4682                 return ret;
4683
4684 #ifdef CONFIG_TRACER_MAX_TRACE
4685         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4686             !tr->current_trace->use_max_tr)
4687                 goto out;
4688
4689         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4690         if (ret < 0) {
4691                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4692                                                      &tr->trace_buffer, cpu);
4693                 if (r < 0) {
4694                         /*
4695                          * AARGH! We are left with different
4696                          * size max buffer!!!!
4697                          * The max buffer is our "snapshot" buffer.
4698                          * When a tracer needs a snapshot (one of the
4699                          * latency tracers), it swaps the max buffer
4700                          * with the saved snapshot. We succeeded in updating
4701                          * the size of the main buffer, but failed to
4702                          * update the size of the max buffer. But when we tried
4703                          * to reset the main buffer to the original size, we
4704                          * failed there too. This is very unlikely to
4705                          * happen, but if it does, warn and kill all
4706                          * tracing.
4707                          */
4708                         WARN_ON(1);
4709                         tracing_disabled = 1;
4710                 }
4711                 return ret;
4712         }
4713
4714         if (cpu == RING_BUFFER_ALL_CPUS)
4715                 set_buffer_entries(&tr->max_buffer, size);
4716         else
4717                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4718
4719  out:
4720 #endif /* CONFIG_TRACER_MAX_TRACE */
4721
4722         if (cpu == RING_BUFFER_ALL_CPUS)
4723                 set_buffer_entries(&tr->trace_buffer, size);
4724         else
4725                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4726
4727         return ret;
4728 }
4729
4730 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4731                                           unsigned long size, int cpu_id)
4732 {
4733         int ret = size;
4734
4735         mutex_lock(&trace_types_lock);
4736
4737         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4738                 /* make sure this cpu is enabled in the mask */
4739                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4740                         ret = -EINVAL;
4741                         goto out;
4742                 }
4743         }
4744
4745         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4746         if (ret < 0)
4747                 ret = -ENOMEM;
4748
4749 out:
4750         mutex_unlock(&trace_types_lock);
4751
4752         return ret;
4753 }
4754
4755
4756 /**
4757  * tracing_update_buffers - used by tracing facility to expand ring buffers
4758  *
4759  * To save memory when tracing is never used on a system that has it
4760  * configured in, the ring buffers start out at a minimum size. Once
4761  * a user starts to use the tracing facility, they need to grow
4762  * to their default size.
4763  *
4764  * This function is to be called when a tracer is about to be used.
4765  */
4766 int tracing_update_buffers(void)
4767 {
4768         int ret = 0;
4769
4770         mutex_lock(&trace_types_lock);
4771         if (!ring_buffer_expanded)
4772                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4773                                                 RING_BUFFER_ALL_CPUS);
4774         mutex_unlock(&trace_types_lock);
4775
4776         return ret;
4777 }
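/*
 * Example (illustrative sketch, not taken from the tracing core): an
 * in-kernel caller would typically expand the buffers right before it
 * starts writing into them. The function names below are hypothetical;
 * the pattern mirrors how the event-enable write path calls
 * tracing_update_buffers() before enabling events.
 *
 *	static int my_feature_enable(struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		ret = tracing_update_buffers();
 *		if (ret < 0)
 *			return ret;
 *
 *		return my_feature_start_writing(tr);
 *	}
 */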
4778
4779 struct trace_option_dentry;
4780
4781 static void
4782 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4783
4784 /*
4785  * Used to clear out the tracer before deletion of an instance.
4786  * Must have trace_types_lock held.
4787  */
4788 static void tracing_set_nop(struct trace_array *tr)
4789 {
4790         if (tr->current_trace == &nop_trace)
4791                 return;
4792
4793         tr->current_trace->enabled--;
4794
4795         if (tr->current_trace->reset)
4796                 tr->current_trace->reset(tr);
4797
4798         tr->current_trace = &nop_trace;
4799 }
4800
4801 static bool tracer_options_updated;
4802
4803 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4804 {
4805         /* Only enable if the directory has been created already. */
4806         if (!tr->dir)
4807                 return;
4808
4809         /* Only create trace option files after update_tracer_options finishes */
4810         if (!tracer_options_updated)
4811                 return;
4812
4813         create_trace_option_files(tr, t);
4814 }
4815
4816 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4817 {
4818         struct tracer *t;
4819 #ifdef CONFIG_TRACER_MAX_TRACE
4820         bool had_max_tr;
4821 #endif
4822         int ret = 0;
4823
4824         mutex_lock(&trace_types_lock);
4825
4826         if (!ring_buffer_expanded) {
4827                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4828                                                 RING_BUFFER_ALL_CPUS);
4829                 if (ret < 0)
4830                         goto out;
4831                 ret = 0;
4832         }
4833
4834         for (t = trace_types; t; t = t->next) {
4835                 if (strcmp(t->name, buf) == 0)
4836                         break;
4837         }
4838         if (!t) {
4839                 ret = -EINVAL;
4840                 goto out;
4841         }
4842         if (t == tr->current_trace)
4843                 goto out;
4844
4845         /* Some tracers are only allowed for the top level buffer */
4846         if (!trace_ok_for_array(t, tr)) {
4847                 ret = -EINVAL;
4848                 goto out;
4849         }
4850
4851         /* If trace pipe files are being read, we can't change the tracer */
4852         if (tr->current_trace->ref) {
4853                 ret = -EBUSY;
4854                 goto out;
4855         }
4856
4857         trace_branch_disable();
4858
4859         tr->current_trace->enabled--;
4860
4861         if (tr->current_trace->reset)
4862                 tr->current_trace->reset(tr);
4863
4864         /* Current trace needs to be nop_trace before synchronize_sched */
4865         tr->current_trace = &nop_trace;
4866
4867 #ifdef CONFIG_TRACER_MAX_TRACE
4868         had_max_tr = tr->allocated_snapshot;
4869
4870         if (had_max_tr && !t->use_max_tr) {
4871                 /*
4872                  * We need to make sure that the update_max_tr sees that
4873                  * current_trace changed to nop_trace to keep it from
4874                  * swapping the buffers after we resize it.
4875                  * update_max_tr() is called with interrupts disabled,
4876                  * so a synchronize_sched() is sufficient.
4877                  */
4878                 synchronize_sched();
4879                 free_snapshot(tr);
4880         }
4881 #endif
4882
4883 #ifdef CONFIG_TRACER_MAX_TRACE
4884         if (t->use_max_tr && !had_max_tr) {
4885                 ret = alloc_snapshot(tr);
4886                 if (ret < 0)
4887                         goto out;
4888         }
4889 #endif
4890
4891         if (t->init) {
4892                 ret = tracer_init(t, tr);
4893                 if (ret)
4894                         goto out;
4895         }
4896
4897         tr->current_trace = t;
4898         tr->current_trace->enabled++;
4899         trace_branch_enable(tr);
4900  out:
4901         mutex_unlock(&trace_types_lock);
4902
4903         return ret;
4904 }
4905
4906 static ssize_t
4907 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4908                         size_t cnt, loff_t *ppos)
4909 {
4910         struct trace_array *tr = filp->private_data;
4911         char buf[MAX_TRACER_SIZE+1];
4912         int i;
4913         size_t ret;
4914         int err;
4915
4916         ret = cnt;
4917
4918         if (cnt > MAX_TRACER_SIZE)
4919                 cnt = MAX_TRACER_SIZE;
4920
4921         if (copy_from_user(buf, ubuf, cnt))
4922                 return -EFAULT;
4923
4924         buf[cnt] = 0;
4925
4926         /* strip trailing whitespace. */
4927         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4928                 buf[i] = 0;
4929
4930         err = tracing_set_tracer(tr, buf);
4931         if (err)
4932                 return err;
4933
4934         *ppos += ret;
4935
4936         return ret;
4937 }
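/*
 * Example (userspace sketch): tracing_set_trace_write() is what runs when
 * a tool writes a tracer name into the "current_tracer" file. The tracefs
 * mount point below is an assumption; it may instead be mounted under
 * /sys/kernel/debug/tracing.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_tracer(const char *name)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, name, strlen(name));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 *
 * set_tracer("nop") selects the no-op tracer; a name that is not in the
 * trace_types list makes the write fail with EINVAL, matching the lookup
 * loop above.
 */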
4938
4939 static ssize_t
4940 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4941                    size_t cnt, loff_t *ppos)
4942 {
4943         char buf[64];
4944         int r;
4945
4946         r = snprintf(buf, sizeof(buf), "%ld\n",
4947                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4948         if (r > sizeof(buf))
4949                 r = sizeof(buf);
4950         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4951 }
4952
4953 static ssize_t
4954 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4955                     size_t cnt, loff_t *ppos)
4956 {
4957         unsigned long val;
4958         int ret;
4959
4960         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4961         if (ret)
4962                 return ret;
4963
4964         *ptr = val * 1000;
4965
4966         return cnt;
4967 }
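/*
 * The value written is interpreted as microseconds and stored internally
 * in nanoseconds; e.g. writing "250" to a file backed by this helper
 * (such as tracing_thresh below) stores 250 * 1000 = 250000, which
 * tracing_nsecs_read() prints back as "250".
 */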
4968
4969 static ssize_t
4970 tracing_thresh_read(struct file *filp, char __user *ubuf,
4971                     size_t cnt, loff_t *ppos)
4972 {
4973         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4974 }
4975
4976 static ssize_t
4977 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4978                      size_t cnt, loff_t *ppos)
4979 {
4980         struct trace_array *tr = filp->private_data;
4981         int ret;
4982
4983         mutex_lock(&trace_types_lock);
4984         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4985         if (ret < 0)
4986                 goto out;
4987
4988         if (tr->current_trace->update_thresh) {
4989                 ret = tr->current_trace->update_thresh(tr);
4990                 if (ret < 0)
4991                         goto out;
4992         }
4993
4994         ret = cnt;
4995 out:
4996         mutex_unlock(&trace_types_lock);
4997
4998         return ret;
4999 }
5000
5001 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5002
5003 static ssize_t
5004 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5005                      size_t cnt, loff_t *ppos)
5006 {
5007         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5008 }
5009
5010 static ssize_t
5011 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5012                       size_t cnt, loff_t *ppos)
5013 {
5014         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5015 }
5016
5017 #endif
5018
5019 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5020 {
5021         struct trace_array *tr = inode->i_private;
5022         struct trace_iterator *iter;
5023         int ret = 0;
5024
5025         if (tracing_disabled)
5026                 return -ENODEV;
5027
5028         if (trace_array_get(tr) < 0)
5029                 return -ENODEV;
5030
5031         mutex_lock(&trace_types_lock);
5032
5033         /* create a buffer to store the information to pass to userspace */
5034         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5035         if (!iter) {
5036                 ret = -ENOMEM;
5037                 __trace_array_put(tr);
5038                 goto out;
5039         }
5040
5041         trace_seq_init(&iter->seq);
5042         iter->trace = tr->current_trace;
5043
5044         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5045                 ret = -ENOMEM;
5046                 goto fail;
5047         }
5048
5049         /* trace pipe does not show start of buffer */
5050         cpumask_setall(iter->started);
5051
5052         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5053                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5054
5055         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5056         if (trace_clocks[tr->clock_id].in_ns)
5057                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5058
5059         iter->tr = tr;
5060         iter->trace_buffer = &tr->trace_buffer;
5061         iter->cpu_file = tracing_get_cpu(inode);
5062         mutex_init(&iter->mutex);
5063         filp->private_data = iter;
5064
5065         if (iter->trace->pipe_open)
5066                 iter->trace->pipe_open(iter);
5067
5068         nonseekable_open(inode, filp);
5069
5070         tr->current_trace->ref++;
5071 out:
5072         mutex_unlock(&trace_types_lock);
5073         return ret;
5074
5075 fail:
5076         kfree(iter);
5077         __trace_array_put(tr);
5078         mutex_unlock(&trace_types_lock);
5079         return ret;
5080 }
5081
5082 static int tracing_release_pipe(struct inode *inode, struct file *file)
5083 {
5084         struct trace_iterator *iter = file->private_data;
5085         struct trace_array *tr = inode->i_private;
5086
5087         mutex_lock(&trace_types_lock);
5088
5089         tr->current_trace->ref--;
5090
5091         if (iter->trace->pipe_close)
5092                 iter->trace->pipe_close(iter);
5093
5094         mutex_unlock(&trace_types_lock);
5095
5096         free_cpumask_var(iter->started);
5097         mutex_destroy(&iter->mutex);
5098         kfree(iter);
5099
5100         trace_array_put(tr);
5101
5102         return 0;
5103 }
5104
5105 static unsigned int
5106 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5107 {
5108         struct trace_array *tr = iter->tr;
5109
5110         /* Iterators are static, they should be filled or empty */
5111         if (trace_buffer_iter(iter, iter->cpu_file))
5112                 return POLLIN | POLLRDNORM;
5113
5114         if (tr->trace_flags & TRACE_ITER_BLOCK)
5115                 /*
5116                  * Always select as readable when in blocking mode
5117                  */
5118                 return POLLIN | POLLRDNORM;
5119         else
5120                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5121                                              filp, poll_table);
5122 }
5123
5124 static unsigned int
5125 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5126 {
5127         struct trace_iterator *iter = filp->private_data;
5128
5129         return trace_poll(iter, filp, poll_table);
5130 }
5131
5132 /* Must be called with iter->mutex held. */
5133 static int tracing_wait_pipe(struct file *filp)
5134 {
5135         struct trace_iterator *iter = filp->private_data;
5136         int ret;
5137
5138         while (trace_empty(iter)) {
5139
5140                 if ((filp->f_flags & O_NONBLOCK)) {
5141                         return -EAGAIN;
5142                 }
5143
5144                 /*
5145                  * We do not return EOF while tracing is disabled unless
5146                  * we have already read something. This allows a user to
5147                  * cat this file, and then enable tracing. But after we
5148                  * have read something, we give an EOF when tracing is
5149                  * disabled again.
5150                  *
5151                  * iter->pos will be 0 if we haven't read anything.
5152                  */
5153                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5154                         break;
5155
5156                 mutex_unlock(&iter->mutex);
5157
5158                 ret = wait_on_pipe(iter, false);
5159
5160                 mutex_lock(&iter->mutex);
5161
5162                 if (ret)
5163                         return ret;
5164         }
5165
5166         return 1;
5167 }
5168
5169 /*
5170  * Consumer reader.
5171  */
5172 static ssize_t
5173 tracing_read_pipe(struct file *filp, char __user *ubuf,
5174                   size_t cnt, loff_t *ppos)
5175 {
5176         struct trace_iterator *iter = filp->private_data;
5177         ssize_t sret;
5178
5179         /*
5180          * Avoid more than one consumer on a single file descriptor.
5181          * This is just a matter of trace coherency; the ring buffer itself
5182          * is protected.
5183          */
5184         mutex_lock(&iter->mutex);
5185
5186         /* return any leftover data */
5187         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5188         if (sret != -EBUSY)
5189                 goto out;
5190
5191         trace_seq_init(&iter->seq);
5192
5193         if (iter->trace->read) {
5194                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5195                 if (sret)
5196                         goto out;
5197         }
5198
5199 waitagain:
5200         sret = tracing_wait_pipe(filp);
5201         if (sret <= 0)
5202                 goto out;
5203
5204         /* stop when tracing is finished */
5205         if (trace_empty(iter)) {
5206                 sret = 0;
5207                 goto out;
5208         }
5209
5210         if (cnt >= PAGE_SIZE)
5211                 cnt = PAGE_SIZE - 1;
5212
5213         /* reset all but tr, trace, and overruns */
5214         memset(&iter->seq, 0,
5215                sizeof(struct trace_iterator) -
5216                offsetof(struct trace_iterator, seq));
5217         cpumask_clear(iter->started);
5218         trace_seq_init(&iter->seq);
5219         iter->pos = -1;
5220
5221         trace_event_read_lock();
5222         trace_access_lock(iter->cpu_file);
5223         while (trace_find_next_entry_inc(iter) != NULL) {
5224                 enum print_line_t ret;
5225                 int save_len = iter->seq.seq.len;
5226
5227                 ret = print_trace_line(iter);
5228                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5229                         /*
5230                          * If one print_trace_line() fills the entire trace_seq in one shot,
5231                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
5232                          * In this case, we need to consume it; otherwise the loop will peek
5233                          * this event next time, resulting in an infinite loop.
5234                          */
5235                         if (save_len == 0) {
5236                                 iter->seq.full = 0;
5237                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
5238                                 trace_consume(iter);
5239                                 break;
5240                         }
5241
5242                         /* In other cases, don't print partial lines */
5243                         iter->seq.seq.len = save_len;
5244                         break;
5245                 }
5246                 if (ret != TRACE_TYPE_NO_CONSUME)
5247                         trace_consume(iter);
5248
5249                 if (trace_seq_used(&iter->seq) >= cnt)
5250                         break;
5251
5252                 /*
5253                  * Setting the full flag means we reached the trace_seq buffer
5254                  * size and we should have left via the partial-output condition above.
5255                  * One of the trace_seq_* functions is not used properly.
5256                  */
5257                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5258                           iter->ent->type);
5259         }
5260         trace_access_unlock(iter->cpu_file);
5261         trace_event_read_unlock();
5262
5263         /* Now copy what we have to the user */
5264         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5265         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5266                 trace_seq_init(&iter->seq);
5267
5268         /*
5269          * If there was nothing to send to user, in spite of consuming trace
5270          * entries, go back to wait for more entries.
5271          */
5272         if (sret == -EBUSY)
5273                 goto waitagain;
5274
5275 out:
5276         mutex_unlock(&iter->mutex);
5277
5278         return sret;
5279 }
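/*
 * Example (userspace sketch): a minimal consumer of the "trace_pipe"
 * file served by tracing_read_pipe()/tracing_poll_pipe() above. Reads
 * consume events; with the file opened O_NONBLOCK the read fails with
 * EAGAIN instead of sleeping. The path assumes tracefs is mounted at
 * /sys/kernel/tracing.
 *
 *	#include <fcntl.h>
 *	#include <poll.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int dump_trace_pipe(void)
 *	{
 *		char buf[4096];
 *		struct pollfd pfd;
 *		ssize_t n;
 *
 *		pfd.fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *		if (pfd.fd < 0)
 *			return -1;
 *		pfd.events = POLLIN;
 *		while (poll(&pfd, 1, -1) > 0) {
 *			n = read(pfd.fd, buf, sizeof(buf));
 *			if (n <= 0)
 *				break;
 *			fwrite(buf, 1, n, stdout);
 *		}
 *		close(pfd.fd);
 *		return 0;
 *	}
 */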
5280
5281 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5282                                      unsigned int idx)
5283 {
5284         __free_page(spd->pages[idx]);
5285 }
5286
5287 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5288         .can_merge              = 0,
5289         .confirm                = generic_pipe_buf_confirm,
5290         .release                = generic_pipe_buf_release,
5291         .steal                  = generic_pipe_buf_steal,
5292         .get                    = generic_pipe_buf_get,
5293 };
5294
5295 static size_t
5296 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5297 {
5298         size_t count;
5299         int save_len;
5300         int ret;
5301
5302         /* Seq buffer is page-sized, exactly what we need. */
5303         for (;;) {
5304                 save_len = iter->seq.seq.len;
5305                 ret = print_trace_line(iter);
5306
5307                 if (trace_seq_has_overflowed(&iter->seq)) {
5308                         iter->seq.seq.len = save_len;
5309                         break;
5310                 }
5311
5312                 /*
5313                  * This should not be hit, because it should only
5314                  * be set if the iter->seq overflowed. But check it
5315                  * anyway to be safe.
5316                  */
5317                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5318                         iter->seq.seq.len = save_len;
5319                         break;
5320                 }
5321
5322                 count = trace_seq_used(&iter->seq) - save_len;
5323                 if (rem < count) {
5324                         rem = 0;
5325                         iter->seq.seq.len = save_len;
5326                         break;
5327                 }
5328
5329                 if (ret != TRACE_TYPE_NO_CONSUME)
5330                         trace_consume(iter);
5331                 rem -= count;
5332                 if (!trace_find_next_entry_inc(iter))   {
5333                         rem = 0;
5334                         iter->ent = NULL;
5335                         break;
5336                 }
5337         }
5338
5339         return rem;
5340 }
5341
5342 static ssize_t tracing_splice_read_pipe(struct file *filp,
5343                                         loff_t *ppos,
5344                                         struct pipe_inode_info *pipe,
5345                                         size_t len,
5346                                         unsigned int flags)
5347 {
5348         struct page *pages_def[PIPE_DEF_BUFFERS];
5349         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5350         struct trace_iterator *iter = filp->private_data;
5351         struct splice_pipe_desc spd = {
5352                 .pages          = pages_def,
5353                 .partial        = partial_def,
5354                 .nr_pages       = 0, /* This gets updated below. */
5355                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5356                 .flags          = flags,
5357                 .ops            = &tracing_pipe_buf_ops,
5358                 .spd_release    = tracing_spd_release_pipe,
5359         };
5360         ssize_t ret;
5361         size_t rem;
5362         unsigned int i;
5363
5364         if (splice_grow_spd(pipe, &spd))
5365                 return -ENOMEM;
5366
5367         mutex_lock(&iter->mutex);
5368
5369         if (iter->trace->splice_read) {
5370                 ret = iter->trace->splice_read(iter, filp,
5371                                                ppos, pipe, len, flags);
5372                 if (ret)
5373                         goto out_err;
5374         }
5375
5376         ret = tracing_wait_pipe(filp);
5377         if (ret <= 0)
5378                 goto out_err;
5379
5380         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5381                 ret = -EFAULT;
5382                 goto out_err;
5383         }
5384
5385         trace_event_read_lock();
5386         trace_access_lock(iter->cpu_file);
5387
5388         /* Fill as many pages as possible. */
5389         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5390                 spd.pages[i] = alloc_page(GFP_KERNEL);
5391                 if (!spd.pages[i])
5392                         break;
5393
5394                 rem = tracing_fill_pipe_page(rem, iter);
5395
5396                 /* Copy the data into the page, so we can start over. */
5397                 ret = trace_seq_to_buffer(&iter->seq,
5398                                           page_address(spd.pages[i]),
5399                                           trace_seq_used(&iter->seq));
5400                 if (ret < 0) {
5401                         __free_page(spd.pages[i]);
5402                         break;
5403                 }
5404                 spd.partial[i].offset = 0;
5405                 spd.partial[i].len = trace_seq_used(&iter->seq);
5406
5407                 trace_seq_init(&iter->seq);
5408         }
5409
5410         trace_access_unlock(iter->cpu_file);
5411         trace_event_read_unlock();
5412         mutex_unlock(&iter->mutex);
5413
5414         spd.nr_pages = i;
5415
5416         if (i)
5417                 ret = splice_to_pipe(pipe, &spd);
5418         else
5419                 ret = 0;
5420 out:
5421         splice_shrink_spd(&spd);
5422         return ret;
5423
5424 out_err:
5425         mutex_unlock(&iter->mutex);
5426         goto out;
5427 }
5428
5429 static ssize_t
5430 tracing_entries_read(struct file *filp, char __user *ubuf,
5431                      size_t cnt, loff_t *ppos)
5432 {
5433         struct inode *inode = file_inode(filp);
5434         struct trace_array *tr = inode->i_private;
5435         int cpu = tracing_get_cpu(inode);
5436         char buf[64];
5437         int r = 0;
5438         ssize_t ret;
5439
5440         mutex_lock(&trace_types_lock);
5441
5442         if (cpu == RING_BUFFER_ALL_CPUS) {
5443                 int cpu, buf_size_same;
5444                 unsigned long size;
5445
5446                 size = 0;
5447                 buf_size_same = 1;
5448                 /* check if all cpu sizes are same */
5449                 /* check if all cpu sizes are the same */
5450                         /* fill in the size from first enabled cpu */
5451                         if (size == 0)
5452                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5453                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5454                                 buf_size_same = 0;
5455                                 break;
5456                         }
5457                 }
5458
5459                 if (buf_size_same) {
5460                         if (!ring_buffer_expanded)
5461                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5462                                             size >> 10,
5463                                             trace_buf_size >> 10);
5464                         else
5465                                 r = sprintf(buf, "%lu\n", size >> 10);
5466                 } else
5467                         r = sprintf(buf, "X\n");
5468         } else
5469                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5470
5471         mutex_unlock(&trace_types_lock);
5472
5473         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5474         return ret;
5475 }
5476
5477 static ssize_t
5478 tracing_entries_write(struct file *filp, const char __user *ubuf,
5479                       size_t cnt, loff_t *ppos)
5480 {
5481         struct inode *inode = file_inode(filp);
5482         struct trace_array *tr = inode->i_private;
5483         unsigned long val;
5484         int ret;
5485
5486         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5487         if (ret)
5488                 return ret;
5489
5490         /* must have at least 1 entry */
5491         if (!val)
5492                 return -EINVAL;
5493
5494         /* value is in KB */
5495         val <<= 10;
5496         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5497         if (ret < 0)
5498                 return ret;
5499
5500         *ppos += cnt;
5501
5502         return cnt;
5503 }
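/*
 * Example (userspace sketch): the "buffer_size_kb" files are backed by
 * tracing_entries_write(); the value is in KiB, per cpu. Writing to the
 * top-level file resizes every cpu, while per_cpu/cpuN/buffer_size_kb
 * resizes only that cpu. The mount point below is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int set_buffer_size_kb(unsigned long kb)
 *	{
 *		char val[32];
 *		int fd, len, ret;
 *
 *		fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		len = snprintf(val, sizeof(val), "%lu", kb);
 *		ret = write(fd, val, len) == len ? 0 : -1;
 *		close(fd);
 *		return ret;
 *	}
 *
 * Writing "0" fails with EINVAL (at least one entry is required), and an
 * allocation failure in the resize path above is reported as ENOMEM.
 */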
5504
5505 static ssize_t
5506 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5507                                 size_t cnt, loff_t *ppos)
5508 {
5509         struct trace_array *tr = filp->private_data;
5510         char buf[64];
5511         int r, cpu;
5512         unsigned long size = 0, expanded_size = 0;
5513
5514         mutex_lock(&trace_types_lock);
5515         for_each_tracing_cpu(cpu) {
5516                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5517                 if (!ring_buffer_expanded)
5518                         expanded_size += trace_buf_size >> 10;
5519         }
5520         if (ring_buffer_expanded)
5521                 r = sprintf(buf, "%lu\n", size);
5522         else
5523                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5524         mutex_unlock(&trace_types_lock);
5525
5526         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5527 }
5528
5529 static ssize_t
5530 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5531                           size_t cnt, loff_t *ppos)
5532 {
5533         /*
5534          * There is no need to read what the user has written; this function
5535          * only exists so that "echo" into this file does not return an error.
5536          */
5537
5538         *ppos += cnt;
5539
5540         return cnt;
5541 }
5542
5543 static int
5544 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5545 {
5546         struct trace_array *tr = inode->i_private;
5547
5548         /* disable tracing ? */
5549         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5550                 tracer_tracing_off(tr);
5551         /* resize the ring buffer to 0 */
5552         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5553
5554         trace_array_put(tr);
5555
5556         return 0;
5557 }
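/*
 * Example (userspace sketch): the "free_buffer" file is write-only and
 * frees the ring buffer when the file descriptor is released; with the
 * stop-on-free trace option set it also turns tracing off first. A tool
 * that wants the tracing memory released when it exits (even if it dies
 * unexpectedly) can simply keep the file open for its lifetime:
 *
 *	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
 *	... do the tracing work ...
 *	close(fd);	(or process exit: the buffer is resized to 0)
 */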
5558
5559 static ssize_t
5560 tracing_mark_write(struct file *filp, const char __user *ubuf,
5561                                         size_t cnt, loff_t *fpos)
5562 {
5563         unsigned long addr = (unsigned long)ubuf;
5564         struct trace_array *tr = filp->private_data;
5565         struct ring_buffer_event *event;
5566         struct ring_buffer *buffer;
5567         struct print_entry *entry;
5568         unsigned long irq_flags;
5569         struct page *pages[2];
5570         void *map_page[2];
5571         int nr_pages = 1;
5572         ssize_t written;
5573         int offset;
5574         int size;
5575         int len;
5576         int ret;
5577         int i;
5578
5579         if (tracing_disabled)
5580                 return -EINVAL;
5581
5582         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5583                 return -EINVAL;
5584
5585         if (cnt > TRACE_BUF_SIZE)
5586                 cnt = TRACE_BUF_SIZE;
5587
5588         /*
5589          * Userspace is injecting traces into the kernel trace buffer.
5590          * We want to be as non-intrusive as possible.
5591          * To do so, we do not want to allocate any special buffers
5592          * or take any locks, but instead write the userspace data
5593          * straight into the ring buffer.
5594          *
5595          * First we need to pin the userspace buffer into memory,
5596          * which most likely it already is, because the caller just
5597          * referenced it. But there's no guarantee. By using get_user_pages_fast()
5598          * and kmap_atomic/kunmap_atomic() we can get access to the
5599          * pages directly. We then write the data directly into the
5600          * ring buffer.
5601          */
5602         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5603
5604         /* check if we cross pages */
5605         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5606                 nr_pages = 2;
5607
5608         offset = addr & (PAGE_SIZE - 1);
5609         addr &= PAGE_MASK;
5610
5611         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5612         if (ret < nr_pages) {
5613                 while (--ret >= 0)
5614                         put_page(pages[ret]);
5615                 written = -EFAULT;
5616                 goto out;
5617         }
5618
5619         for (i = 0; i < nr_pages; i++)
5620                 map_page[i] = kmap_atomic(pages[i]);
5621
5622         local_save_flags(irq_flags);
5623         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5624         buffer = tr->trace_buffer.buffer;
5625         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5626                                           irq_flags, preempt_count());
5627         if (!event) {
5628                 /* Ring buffer disabled, return as if not open for write */
5629                 written = -EBADF;
5630                 goto out_unlock;
5631         }
5632
5633         entry = ring_buffer_event_data(event);
5634         entry->ip = _THIS_IP_;
5635
5636         if (nr_pages == 2) {
5637                 len = PAGE_SIZE - offset;
5638                 memcpy(&entry->buf, map_page[0] + offset, len);
5639                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5640         } else
5641                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5642
5643         if (entry->buf[cnt - 1] != '\n') {
5644                 entry->buf[cnt] = '\n';
5645                 entry->buf[cnt + 1] = '\0';
5646         } else
5647                 entry->buf[cnt] = '\0';
5648
5649         __buffer_unlock_commit(buffer, event);
5650
5651         written = cnt;
5652
5653         *fpos += written;
5654
5655  out_unlock:
5656         for (i = nr_pages - 1; i >= 0; i--) {
5657                 kunmap_atomic(map_page[i]);
5658                 put_page(pages[i]);
5659         }
5660  out:
5661         return written;
5662 }
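/*
 * Example (userspace sketch): the "trace_marker" file lets a program
 * annotate the trace through tracing_mark_write() above. Writes longer
 * than TRACE_BUF_SIZE are truncated, and a trailing newline is added if
 * missing. The mount point is an assumption, as in the other examples.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "hello from userspace", 20);
 *		close(fd);
 *		return 0;
 *	}
 */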
5663
5664 static int tracing_clock_show(struct seq_file *m, void *v)
5665 {
5666         struct trace_array *tr = m->private;
5667         int i;
5668
5669         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5670                 seq_printf(m,
5671                         "%s%s%s%s", i ? " " : "",
5672                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5673                         i == tr->clock_id ? "]" : "");
5674         seq_putc(m, '\n');
5675
5676         return 0;
5677 }
5678
5679 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5680 {
5681         int i;
5682
5683         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5684                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5685                         break;
5686         }
5687         if (i == ARRAY_SIZE(trace_clocks))
5688                 return -EINVAL;
5689
5690         mutex_lock(&trace_types_lock);
5691
5692         tr->clock_id = i;
5693
5694         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5695
5696         /*
5697          * New clock may not be consistent with the previous clock.
5698          * Reset the buffer so that it doesn't have incomparable timestamps.
5699          */
5700         tracing_reset_online_cpus(&tr->trace_buffer);
5701
5702 #ifdef CONFIG_TRACER_MAX_TRACE
5703         if (tr->max_buffer.buffer)
5704                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5705         tracing_reset_online_cpus(&tr->max_buffer);
5706 #endif
5707
5708         mutex_unlock(&trace_types_lock);
5709
5710         return 0;
5711 }
5712
5713 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5714                                    size_t cnt, loff_t *fpos)
5715 {
5716         struct seq_file *m = filp->private_data;
5717         struct trace_array *tr = m->private;
5718         char buf[64];
5719         const char *clockstr;
5720         int ret;
5721
5722         if (cnt >= sizeof(buf))
5723                 return -EINVAL;
5724
5725         if (copy_from_user(buf, ubuf, cnt))
5726                 return -EFAULT;
5727
5728         buf[cnt] = 0;
5729
5730         clockstr = strstrip(buf);
5731
5732         ret = tracing_set_clock(tr, clockstr);
5733         if (ret)
5734                 return ret;
5735
5736         *fpos += cnt;
5737
5738         return cnt;
5739 }
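/*
 * Example (userspace sketch): tracing_clock_show() lists the available
 * clocks with the active one in brackets, and tracing_clock_write()
 * switches between them (resetting the buffers, as tracing_set_clock()
 * explains). Reading first shows which names are valid on this kernel:
 *
 *	char clocks[256] = "";
 *	int fd;
 *
 *	fd = open("/sys/kernel/tracing/trace_clock", O_RDONLY);
 *	read(fd, clocks, sizeof(clocks) - 1);
 *	close(fd);
 *
 *	fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *	write(fd, "global", 6);
 *	close(fd);
 *
 * A name not present in trace_clocks[] is rejected with EINVAL.
 */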
5740
5741 static int tracing_clock_open(struct inode *inode, struct file *file)
5742 {
5743         struct trace_array *tr = inode->i_private;
5744         int ret;
5745
5746         if (tracing_disabled)
5747                 return -ENODEV;
5748
5749         if (trace_array_get(tr))
5750                 return -ENODEV;
5751
5752         ret = single_open(file, tracing_clock_show, inode->i_private);
5753         if (ret < 0)
5754                 trace_array_put(tr);
5755
5756         return ret;
5757 }
5758
5759 struct ftrace_buffer_info {
5760         struct trace_iterator   iter;
5761         void                    *spare;
5762         unsigned int            read;
5763 };
5764
5765 #ifdef CONFIG_TRACER_SNAPSHOT
5766 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5767 {
5768         struct trace_array *tr = inode->i_private;
5769         struct trace_iterator *iter;
5770         struct seq_file *m;
5771         int ret = 0;
5772
5773         if (trace_array_get(tr) < 0)
5774                 return -ENODEV;
5775
5776         if (file->f_mode & FMODE_READ) {
5777                 iter = __tracing_open(inode, file, true);
5778                 if (IS_ERR(iter))
5779                         ret = PTR_ERR(iter);
5780         } else {
5781                 /* Writes still need the seq_file to hold the private data */
5782                 ret = -ENOMEM;
5783                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5784                 if (!m)
5785                         goto out;
5786                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5787                 if (!iter) {
5788                         kfree(m);
5789                         goto out;
5790                 }
5791                 ret = 0;
5792
5793                 iter->tr = tr;
5794                 iter->trace_buffer = &tr->max_buffer;
5795                 iter->cpu_file = tracing_get_cpu(inode);
5796                 m->private = iter;
5797                 file->private_data = m;
5798         }
5799 out:
5800         if (ret < 0)
5801                 trace_array_put(tr);
5802
5803         return ret;
5804 }
5805
5806 static ssize_t
5807 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5808                        loff_t *ppos)
5809 {
5810         struct seq_file *m = filp->private_data;
5811         struct trace_iterator *iter = m->private;
5812         struct trace_array *tr = iter->tr;
5813         unsigned long val;
5814         int ret;
5815
5816         ret = tracing_update_buffers();
5817         if (ret < 0)
5818                 return ret;
5819
5820         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5821         if (ret)
5822                 return ret;
5823
5824         mutex_lock(&trace_types_lock);
5825
5826         if (tr->current_trace->use_max_tr) {
5827                 ret = -EBUSY;
5828                 goto out;
5829         }
5830
5831         switch (val) {
5832         case 0:
5833                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5834                         ret = -EINVAL;
5835                         break;
5836                 }
5837                 if (tr->allocated_snapshot)
5838                         free_snapshot(tr);
5839                 break;
5840         case 1:
5841 /* Only allow per-cpu swap if the ring buffer supports it */
5842 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5843                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5844                         ret = -EINVAL;
5845                         break;
5846                 }
5847 #endif
5848                 if (!tr->allocated_snapshot)
5849                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
5850                                 &tr->trace_buffer, iter->cpu_file);
5851                 else
5852                         ret = alloc_snapshot(tr);
5853
5854                 if (ret < 0)
5855                         break;
5856
5857                 local_irq_disable();
5858                 /* Now, we're going to swap */
5859                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5860                         update_max_tr(tr, current, smp_processor_id());
5861                 else
5862                         update_max_tr_single(tr, current, iter->cpu_file);
5863                 local_irq_enable();
5864                 break;
5865         default:
5866                 if (tr->allocated_snapshot) {
5867                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5868                                 tracing_reset_online_cpus(&tr->max_buffer);
5869                         else
5870                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5871                 }
5872                 break;
5873         }
5874
5875         if (ret >= 0) {
5876                 *ppos += cnt;
5877                 ret = cnt;
5878         }
5879 out:
5880         mutex_unlock(&trace_types_lock);
5881         return ret;
5882 }
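/*
 * Example (userspace sketch): the "snapshot" file drives the switch
 * statement above: writing "1" allocates the max buffer if necessary and
 * swaps it with the live buffer, "0" frees it, and any other number just
 * clears the snapshot contents. A write-only open is enough to trigger a
 * capture (the captured data is then read back from the same file):
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 *
 * If the current tracer already uses the max buffer internally (a latency
 * tracer), the write fails with EBUSY, as checked above.
 */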
5883
5884 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5885 {
5886         struct seq_file *m = file->private_data;
5887         int ret;
5888
5889         ret = tracing_release(inode, file);
5890
5891         if (file->f_mode & FMODE_READ)
5892                 return ret;
5893
5894         /* If write only, the seq_file is just a stub */
5895         if (m)
5896                 kfree(m->private);
5897         kfree(m);
5898
5899         return 0;
5900 }
5901
5902 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5903 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5904                                     size_t count, loff_t *ppos);
5905 static int tracing_buffers_release(struct inode *inode, struct file *file);
5906 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5907                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5908
5909 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5910 {
5911         struct ftrace_buffer_info *info;
5912         int ret;
5913
5914         ret = tracing_buffers_open(inode, filp);
5915         if (ret < 0)
5916                 return ret;
5917
5918         info = filp->private_data;
5919
5920         if (info->iter.trace->use_max_tr) {
5921                 tracing_buffers_release(inode, filp);
5922                 return -EBUSY;
5923         }
5924
5925         info->iter.snapshot = true;
5926         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5927
5928         return ret;
5929 }
5930
5931 #endif /* CONFIG_TRACER_SNAPSHOT */
5932
5933
5934 static const struct file_operations tracing_thresh_fops = {
5935         .open           = tracing_open_generic,
5936         .read           = tracing_thresh_read,
5937         .write          = tracing_thresh_write,
5938         .llseek         = generic_file_llseek,
5939 };
5940
5941 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5942 static const struct file_operations tracing_max_lat_fops = {
5943         .open           = tracing_open_generic,
5944         .read           = tracing_max_lat_read,
5945         .write          = tracing_max_lat_write,
5946         .llseek         = generic_file_llseek,
5947 };
5948 #endif
5949
5950 static const struct file_operations set_tracer_fops = {
5951         .open           = tracing_open_generic,
5952         .read           = tracing_set_trace_read,
5953         .write          = tracing_set_trace_write,
5954         .llseek         = generic_file_llseek,
5955 };
5956
5957 static const struct file_operations tracing_pipe_fops = {
5958         .open           = tracing_open_pipe,
5959         .poll           = tracing_poll_pipe,
5960         .read           = tracing_read_pipe,
5961         .splice_read    = tracing_splice_read_pipe,
5962         .release        = tracing_release_pipe,
5963         .llseek         = no_llseek,
5964 };
5965
5966 static const struct file_operations tracing_entries_fops = {
5967         .open           = tracing_open_generic_tr,
5968         .read           = tracing_entries_read,
5969         .write          = tracing_entries_write,
5970         .llseek         = generic_file_llseek,
5971         .release        = tracing_release_generic_tr,
5972 };
5973
5974 static const struct file_operations tracing_total_entries_fops = {
5975         .open           = tracing_open_generic_tr,
5976         .read           = tracing_total_entries_read,
5977         .llseek         = generic_file_llseek,
5978         .release        = tracing_release_generic_tr,
5979 };
5980
5981 static const struct file_operations tracing_free_buffer_fops = {
5982         .open           = tracing_open_generic_tr,
5983         .write          = tracing_free_buffer_write,
5984         .release        = tracing_free_buffer_release,
5985 };
5986
5987 static const struct file_operations tracing_mark_fops = {
5988         .open           = tracing_open_generic_tr,
5989         .write          = tracing_mark_write,
5990         .llseek         = generic_file_llseek,
5991         .release        = tracing_release_generic_tr,
5992 };
5993
5994 static const struct file_operations trace_clock_fops = {
5995         .open           = tracing_clock_open,
5996         .read           = seq_read,
5997         .llseek         = seq_lseek,
5998         .release        = tracing_single_release_tr,
5999         .write          = tracing_clock_write,
6000 };
6001
6002 #ifdef CONFIG_TRACER_SNAPSHOT
6003 static const struct file_operations snapshot_fops = {
6004         .open           = tracing_snapshot_open,
6005         .read           = seq_read,
6006         .write          = tracing_snapshot_write,
6007         .llseek         = tracing_lseek,
6008         .release        = tracing_snapshot_release,
6009 };
6010
6011 static const struct file_operations snapshot_raw_fops = {
6012         .open           = snapshot_raw_open,
6013         .read           = tracing_buffers_read,
6014         .release        = tracing_buffers_release,
6015         .splice_read    = tracing_buffers_splice_read,
6016         .llseek         = no_llseek,
6017 };
6018
6019 #endif /* CONFIG_TRACER_SNAPSHOT */
6020
6021 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6022 {
6023         struct trace_array *tr = inode->i_private;
6024         struct ftrace_buffer_info *info;
6025         int ret;
6026
6027         if (tracing_disabled)
6028                 return -ENODEV;
6029
6030         if (trace_array_get(tr) < 0)
6031                 return -ENODEV;
6032
6033         info = kzalloc(sizeof(*info), GFP_KERNEL);
6034         if (!info) {
6035                 trace_array_put(tr);
6036                 return -ENOMEM;
6037         }
6038
6039         mutex_lock(&trace_types_lock);
6040
6041         info->iter.tr           = tr;
6042         info->iter.cpu_file     = tracing_get_cpu(inode);
6043         info->iter.trace        = tr->current_trace;
6044         info->iter.trace_buffer = &tr->trace_buffer;
6045         info->spare             = NULL;
6046         /* Force reading ring buffer for first read */
6047         info->read              = (unsigned int)-1;
6048
6049         filp->private_data = info;
6050
6051         tr->current_trace->ref++;
6052
6053         mutex_unlock(&trace_types_lock);
6054
6055         ret = nonseekable_open(inode, filp);
6056         if (ret < 0)
6057                 trace_array_put(tr);
6058
6059         return ret;
6060 }
6061
6062 static unsigned int
6063 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6064 {
6065         struct ftrace_buffer_info *info = filp->private_data;
6066         struct trace_iterator *iter = &info->iter;
6067
6068         return trace_poll(iter, filp, poll_table);
6069 }
6070
6071 static ssize_t
6072 tracing_buffers_read(struct file *filp, char __user *ubuf,
6073                      size_t count, loff_t *ppos)
6074 {
6075         struct ftrace_buffer_info *info = filp->private_data;
6076         struct trace_iterator *iter = &info->iter;
6077         ssize_t ret;
6078         ssize_t size;
6079
6080         if (!count)
6081                 return 0;
6082
6083 #ifdef CONFIG_TRACER_MAX_TRACE
6084         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6085                 return -EBUSY;
6086 #endif
6087
6088         if (!info->spare)
6089                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6090                                                           iter->cpu_file);
6091         if (!info->spare)
6092                 return -ENOMEM;
6093
6094         /* Do we have previous read data to read? */
6095         if (info->read < PAGE_SIZE)
6096                 goto read;
6097
6098  again:
6099         trace_access_lock(iter->cpu_file);
6100         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6101                                     &info->spare,
6102                                     count,
6103                                     iter->cpu_file, 0);
6104         trace_access_unlock(iter->cpu_file);
6105
6106         if (ret < 0) {
6107                 if (trace_empty(iter)) {
6108                         if ((filp->f_flags & O_NONBLOCK))
6109                                 return -EAGAIN;
6110
6111                         ret = wait_on_pipe(iter, false);
6112                         if (ret)
6113                                 return ret;
6114
6115                         goto again;
6116                 }
6117                 return 0;
6118         }
6119
6120         info->read = 0;
6121  read:
6122         size = PAGE_SIZE - info->read;
6123         if (size > count)
6124                 size = count;
6125
6126         ret = copy_to_user(ubuf, info->spare + info->read, size);
6127         if (ret == size)
6128                 return -EFAULT;
6129
6130         size -= ret;
6131
6132         *ppos += size;
6133         info->read += size;
6134
6135         return size;
6136 }
6137
6138 static int tracing_buffers_release(struct inode *inode, struct file *file)
6139 {
6140         struct ftrace_buffer_info *info = file->private_data;
6141         struct trace_iterator *iter = &info->iter;
6142
6143         mutex_lock(&trace_types_lock);
6144
6145         iter->tr->current_trace->ref--;
6146
6147         __trace_array_put(iter->tr);
6148
6149         if (info->spare)
6150                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6151         kfree(info);
6152
6153         mutex_unlock(&trace_types_lock);
6154
6155         return 0;
6156 }
6157
6158 struct buffer_ref {
6159         struct ring_buffer      *buffer;
6160         void                    *page;
6161         int                     ref;
6162 };
6163
6164 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6165                                     struct pipe_buffer *buf)
6166 {
6167         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6168
6169         if (--ref->ref)
6170                 return;
6171
6172         ring_buffer_free_read_page(ref->buffer, ref->page);
6173         kfree(ref);
6174         buf->private = 0;
6175 }
6176
6177 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6178                                 struct pipe_buffer *buf)
6179 {
6180         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6181
6182         if (ref->ref > INT_MAX/2)
6183                 return false;
6184
6185         ref->ref++;
6186         return true;
6187 }
6188
6189 /* Pipe buffer operations for a buffer. */
6190 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6191         .can_merge              = 0,
6192         .confirm                = generic_pipe_buf_confirm,
6193         .release                = buffer_pipe_buf_release,
6194         .steal                  = generic_pipe_buf_steal,
6195         .get                    = buffer_pipe_buf_get,
6196 };
6197
6198 /*
6199  * Callback from splice_to_pipe(), if we need to release some pages
6200  * at the end of the spd in case we errored out while filling the pipe.
6201  */
6202 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6203 {
6204         struct buffer_ref *ref =
6205                 (struct buffer_ref *)spd->partial[i].private;
6206
6207         if (--ref->ref)
6208                 return;
6209
6210         ring_buffer_free_read_page(ref->buffer, ref->page);
6211         kfree(ref);
6212         spd->partial[i].private = 0;
6213 }
6214
6215 static ssize_t
6216 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6217                             struct pipe_inode_info *pipe, size_t len,
6218                             unsigned int flags)
6219 {
6220         struct ftrace_buffer_info *info = file->private_data;
6221         struct trace_iterator *iter = &info->iter;
6222         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6223         struct page *pages_def[PIPE_DEF_BUFFERS];
6224         struct splice_pipe_desc spd = {
6225                 .pages          = pages_def,
6226                 .partial        = partial_def,
6227                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6228                 .flags          = flags,
6229                 .ops            = &buffer_pipe_buf_ops,
6230                 .spd_release    = buffer_spd_release,
6231         };
6232         struct buffer_ref *ref;
6233         int entries, i;
6234         ssize_t ret = 0;
6235
6236 #ifdef CONFIG_TRACER_MAX_TRACE
6237         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6238                 return -EBUSY;
6239 #endif
6240
6241         if (*ppos & (PAGE_SIZE - 1))
6242                 return -EINVAL;
6243
6244         if (len & (PAGE_SIZE - 1)) {
6245                 if (len < PAGE_SIZE)
6246                         return -EINVAL;
6247                 len &= PAGE_MASK;
6248         }
6249
6250         if (splice_grow_spd(pipe, &spd))
6251                 return -ENOMEM;
6252
6253  again:
6254         trace_access_lock(iter->cpu_file);
6255         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6256
6257         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6258                 struct page *page;
6259                 int r;
6260
6261                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6262                 if (!ref) {
6263                         ret = -ENOMEM;
6264                         break;
6265                 }
6266
6267                 ref->ref = 1;
6268                 ref->buffer = iter->trace_buffer->buffer;
6269                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6270                 if (!ref->page) {
6271                         ret = -ENOMEM;
6272                         kfree(ref);
6273                         break;
6274                 }
6275
6276                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6277                                           len, iter->cpu_file, 1);
6278                 if (r < 0) {
6279                         ring_buffer_free_read_page(ref->buffer, ref->page);
6280                         kfree(ref);
6281                         break;
6282                 }
6283
6284                 page = virt_to_page(ref->page);
6285
6286                 spd.pages[i] = page;
6287                 spd.partial[i].len = PAGE_SIZE;
6288                 spd.partial[i].offset = 0;
6289                 spd.partial[i].private = (unsigned long)ref;
6290                 spd.nr_pages++;
6291                 *ppos += PAGE_SIZE;
6292
6293                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6294         }
6295
6296         trace_access_unlock(iter->cpu_file);
6297         spd.nr_pages = i;
6298
6299         /* did we read anything? */
6300         if (!spd.nr_pages) {
6301                 if (ret)
6302                         goto out;
6303
6304                 ret = -EAGAIN;
6305                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6306                         goto out;
6307
6308                 ret = wait_on_pipe(iter, true);
6309                 if (ret)
6310                         goto out;
6311
6312                 goto again;
6313         }
6314
6315         ret = splice_to_pipe(pipe, &spd);
6316 out:
6317         splice_shrink_spd(&spd);
6318
6319         return ret;
6320 }
6321
6322 static const struct file_operations tracing_buffers_fops = {
6323         .open           = tracing_buffers_open,
6324         .read           = tracing_buffers_read,
6325         .poll           = tracing_buffers_poll,
6326         .release        = tracing_buffers_release,
6327         .splice_read    = tracing_buffers_splice_read,
6328         .llseek         = no_llseek,
6329 };
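/*
 * Example (userspace sketch): trace_pipe_raw hands out page-sized binary
 * ring-buffer pages and is meant to be spliced rather than copied, which
 * is what tracing_buffers_splice_read() implements. A recorder similar in
 * spirit to trace-cmd would move pages to a file without touching them:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int record_cpu0(int out_fd, size_t pages)
 *	{
 *		long psz = sysconf(_SC_PAGESIZE);
 *		int fds[2], raw;
 *
 *		raw = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			   O_RDONLY);
 *		if (raw < 0 || pipe(fds) < 0)
 *			return -1;
 *		while (pages--) {
 *			if (splice(raw, NULL, fds[1], NULL, psz,
 *				   SPLICE_F_MOVE) <= 0)
 *				break;
 *			splice(fds[0], NULL, out_fd, NULL, psz, SPLICE_F_MOVE);
 *		}
 *		close(fds[0]);
 *		close(fds[1]);
 *		close(raw);
 *		return 0;
 *	}
 *
 * Offsets and lengths must be page aligned, as enforced at the top of
 * tracing_buffers_splice_read().
 */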
6330
6331 static ssize_t
6332 tracing_stats_read(struct file *filp, char __user *ubuf,
6333                    size_t count, loff_t *ppos)
6334 {
6335         struct inode *inode = file_inode(filp);
6336         struct trace_array *tr = inode->i_private;
6337         struct trace_buffer *trace_buf = &tr->trace_buffer;
6338         int cpu = tracing_get_cpu(inode);
6339         struct trace_seq *s;
6340         unsigned long cnt;
6341         unsigned long long t;
6342         unsigned long usec_rem;
6343
6344         s = kmalloc(sizeof(*s), GFP_KERNEL);
6345         if (!s)
6346                 return -ENOMEM;
6347
6348         trace_seq_init(s);
6349
6350         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6351         trace_seq_printf(s, "entries: %ld\n", cnt);
6352
6353         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6354         trace_seq_printf(s, "overrun: %ld\n", cnt);
6355
6356         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6357         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6358
6359         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6360         trace_seq_printf(s, "bytes: %ld\n", cnt);
6361
6362         if (trace_clocks[tr->clock_id].in_ns) {
6363                 /* local or global for trace_clock */
6364                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6365                 usec_rem = do_div(t, USEC_PER_SEC);
6366                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6367                                                                 t, usec_rem);
6368
6369                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6370                 usec_rem = do_div(t, USEC_PER_SEC);
6371                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6372         } else {
6373                 /* counter or tsc mode for trace_clock */
6374                 trace_seq_printf(s, "oldest event ts: %llu\n",
6375                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6376
6377                 trace_seq_printf(s, "now ts: %llu\n",
6378                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6379         }
6380
6381         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6382         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6383
6384         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6385         trace_seq_printf(s, "read events: %ld\n", cnt);
6386
6387         count = simple_read_from_buffer(ubuf, count, ppos,
6388                                         s->buffer, trace_seq_used(s));
6389
6390         kfree(s);
6391
6392         return count;
6393 }
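/*
 * The resulting per_cpu/cpuN/stats file looks like this (values are
 * illustrative; the two "ts" lines are printed as raw counter values
 * instead when the trace clock does not count in nanoseconds):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7664
 *	oldest event ts: 52705.339549
 *	now ts: 52709.248216
 *	dropped events: 0
 *	read events: 0
 */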
6394
6395 static const struct file_operations tracing_stats_fops = {
6396         .open           = tracing_open_generic_tr,
6397         .read           = tracing_stats_read,
6398         .llseek         = generic_file_llseek,
6399         .release        = tracing_release_generic_tr,
6400 };
6401
6402 #ifdef CONFIG_DYNAMIC_FTRACE
6403
6404 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6405 {
6406         return 0;
6407 }
6408
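/*
 * Backs the "dyn_ftrace_total_info" file created in tracer_init_tracefs():
 * prints the value behind filp->private_data (ftrace_update_tot_cnt, the
 * count of functions ftrace has converted for dynamic tracing) followed by
 * any architecture specific info from ftrace_arch_read_dyn_info().
 */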
6409 static ssize_t
6410 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6411                   size_t cnt, loff_t *ppos)
6412 {
6413         static char ftrace_dyn_info_buffer[1024];
6414         static DEFINE_MUTEX(dyn_info_mutex);
6415         unsigned long *p = filp->private_data;
6416         char *buf = ftrace_dyn_info_buffer;
6417         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6418         int r;
6419
6420         mutex_lock(&dyn_info_mutex);
6421         r = sprintf(buf, "%ld ", *p);
6422
6423         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6424         buf[r++] = '\n';
6425
6426         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6427
6428         mutex_unlock(&dyn_info_mutex);
6429
6430         return r;
6431 }
6432
6433 static const struct file_operations tracing_dyn_info_fops = {
6434         .open           = tracing_open_generic,
6435         .read           = tracing_read_dyn_info,
6436         .llseek         = generic_file_llseek,
6437 };
6438 #endif /* CONFIG_DYNAMIC_FTRACE */
6439
6440 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6441 static void
6442 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6443 {
6444         tracing_snapshot();
6445 }
6446
6447 static void
6448 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6449 {
6450         unsigned long *count = (unsigned long *)data;
6451
6452         if (!*count)
6453                 return;
6454
6455         if (*count != -1)
6456                 (*count)--;
6457
6458         tracing_snapshot();
6459 }
6460
6461 static int
6462 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6463                       struct ftrace_probe_ops *ops, void *data)
6464 {
6465         long count = (long)data;
6466
6467         seq_printf(m, "%ps:", (void *)ip);
6468
6469         seq_puts(m, "snapshot");
6470
6471         if (count == -1)
6472                 seq_puts(m, ":unlimited\n");
6473         else
6474                 seq_printf(m, ":count=%ld\n", count);
6475
6476         return 0;
6477 }
6478
6479 static struct ftrace_probe_ops snapshot_probe_ops = {
6480         .func                   = ftrace_snapshot,
6481         .print                  = ftrace_snapshot_print,
6482 };
6483
6484 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6485         .func                   = ftrace_count_snapshot,
6486         .print                  = ftrace_snapshot_print,
6487 };
6488
6489 static int
6490 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6491                                char *glob, char *cmd, char *param, int enable)
6492 {
6493         struct ftrace_probe_ops *ops;
6494         void *count = (void *)-1;
6495         char *number;
6496         int ret;
6497
6498         /* hash funcs only work with set_ftrace_filter */
6499         if (!enable)
6500                 return -EINVAL;
6501
6502         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6503
6504         if (glob[0] == '!') {
6505                 unregister_ftrace_function_probe_func(glob+1, ops);
6506                 return 0;
6507         }
6508
6509         if (!param)
6510                 goto out_reg;
6511
6512         number = strsep(&param, ":");
6513
6514         if (!strlen(number))
6515                 goto out_reg;
6516
6517         /*
6518          * We use the callback data field (which is a pointer)
6519          * as our counter.
6520          */
6521         ret = kstrtoul(number, 0, (unsigned long *)&count);
6522         if (ret)
6523                 return ret;
6524
6525  out_reg:
6526         ret = alloc_snapshot(&global_trace);
6527         if (ret < 0)
6528                 goto out;
6529
6530         ret = register_ftrace_function_probe(glob, ops, count);
6531
6532  out:
6533         return ret < 0 ? ret : 0;
6534 }
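/*
 * The "snapshot" command parsed above is used through set_ftrace_filter,
 * e.g. (assuming the debugfs/tracing automount path):
 *
 *	# take a snapshot every time do_sys_open() is hit:
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter
 *
 *	# only snapshot on the first 5 hits:
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *
 *	# remove the probe again:
 *	echo '!do_sys_open:snapshot' > set_ftrace_filter
 */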
6535
6536 static struct ftrace_func_command ftrace_snapshot_cmd = {
6537         .name                   = "snapshot",
6538         .func                   = ftrace_trace_snapshot_callback,
6539 };
6540
6541 static __init int register_snapshot_cmd(void)
6542 {
6543         return register_ftrace_command(&ftrace_snapshot_cmd);
6544 }
6545 #else
6546 static inline __init int register_snapshot_cmd(void) { return 0; }
6547 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6548
6549 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6550 {
6551         if (WARN_ON(!tr->dir))
6552                 return ERR_PTR(-ENODEV);
6553
6554         /* Top directory uses NULL as the parent */
6555         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6556                 return NULL;
6557
6558         /* All sub buffers have a descriptor */
6559         return tr->dir;
6560 }
6561
6562 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6563 {
6564         struct dentry *d_tracer;
6565
6566         if (tr->percpu_dir)
6567                 return tr->percpu_dir;
6568
6569         d_tracer = tracing_get_dentry(tr);
6570         if (IS_ERR(d_tracer))
6571                 return NULL;
6572
6573         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6574
6575         WARN_ONCE(!tr->percpu_dir,
6576                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6577
6578         return tr->percpu_dir;
6579 }
6580
6581 static struct dentry *
6582 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6583                       void *data, long cpu, const struct file_operations *fops)
6584 {
6585         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6586
6587         if (ret) /* See tracing_get_cpu() */
6588                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6589         return ret;
6590 }
6591
6592 static void
6593 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6594 {
6595         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6596         struct dentry *d_cpu;
6597         char cpu_dir[30]; /* 30 characters should be more than enough */
6598
6599         if (!d_percpu)
6600                 return;
6601
6602         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6603         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6604         if (!d_cpu) {
6605                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6606                 return;
6607         }
6608
6609         /* per cpu trace_pipe */
6610         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6611                                 tr, cpu, &tracing_pipe_fops);
6612
6613         /* per cpu trace */
6614         trace_create_cpu_file("trace", 0644, d_cpu,
6615                                 tr, cpu, &tracing_fops);
6616
6617         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6618                                 tr, cpu, &tracing_buffers_fops);
6619
6620         trace_create_cpu_file("stats", 0444, d_cpu,
6621                                 tr, cpu, &tracing_stats_fops);
6622
6623         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6624                                 tr, cpu, &tracing_entries_fops);
6625
6626 #ifdef CONFIG_TRACER_SNAPSHOT
6627         trace_create_cpu_file("snapshot", 0644, d_cpu,
6628                                 tr, cpu, &snapshot_fops);
6629
6630         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6631                                 tr, cpu, &snapshot_raw_fops);
6632 #endif
6633 }
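/*
 * The per-cpu directory created above ends up looking like:
 *
 *	per_cpu/cpuN/trace_pipe
 *	per_cpu/cpuN/trace
 *	per_cpu/cpuN/trace_pipe_raw
 *	per_cpu/cpuN/stats
 *	per_cpu/cpuN/buffer_size_kb
 *	per_cpu/cpuN/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpuN/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 */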
6634
6635 #ifdef CONFIG_FTRACE_SELFTEST
6636 /* Let selftest have access to static functions in this file */
6637 #include "trace_selftest.c"
6638 #endif
6639
6640 static ssize_t
6641 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6642                         loff_t *ppos)
6643 {
6644         struct trace_option_dentry *topt = filp->private_data;
6645         char *buf;
6646
6647         if (topt->flags->val & topt->opt->bit)
6648                 buf = "1\n";
6649         else
6650                 buf = "0\n";
6651
6652         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6653 }
6654
6655 static ssize_t
6656 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6657                          loff_t *ppos)
6658 {
6659         struct trace_option_dentry *topt = filp->private_data;
6660         unsigned long val;
6661         int ret;
6662
6663         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6664         if (ret)
6665                 return ret;
6666
6667         if (val != 0 && val != 1)
6668                 return -EINVAL;
6669
6670         if (!!(topt->flags->val & topt->opt->bit) != val) {
6671                 mutex_lock(&trace_types_lock);
6672                 ret = __set_tracer_option(topt->tr, topt->flags,
6673                                           topt->opt, !val);
6674                 mutex_unlock(&trace_types_lock);
6675                 if (ret)
6676                         return ret;
6677         }
6678
6679         *ppos += cnt;
6680
6681         return cnt;
6682 }
6683
6684
6685 static const struct file_operations trace_options_fops = {
6686         .open = tracing_open_generic,
6687         .read = trace_options_read,
6688         .write = trace_options_write,
6689         .llseek = generic_file_llseek,
6690 };
6691
6692 /*
6693  * In order to pass in both the trace_array descriptor as well as the index
6694  * to the flag that the trace option file represents, the trace_array
6695  * has a character array of trace_flags_index[], which holds the index
6696  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6697  * The address of this character array is passed to the flag option file
6698  * read/write callbacks.
6699  *
6700  * In order to extract both the index and the trace_array descriptor,
6701  * get_tr_index() uses the following algorithm.
6702  *
6703  *   idx = *ptr;
6704  *
6705  * Since ptr points at the index entry whose value equals its own
6706  * position (remember index[1] == 1), dereferencing it yields the index.
6707  *
6708  * Then, to get the trace_array descriptor, subtracting that index
6709  * from ptr lands on the start of the index array itself.
6710  *
6711  *   ptr - idx == &index[0]
6712  *
6713  * Then a simple container_of() from that pointer gets us to the
6714  * trace_array descriptor.
6715  */
6716 static void get_tr_index(void *data, struct trace_array **ptr,
6717                          unsigned int *pindex)
6718 {
6719         *pindex = *(unsigned char *)data;
6720
6721         *ptr = container_of(data - *pindex, struct trace_array,
6722                             trace_flags_index);
6723 }
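/*
 * For example, if an option file was handed &tr->trace_flags_index[5]
 * as its private data:
 *
 *	unsigned char *p = data;	// points at index[5], which holds 5
 *	unsigned int idx = *p;		// idx == 5
 *	// p - idx == &tr->trace_flags_index[0], so container_of() gives:
 *	struct trace_array *found = container_of((void *)(p - idx),
 *						 struct trace_array,
 *						 trace_flags_index);
 *	// found == tr
 */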
6724
6725 static ssize_t
6726 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6727                         loff_t *ppos)
6728 {
6729         void *tr_index = filp->private_data;
6730         struct trace_array *tr;
6731         unsigned int index;
6732         char *buf;
6733
6734         get_tr_index(tr_index, &tr, &index);
6735
6736         if (tr->trace_flags & (1 << index))
6737                 buf = "1\n";
6738         else
6739                 buf = "0\n";
6740
6741         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6742 }
6743
6744 static ssize_t
6745 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6746                          loff_t *ppos)
6747 {
6748         void *tr_index = filp->private_data;
6749         struct trace_array *tr;
6750         unsigned int index;
6751         unsigned long val;
6752         int ret;
6753
6754         get_tr_index(tr_index, &tr, &index);
6755
6756         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6757         if (ret)
6758                 return ret;
6759
6760         if (val != 0 && val != 1)
6761                 return -EINVAL;
6762
6763         mutex_lock(&trace_types_lock);
6764         ret = set_tracer_flag(tr, 1 << index, val);
6765         mutex_unlock(&trace_types_lock);
6766
6767         if (ret < 0)
6768                 return ret;
6769
6770         *ppos += cnt;
6771
6772         return cnt;
6773 }
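/*
 * Each core trace flag gets a file under options/ that accepts only
 * "0" or "1", e.g. (assuming the debugfs/tracing automount path):
 *
 *	cat options/overwrite		# -> 1
 *	echo 0 > options/overwrite	# clear TRACE_ITER_OVERWRITE here
 */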
6774
6775 static const struct file_operations trace_options_core_fops = {
6776         .open = tracing_open_generic,
6777         .read = trace_options_core_read,
6778         .write = trace_options_core_write,
6779         .llseek = generic_file_llseek,
6780 };
6781
6782 struct dentry *trace_create_file(const char *name,
6783                                  umode_t mode,
6784                                  struct dentry *parent,
6785                                  void *data,
6786                                  const struct file_operations *fops)
6787 {
6788         struct dentry *ret;
6789
6790         ret = tracefs_create_file(name, mode, parent, data, fops);
6791         if (!ret)
6792                 pr_warn("Could not create tracefs '%s' entry\n", name);
6793
6794         return ret;
6795 }
6796
6797
6798 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6799 {
6800         struct dentry *d_tracer;
6801
6802         if (tr->options)
6803                 return tr->options;
6804
6805         d_tracer = tracing_get_dentry(tr);
6806         if (IS_ERR(d_tracer))
6807                 return NULL;
6808
6809         tr->options = tracefs_create_dir("options", d_tracer);
6810         if (!tr->options) {
6811                 pr_warn("Could not create tracefs directory 'options'\n");
6812                 return NULL;
6813         }
6814
6815         return tr->options;
6816 }
6817
6818 static void
6819 create_trace_option_file(struct trace_array *tr,
6820                          struct trace_option_dentry *topt,
6821                          struct tracer_flags *flags,
6822                          struct tracer_opt *opt)
6823 {
6824         struct dentry *t_options;
6825
6826         t_options = trace_options_init_dentry(tr);
6827         if (!t_options)
6828                 return;
6829
6830         topt->flags = flags;
6831         topt->opt = opt;
6832         topt->tr = tr;
6833
6834         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6835                                     &trace_options_fops);
6836
6837 }
6838
6839 static void
6840 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6841 {
6842         struct trace_option_dentry *topts;
6843         struct trace_options *tr_topts;
6844         struct tracer_flags *flags;
6845         struct tracer_opt *opts;
6846         int cnt;
6847         int i;
6848
6849         if (!tracer)
6850                 return;
6851
6852         flags = tracer->flags;
6853
6854         if (!flags || !flags->opts)
6855                 return;
6856
6857         /*
6858          * If this is an instance, only create flags for tracers
6859          * the instance may have.
6860          */
6861         if (!trace_ok_for_array(tracer, tr))
6862                 return;
6863
6864         for (i = 0; i < tr->nr_topts; i++) {
6865                 /* Make sure there's no duplicate flags. */
6866                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6867                         return;
6868         }
6869
6870         opts = flags->opts;
6871
6872         for (cnt = 0; opts[cnt].name; cnt++)
6873                 ;
6874
6875         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6876         if (!topts)
6877                 return;
6878
6879         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6880                             GFP_KERNEL);
6881         if (!tr_topts) {
6882                 kfree(topts);
6883                 return;
6884         }
6885
6886         tr->topts = tr_topts;
6887         tr->topts[tr->nr_topts].tracer = tracer;
6888         tr->topts[tr->nr_topts].topts = topts;
6889         tr->nr_topts++;
6890
6891         for (cnt = 0; opts[cnt].name; cnt++) {
6892                 create_trace_option_file(tr, &topts[cnt], flags,
6893                                          &opts[cnt]);
6894                 WARN_ONCE(topts[cnt].entry == NULL,
6895                           "Failed to create trace option: %s",
6896                           opts[cnt].name);
6897         }
6898 }
6899
6900 static struct dentry *
6901 create_trace_option_core_file(struct trace_array *tr,
6902                               const char *option, long index)
6903 {
6904         struct dentry *t_options;
6905
6906         t_options = trace_options_init_dentry(tr);
6907         if (!t_options)
6908                 return NULL;
6909
6910         return trace_create_file(option, 0644, t_options,
6911                                  (void *)&tr->trace_flags_index[index],
6912                                  &trace_options_core_fops);
6913 }
6914
6915 static void create_trace_options_dir(struct trace_array *tr)
6916 {
6917         struct dentry *t_options;
6918         bool top_level = tr == &global_trace;
6919         int i;
6920
6921         t_options = trace_options_init_dentry(tr);
6922         if (!t_options)
6923                 return;
6924
6925         for (i = 0; trace_options[i]; i++) {
6926                 if (top_level ||
6927                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6928                         create_trace_option_core_file(tr, trace_options[i], i);
6929         }
6930 }
6931
6932 static ssize_t
6933 rb_simple_read(struct file *filp, char __user *ubuf,
6934                size_t cnt, loff_t *ppos)
6935 {
6936         struct trace_array *tr = filp->private_data;
6937         char buf[64];
6938         int r;
6939
6940         r = tracer_tracing_is_on(tr);
6941         r = sprintf(buf, "%d\n", r);
6942
6943         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6944 }
6945
6946 static ssize_t
6947 rb_simple_write(struct file *filp, const char __user *ubuf,
6948                 size_t cnt, loff_t *ppos)
6949 {
6950         struct trace_array *tr = filp->private_data;
6951         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6952         unsigned long val;
6953         int ret;
6954
6955         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6956         if (ret)
6957                 return ret;
6958
6959         if (buffer) {
6960                 mutex_lock(&trace_types_lock);
6961                 if (!!val == tracer_tracing_is_on(tr)) {
6962                         val = 0; /* do nothing */
6963                 } else if (val) {
6964                         tracer_tracing_on(tr);
6965                         if (tr->current_trace->start)
6966                                 tr->current_trace->start(tr);
6967                 } else {
6968                         tracer_tracing_off(tr);
6969                         if (tr->current_trace->stop)
6970                                 tr->current_trace->stop(tr);
6971                 }
6972                 mutex_unlock(&trace_types_lock);
6973         }
6974
6975         (*ppos)++;
6976
6977         return cnt;
6978 }
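/*
 * These handlers back the "tracing_on" file, e.g. (assuming the
 * debugfs/tracing automount path):
 *
 *	echo 0 > tracing_on	# stop recording into the ring buffer
 *	cat tracing_on		# -> 0
 *	echo 1 > tracing_on	# resume recording
 *
 * Writing the value that is already set is accepted and does nothing.
 */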
6979
6980 static const struct file_operations rb_simple_fops = {
6981         .open           = tracing_open_generic_tr,
6982         .read           = rb_simple_read,
6983         .write          = rb_simple_write,
6984         .release        = tracing_release_generic_tr,
6985         .llseek         = default_llseek,
6986 };
6987
6988 struct dentry *trace_instance_dir;
6989
6990 static void
6991 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6992
6993 static int
6994 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6995 {
6996         enum ring_buffer_flags rb_flags;
6997
6998         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6999
7000         buf->tr = tr;
7001
7002         buf->buffer = ring_buffer_alloc(size, rb_flags);
7003         if (!buf->buffer)
7004                 return -ENOMEM;
7005
7006         buf->data = alloc_percpu(struct trace_array_cpu);
7007         if (!buf->data) {
7008                 ring_buffer_free(buf->buffer);
7009                 buf->buffer = NULL;
7010                 return -ENOMEM;
7011         }
7012
7013         /* Allocate the first page for all buffers */
7014         set_buffer_entries(&tr->trace_buffer,
7015                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7016
7017         return 0;
7018 }
7019
7020 static int allocate_trace_buffers(struct trace_array *tr, int size)
7021 {
7022         int ret;
7023
7024         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7025         if (ret)
7026                 return ret;
7027
7028 #ifdef CONFIG_TRACER_MAX_TRACE
7029         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7030                                     allocate_snapshot ? size : 1);
7031         if (WARN_ON(ret)) {
7032                 ring_buffer_free(tr->trace_buffer.buffer);
7033                 tr->trace_buffer.buffer = NULL;
7034                 free_percpu(tr->trace_buffer.data);
7035                 tr->trace_buffer.data = NULL;
7036                 return -ENOMEM;
7037         }
7038         tr->allocated_snapshot = allocate_snapshot;
7039
7040         /*
7041          * Only the top level trace array gets its snapshot allocated
7042          * from the kernel command line.
7043          */
7044         allocate_snapshot = false;
7045 #endif
7046
7047         /*
7048          * Because of the way alloc_percpu() works on x86_64, we need to
7049          * synchronize the pgd of all the page tables; otherwise a trace
7050          * event taken in the x86_64 page fault handler could itself fault
7051          * when it touches alloc_percpu()'d memory whose mapping has not
7052          * yet been synced into the current pgd.
7053          * We also need to audit all other alloc_percpu() and vmalloc()
7054          * calls in tracing, because something might get triggered within
7055          * a page fault trace event!
7056          */
7057         vmalloc_sync_mappings();
7058
7059         return 0;
7060 }
7061
7062 static void free_trace_buffer(struct trace_buffer *buf)
7063 {
7064         if (buf->buffer) {
7065                 ring_buffer_free(buf->buffer);
7066                 buf->buffer = NULL;
7067                 free_percpu(buf->data);
7068                 buf->data = NULL;
7069         }
7070 }
7071
7072 static void free_trace_buffers(struct trace_array *tr)
7073 {
7074         if (!tr)
7075                 return;
7076
7077         free_trace_buffer(&tr->trace_buffer);
7078
7079 #ifdef CONFIG_TRACER_MAX_TRACE
7080         free_trace_buffer(&tr->max_buffer);
7081 #endif
7082 }
7083
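/*
 * Fill trace_flags_index[] so that index[i] == i.  The address of a
 * single entry is what gets handed to the trace option files, and
 * get_tr_index() later recovers both the bit number and the owning
 * trace_array from that pointer.
 */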
7084 static void init_trace_flags_index(struct trace_array *tr)
7085 {
7086         int i;
7087
7088         /* Used by the trace options files */
7089         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7090                 tr->trace_flags_index[i] = i;
7091 }
7092
7093 static void __update_tracer_options(struct trace_array *tr)
7094 {
7095         struct tracer *t;
7096
7097         for (t = trace_types; t; t = t->next)
7098                 add_tracer_options(tr, t);
7099 }
7100
7101 static void update_tracer_options(struct trace_array *tr)
7102 {
7103         mutex_lock(&trace_types_lock);
7104         tracer_options_updated = true;
7105         __update_tracer_options(tr);
7106         mutex_unlock(&trace_types_lock);
7107 }
7108
7109 static int instance_mkdir(const char *name)
7110 {
7111         struct trace_array *tr;
7112         int ret;
7113
7114         mutex_lock(&trace_types_lock);
7115
7116         ret = -EEXIST;
7117         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7118                 if (tr->name && strcmp(tr->name, name) == 0)
7119                         goto out_unlock;
7120         }
7121
7122         ret = -ENOMEM;
7123         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7124         if (!tr)
7125                 goto out_unlock;
7126
7127         tr->name = kstrdup(name, GFP_KERNEL);
7128         if (!tr->name)
7129                 goto out_free_tr;
7130
7131         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7132                 goto out_free_tr;
7133
7134         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7135
7136         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7137
7138         raw_spin_lock_init(&tr->start_lock);
7139
7140         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7141
7142         tr->current_trace = &nop_trace;
7143
7144         INIT_LIST_HEAD(&tr->systems);
7145         INIT_LIST_HEAD(&tr->events);
7146
7147         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7148                 goto out_free_tr;
7149
7150         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7151         if (!tr->dir)
7152                 goto out_free_tr;
7153
7154         ret = event_trace_add_tracer(tr->dir, tr);
7155         if (ret) {
7156                 tracefs_remove_recursive(tr->dir);
7157                 goto out_free_tr;
7158         }
7159
7160         init_tracer_tracefs(tr, tr->dir);
7161         init_trace_flags_index(tr);
7162         __update_tracer_options(tr);
7163
7164         list_add(&tr->list, &ftrace_trace_arrays);
7165
7166         mutex_unlock(&trace_types_lock);
7167
7168         return 0;
7169
7170  out_free_tr:
7171         free_trace_buffers(tr);
7172         free_cpumask_var(tr->tracing_cpumask);
7173         kfree(tr->name);
7174         kfree(tr);
7175
7176  out_unlock:
7177         mutex_unlock(&trace_types_lock);
7178
7179         return ret;
7180
7181 }
7182
7183 static int instance_rmdir(const char *name)
7184 {
7185         struct trace_array *tr;
7186         int found = 0;
7187         int ret;
7188         int i;
7189
7190         mutex_lock(&trace_types_lock);
7191
7192         ret = -ENODEV;
7193         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7194                 if (tr->name && strcmp(tr->name, name) == 0) {
7195                         found = 1;
7196                         break;
7197                 }
7198         }
7199         if (!found)
7200                 goto out_unlock;
7201
7202         ret = -EBUSY;
7203         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7204                 goto out_unlock;
7205
7206         list_del(&tr->list);
7207
7208         /* Disable all the flags that were enabled coming in */
7209         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7210                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7211                         set_tracer_flag(tr, 1 << i, 0);
7212         }
7213
7214         tracing_set_nop(tr);
7215         event_trace_del_tracer(tr);
7216         ftrace_clear_pids(tr);
7217         ftrace_destroy_function_files(tr);
7218         tracefs_remove_recursive(tr->dir);
7219         free_trace_buffers(tr);
7220
7221         for (i = 0; i < tr->nr_topts; i++) {
7222                 kfree(tr->topts[i].topts);
7223         }
7224         kfree(tr->topts);
7225
7226         free_cpumask_var(tr->tracing_cpumask);
7227         kfree(tr->name);
7228         kfree(tr);
7229
7230         ret = 0;
7231
7232  out_unlock:
7233         mutex_unlock(&trace_types_lock);
7234
7235         return ret;
7236 }
7237
7238 static __init void create_trace_instances(struct dentry *d_tracer)
7239 {
7240         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7241                                                          instance_mkdir,
7242                                                          instance_rmdir);
7243         if (WARN_ON(!trace_instance_dir))
7244                 return;
7245 }
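/*
 * With the "instances" directory in place, new trace arrays are created
 * and torn down from user space with plain mkdir/rmdir, e.g. (assuming
 * the debugfs/tracing automount path):
 *
 *	mkdir /sys/kernel/debug/tracing/instances/foo	# -> instance_mkdir("foo")
 *	rmdir /sys/kernel/debug/tracing/instances/foo	# -> instance_rmdir("foo")
 */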
7246
7247 static void
7248 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7249 {
7250         int cpu;
7251
7252         trace_create_file("available_tracers", 0444, d_tracer,
7253                         tr, &show_traces_fops);
7254
7255         trace_create_file("current_tracer", 0644, d_tracer,
7256                         tr, &set_tracer_fops);
7257
7258         trace_create_file("tracing_cpumask", 0644, d_tracer,
7259                           tr, &tracing_cpumask_fops);
7260
7261         trace_create_file("trace_options", 0644, d_tracer,
7262                           tr, &tracing_iter_fops);
7263
7264         trace_create_file("trace", 0644, d_tracer,
7265                           tr, &tracing_fops);
7266
7267         trace_create_file("trace_pipe", 0444, d_tracer,
7268                           tr, &tracing_pipe_fops);
7269
7270         trace_create_file("buffer_size_kb", 0644, d_tracer,
7271                           tr, &tracing_entries_fops);
7272
7273         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7274                           tr, &tracing_total_entries_fops);
7275
7276         trace_create_file("free_buffer", 0200, d_tracer,
7277                           tr, &tracing_free_buffer_fops);
7278
7279         trace_create_file("trace_marker", 0220, d_tracer,
7280                           tr, &tracing_mark_fops);
7281
7282         trace_create_file("trace_clock", 0644, d_tracer, tr,
7283                           &trace_clock_fops);
7284
7285         trace_create_file("tracing_on", 0644, d_tracer,
7286                           tr, &rb_simple_fops);
7287
7288         create_trace_options_dir(tr);
7289
7290 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7291         trace_create_file("tracing_max_latency", 0644, d_tracer,
7292                         &tr->max_latency, &tracing_max_lat_fops);
7293 #endif
7294
7295         if (ftrace_create_function_files(tr, d_tracer))
7296                 WARN(1, "Could not allocate function filter files");
7297
7298 #ifdef CONFIG_TRACER_SNAPSHOT
7299         trace_create_file("snapshot", 0644, d_tracer,
7300                           tr, &snapshot_fops);
7301 #endif
7302
7303         for_each_tracing_cpu(cpu)
7304                 tracing_init_tracefs_percpu(tr, cpu);
7305
7306         ftrace_init_tracefs(tr, d_tracer);
7307 }
7308
7309 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7310 {
7311         struct vfsmount *mnt;
7312         struct file_system_type *type;
7313
7314         /*
7315          * To maintain backward compatibility for tools that mount
7316          * debugfs to get to the tracing facility, tracefs is automatically
7317          * mounted to the debugfs/tracing directory.
7318          */
7319         type = get_fs_type("tracefs");
7320         if (!type)
7321                 return NULL;
7322         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7323         put_filesystem(type);
7324         if (IS_ERR(mnt))
7325                 return NULL;
7326         mntget(mnt);
7327
7328         return mnt;
7329 }
7330
7331 /**
7332  * tracing_init_dentry - initialize top level trace array
7333  *
7334  * This is called when creating files or directories in the tracing
7335  * directory. It is called via fs_initcall() by any of the boot up code
7336  * and expects to return the dentry of the top level tracing directory.
7337  */
7338 struct dentry *tracing_init_dentry(void)
7339 {
7340         struct trace_array *tr = &global_trace;
7341
7342         /* The top level trace array uses NULL as parent */
7343         if (tr->dir)
7344                 return NULL;
7345
7346         if (WARN_ON(!tracefs_initialized()) ||
7347                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7348                  WARN_ON(!debugfs_initialized())))
7349                 return ERR_PTR(-ENODEV);
7350
7351         /*
7352          * As there may still be users that expect the tracing
7353          * files to exist in debugfs/tracing, we must automount
7354          * the tracefs file system there, so older tools still
7355          * work with the newer kernel.
7356          */
7357         tr->dir = debugfs_create_automount("tracing", NULL,
7358                                            trace_automount, NULL);
7359         if (!tr->dir) {
7360                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7361                 return ERR_PTR(-ENOMEM);
7362         }
7363
7364         return NULL;
7365 }
7366
7367 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7368 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7369
7370 static void __init trace_enum_init(void)
7371 {
7372         int len;
7373
7374         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7375         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7376 }
7377
7378 #ifdef CONFIG_MODULES
7379 static void trace_module_add_enums(struct module *mod)
7380 {
7381         if (!mod->num_trace_enums)
7382                 return;
7383
7384         /*
7385          * Modules with bad taint do not have events created, do
7386          * not bother with enums either.
7387          */
7388         if (trace_module_has_bad_taint(mod))
7389                 return;
7390
7391         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7392 }
7393
7394 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7395 static void trace_module_remove_enums(struct module *mod)
7396 {
7397         union trace_enum_map_item *map;
7398         union trace_enum_map_item **last = &trace_enum_maps;
7399
7400         if (!mod->num_trace_enums)
7401                 return;
7402
7403         mutex_lock(&trace_enum_mutex);
7404
7405         map = trace_enum_maps;
7406
7407         while (map) {
7408                 if (map->head.mod == mod)
7409                         break;
7410                 map = trace_enum_jmp_to_tail(map);
7411                 last = &map->tail.next;
7412                 map = map->tail.next;
7413         }
7414         if (!map)
7415                 goto out;
7416
7417         *last = trace_enum_jmp_to_tail(map)->tail.next;
7418         kfree(map);
7419  out:
7420         mutex_unlock(&trace_enum_mutex);
7421 }
7422 #else
7423 static inline void trace_module_remove_enums(struct module *mod) { }
7424 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7425
7426 static int trace_module_notify(struct notifier_block *self,
7427                                unsigned long val, void *data)
7428 {
7429         struct module *mod = data;
7430
7431         switch (val) {
7432         case MODULE_STATE_COMING:
7433                 trace_module_add_enums(mod);
7434                 break;
7435         case MODULE_STATE_GOING:
7436                 trace_module_remove_enums(mod);
7437                 break;
7438         }
7439
7440         return 0;
7441 }
7442
7443 static struct notifier_block trace_module_nb = {
7444         .notifier_call = trace_module_notify,
7445         .priority = 0,
7446 };
7447 #endif /* CONFIG_MODULES */
7448
7449 static __init int tracer_init_tracefs(void)
7450 {
7451         struct dentry *d_tracer;
7452
7453         trace_access_lock_init();
7454
7455         d_tracer = tracing_init_dentry();
7456         if (IS_ERR(d_tracer))
7457                 return 0;
7458
7459         init_tracer_tracefs(&global_trace, d_tracer);
7460         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7461
7462         trace_create_file("tracing_thresh", 0644, d_tracer,
7463                         &global_trace, &tracing_thresh_fops);
7464
7465         trace_create_file("README", 0444, d_tracer,
7466                         NULL, &tracing_readme_fops);
7467
7468         trace_create_file("saved_cmdlines", 0444, d_tracer,
7469                         NULL, &tracing_saved_cmdlines_fops);
7470
7471         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7472                           NULL, &tracing_saved_cmdlines_size_fops);
7473
7474         trace_enum_init();
7475
7476         trace_create_enum_file(d_tracer);
7477
7478 #ifdef CONFIG_MODULES
7479         register_module_notifier(&trace_module_nb);
7480 #endif
7481
7482 #ifdef CONFIG_DYNAMIC_FTRACE
7483         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7484                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7485 #endif
7486
7487         create_trace_instances(d_tracer);
7488
7489         update_tracer_options(&global_trace);
7490
7491         return 0;
7492 }
7493
7494 static int trace_panic_handler(struct notifier_block *this,
7495                                unsigned long event, void *unused)
7496 {
7497         if (ftrace_dump_on_oops)
7498                 ftrace_dump(ftrace_dump_on_oops);
7499         return NOTIFY_OK;
7500 }
7501
7502 static struct notifier_block trace_panic_notifier = {
7503         .notifier_call  = trace_panic_handler,
7504         .next           = NULL,
7505         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7506 };
7507
7508 static int trace_die_handler(struct notifier_block *self,
7509                              unsigned long val,
7510                              void *data)
7511 {
7512         switch (val) {
7513         case DIE_OOPS:
7514                 if (ftrace_dump_on_oops)
7515                         ftrace_dump(ftrace_dump_on_oops);
7516                 break;
7517         default:
7518                 break;
7519         }
7520         return NOTIFY_OK;
7521 }
7522
7523 static struct notifier_block trace_die_notifier = {
7524         .notifier_call = trace_die_handler,
7525         .priority = 200
7526 };
7527
7528 /*
7529  * printk is set to max of 1024, we really don't need it that big.
7530  * Nothing should be printing 1000 characters anyway.
7531  */
7532 #define TRACE_MAX_PRINT         1000
7533
7534 /*
7535  * Define here KERN_TRACE so that we have one place to modify
7536  * it if we decide to change what log level the ftrace dump
7537  * should be at.
7538  */
7539 #define KERN_TRACE              KERN_EMERG
7540
7541 void
7542 trace_printk_seq(struct trace_seq *s)
7543 {
7544         /* Probably should print a warning here. */
7545         if (s->seq.len >= TRACE_MAX_PRINT)
7546                 s->seq.len = TRACE_MAX_PRINT;
7547
7548         /*
7549          * More paranoid code. Although the buffer size is set to
7550          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7551          * an extra layer of protection.
7552          */
7553         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7554                 s->seq.len = s->seq.size - 1;
7555
7556         /* should be zero ended, but we are paranoid. */
7557         s->buffer[s->seq.len] = 0;
7558
7559         printk(KERN_TRACE "%s", s->buffer);
7560
7561         trace_seq_init(s);
7562 }
7563
7564 void trace_init_global_iter(struct trace_iterator *iter)
7565 {
7566         iter->tr = &global_trace;
7567         iter->trace = iter->tr->current_trace;
7568         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7569         iter->trace_buffer = &global_trace.trace_buffer;
7570
7571         if (iter->trace && iter->trace->open)
7572                 iter->trace->open(iter);
7573
7574         /* Annotate start of buffers if we had overruns */
7575         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7576                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7577
7578         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7579         if (trace_clocks[iter->tr->clock_id].in_ns)
7580                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7581 }
7582
7583 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7584 {
7585         /* use static because iter can be a bit big for the stack */
7586         static struct trace_iterator iter;
7587         static atomic_t dump_running;
7588         struct trace_array *tr = &global_trace;
7589         unsigned int old_userobj;
7590         unsigned long flags;
7591         int cnt = 0, cpu;
7592
7593         /* Only allow one dump user at a time. */
7594         if (atomic_inc_return(&dump_running) != 1) {
7595                 atomic_dec(&dump_running);
7596                 return;
7597         }
7598
7599         /*
7600          * Always turn off tracing when we dump.
7601          * We don't need to show trace output of what happens
7602          * between multiple crashes.
7603          *
7604          * If the user does a sysrq-z, then they can re-enable
7605          * tracing with echo 1 > tracing_on.
7606          */
7607         tracing_off();
7608
7609         local_irq_save(flags);
7610
7611         /* Simulate the iterator */
7612         trace_init_global_iter(&iter);
7613
7614         for_each_tracing_cpu(cpu) {
7615                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7616         }
7617
7618         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7619
7620         /* don't look at user memory in panic mode */
7621         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7622
7623         switch (oops_dump_mode) {
7624         case DUMP_ALL:
7625                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7626                 break;
7627         case DUMP_ORIG:
7628                 iter.cpu_file = raw_smp_processor_id();
7629                 break;
7630         case DUMP_NONE:
7631                 goto out_enable;
7632         default:
7633                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7634                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7635         }
7636
7637         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7638
7639         /* Did function tracer already get disabled? */
7640         if (ftrace_is_dead()) {
7641                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7642                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7643         }
7644
7645         /*
7646          * We need to stop all tracing on all CPUs to read
7647          * the next buffer. This is a bit expensive, but is
7648          * not done often. We fill all that we can read,
7649          * and then release the locks again.
7650          */
7651
7652         while (!trace_empty(&iter)) {
7653
7654                 if (!cnt)
7655                         printk(KERN_TRACE "---------------------------------\n");
7656
7657                 cnt++;
7658
7659                 trace_iterator_reset(&iter);
7660                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7661
7662                 if (trace_find_next_entry_inc(&iter) != NULL) {
7663                         int ret;
7664
7665                         ret = print_trace_line(&iter);
7666                         if (ret != TRACE_TYPE_NO_CONSUME)
7667                                 trace_consume(&iter);
7668                 }
7669                 touch_nmi_watchdog();
7670
7671                 trace_printk_seq(&iter.seq);
7672         }
7673
7674         if (!cnt)
7675                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7676         else
7677                 printk(KERN_TRACE "---------------------------------\n");
7678
7679  out_enable:
7680         tr->trace_flags |= old_userobj;
7681
7682         for_each_tracing_cpu(cpu) {
7683                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7684         }
7685         atomic_dec(&dump_running);
7686         local_irq_restore(flags);
7687 }
7688 EXPORT_SYMBOL_GPL(ftrace_dump);
7689
7690 __init static int tracer_alloc_buffers(void)
7691 {
7692         int ring_buf_size;
7693         int ret = -ENOMEM;
7694
7695         /*
7696          * Make sure we don't accidently add more trace options
7697          * Make sure we don't accidentally add more trace options
7698          */
7699         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7700
7701         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7702                 goto out;
7703
7704         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7705                 goto out_free_buffer_mask;
7706
7707         /* Only allocate trace_printk buffers if a trace_printk exists */
7708         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
7709                 /* Must be called before global_trace.buffer is allocated */
7710                 trace_printk_init_buffers();
7711
7712         /* To save memory, keep the ring buffer size to its minimum */
7713         if (ring_buffer_expanded)
7714                 ring_buf_size = trace_buf_size;
7715         else
7716                 ring_buf_size = 1;
7717
7718         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7719         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7720
7721         raw_spin_lock_init(&global_trace.start_lock);
7722
7723         /* Used for event triggers */
7724         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7725         if (!temp_buffer)
7726                 goto out_free_cpumask;
7727
7728         if (trace_create_savedcmd() < 0)
7729                 goto out_free_temp_buffer;
7730
7731         /* TODO: make the number of buffers hot pluggable with CPUS */
7732         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7733                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7734                 WARN_ON(1);
7735                 goto out_free_savedcmd;
7736         }
7737
7738         if (global_trace.buffer_disabled)
7739                 tracing_off();
7740
7741         if (trace_boot_clock) {
7742                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7743                 if (ret < 0)
7744                         pr_warn("Trace clock %s not defined, going back to default\n",
7745                                 trace_boot_clock);
7746         }
7747
7748         /*
7749          * register_tracer() might reference current_trace, so it
7750          * needs to be set before we register anything. This is
7751          * just a bootstrap of current_trace anyway.
7752          */
7753         global_trace.current_trace = &nop_trace;
7754
7755         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7756
7757         ftrace_init_global_array_ops(&global_trace);
7758
7759         init_trace_flags_index(&global_trace);
7760
7761         register_tracer(&nop_trace);
7762
7763         /* All seems OK, enable tracing */
7764         tracing_disabled = 0;
7765
7766         atomic_notifier_chain_register(&panic_notifier_list,
7767                                        &trace_panic_notifier);
7768
7769         register_die_notifier(&trace_die_notifier);
7770
7771         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7772
7773         INIT_LIST_HEAD(&global_trace.systems);
7774         INIT_LIST_HEAD(&global_trace.events);
7775         list_add(&global_trace.list, &ftrace_trace_arrays);
7776
7777         apply_trace_boot_options();
7778
7779         register_snapshot_cmd();
7780
7781         return 0;
7782
7783 out_free_savedcmd:
7784         free_saved_cmdlines_buffer(savedcmd);
7785 out_free_temp_buffer:
7786         ring_buffer_free(temp_buffer);
7787 out_free_cpumask:
7788         free_cpumask_var(global_trace.tracing_cpumask);
7789 out_free_buffer_mask:
7790         free_cpumask_var(tracing_buffer_mask);
7791 out:
7792         return ret;
7793 }
7794
7795 void __init trace_init(void)
7796 {
7797         if (tracepoint_printk) {
7798                 tracepoint_print_iter =
7799                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7800                 if (WARN_ON(!tracepoint_print_iter))
7801                         tracepoint_printk = 0;
7802         }
7803         tracer_alloc_buffers();
7804         trace_event_init();
7805 }
7806
7807 __init static int clear_boot_tracer(void)
7808 {
7809         /*
7810          * The buffer holding the default bootup tracer name lives in
7811          * an init section. This function is called at late_initcall
7812          * time; if the boot tracer was never registered, clear the
7813          * pointer to prevent a later registration from accessing the
7814          * buffer that is about to be freed.
7815          */
7816         if (!default_bootup_tracer)
7817                 return 0;
7818
7819         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7820                default_bootup_tracer);
7821         default_bootup_tracer = NULL;
7822
7823         return 0;
7824 }
7825
7826 fs_initcall(tracer_init_tracefs);
7827 late_initcall_sync(clear_boot_tracer);