GNU Linux-libre 4.19.264-gnu1
[releases.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will lurk into the ring-buffer to count the
60  * entries inserted during the selftest although some concurrent
61  * insertions into the ring-buffer such as trace_printk could occurred
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * serial console.
112  *
113  * It is default off, but you can enable it with either specifying
114  * "ftrace_dump_on_oops" in the kernel command line, or setting
115  * /proc/sys/kernel/ftrace_dump_on_oops
116  * Set 1 if you want to dump buffers of all CPUs
117  * Set 2 if you want to dump the buffer of the CPU that triggered oops
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         /* Ignore the "tp_printk_stop_on_boot" param */
236         if (*str == '_')
237                 return 0;
238
239         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
240                 tracepoint_printk = 1;
241         return 1;
242 }
243 __setup("tp_printk", set_tracepoint_printk);
244
245 unsigned long long ns2usecs(u64 nsec)
246 {
247         nsec += 500;
248         do_div(nsec, 1000);
249         return nsec;
250 }
251
252 /* trace_flags holds trace_options default values */
253 #define TRACE_DEFAULT_FLAGS                                             \
254         (FUNCTION_DEFAULT_FLAGS |                                       \
255          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
256          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
257          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
258          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
259
260 /* trace_options that are only supported by global_trace */
261 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
262                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
263
264 /* trace_flags that are default zero for instances */
265 #define ZEROED_TRACE_FLAGS \
266         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
267
268 /*
269  * The global_trace is the descriptor that holds the top-level tracing
270  * buffers for the live tracing.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324         vfree(pid_list->pids);
325         kfree(pid_list);
326 }
327
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338         /*
339          * If pid_max changed after filtered_pids was created, we
340          * by default ignore all pids greater than the previous pid_max.
341          */
342         if (search_pid >= filtered_pids->pid_max)
343                 return false;
344
345         return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360         /*
361          * Return false, because if filtered_pids does not exist,
362          * all pids are good to trace.
363          */
364         if (!filtered_pids)
365                 return false;
366
367         return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369
370 /**
371  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383                                   struct task_struct *self,
384                                   struct task_struct *task)
385 {
386         if (!pid_list)
387                 return;
388
389         /* For forks, we only add if the forking task is listed */
390         if (self) {
391                 if (!trace_find_filtered_pid(pid_list, self->pid))
392                         return;
393         }
394
395         /* Sorry, but we don't support pid_max changing after setting */
396         if (task->pid >= pid_list->pid_max)
397                 return;
398
399         /* "self" is set for forks, and NULL for exits */
400         if (self)
401                 set_bit(task->pid, pid_list->pids);
402         else
403                 clear_bit(task->pid, pid_list->pids);
404 }
405
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420         unsigned long pid = (unsigned long)v;
421
422         (*pos)++;
423
424         /* pid already is +1 of the actual prevous bit */
425         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427         /* Return pid + 1 to allow zero to be represented */
428         if (pid < pid_list->pid_max)
429                 return (void *)(pid + 1);
430
431         return NULL;
432 }
433
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447         unsigned long pid;
448         loff_t l = 0;
449
450         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451         if (pid >= pid_list->pid_max)
452                 return NULL;
453
454         /* Return pid + 1 so that zero can be the exit value */
455         for (pid++; pid && l < *pos;
456              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457                 ;
458         return (void *)pid;
459 }
460
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471         unsigned long pid = (unsigned long)v - 1;
472
473         seq_printf(m, "%lu\n", pid);
474         return 0;
475 }
476
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE            127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481                     struct trace_pid_list **new_pid_list,
482                     const char __user *ubuf, size_t cnt)
483 {
484         struct trace_pid_list *pid_list;
485         struct trace_parser parser;
486         unsigned long val;
487         int nr_pids = 0;
488         ssize_t read = 0;
489         ssize_t ret = 0;
490         loff_t pos;
491         pid_t pid;
492
493         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494                 return -ENOMEM;
495
496         /*
497          * Always recreate a new array. The write is an all or nothing
498          * operation. Always create a new array when adding new pids by
499          * the user. If the operation fails, then the current list is
500          * not modified.
501          */
502         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503         if (!pid_list) {
504                 trace_parser_put(&parser);
505                 return -ENOMEM;
506         }
507
508         pid_list->pid_max = READ_ONCE(pid_max);
509
510         /* Only truncating will shrink pid_max */
511         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
512                 pid_list->pid_max = filtered_pids->pid_max;
513
514         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
515         if (!pid_list->pids) {
516                 trace_parser_put(&parser);
517                 kfree(pid_list);
518                 return -ENOMEM;
519         }
520
521         if (filtered_pids) {
522                 /* copy the current bits to the new max */
523                 for_each_set_bit(pid, filtered_pids->pids,
524                                  filtered_pids->pid_max) {
525                         set_bit(pid, pid_list->pids);
526                         nr_pids++;
527                 }
528         }
529
530         while (cnt > 0) {
531
532                 pos = 0;
533
534                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
535                 if (ret < 0 || !trace_parser_loaded(&parser))
536                         break;
537
538                 read += ret;
539                 ubuf += ret;
540                 cnt -= ret;
541
542                 ret = -EINVAL;
543                 if (kstrtoul(parser.buffer, 0, &val))
544                         break;
545                 if (val >= pid_list->pid_max)
546                         break;
547
548                 pid = (pid_t)val;
549
550                 set_bit(pid, pid_list->pids);
551                 nr_pids++;
552
553                 trace_parser_clear(&parser);
554                 ret = 0;
555         }
556         trace_parser_put(&parser);
557
558         if (ret < 0) {
559                 trace_free_pid_list(pid_list);
560                 return ret;
561         }
562
563         if (!nr_pids) {
564                 /* Cleared the list of pids */
565                 trace_free_pid_list(pid_list);
566                 read = ret;
567                 pid_list = NULL;
568         }
569
570         *new_pid_list = pid_list;
571
572         return read;
573 }
574
575 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
576 {
577         u64 ts;
578
579         /* Early boot up does not have a buffer yet */
580         if (!buf->buffer)
581                 return trace_clock_local();
582
583         ts = ring_buffer_time_stamp(buf->buffer, cpu);
584         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
585
586         return ts;
587 }
588
589 u64 ftrace_now(int cpu)
590 {
591         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
592 }
593
594 /**
595  * tracing_is_enabled - Show if global_trace has been disabled
596  *
597  * Shows if the global trace has been enabled or not. It uses the
598  * mirror flag "buffer_disabled" to be used in fast paths such as for
599  * the irqsoff tracer. But it may be inaccurate due to races. If you
600  * need to know the accurate state, use tracing_is_on() which is a little
601  * slower, but accurate.
602  */
603 int tracing_is_enabled(void)
604 {
605         /*
606          * For quick access (irqsoff uses this in fast path), just
607          * return the mirror variable of the state of the ring buffer.
608          * It's a little racy, but we don't really care.
609          */
610         smp_rmb();
611         return !global_trace.buffer_disabled;
612 }
613
614 /*
615  * trace_buf_size is the size in bytes that is allocated
616  * for a buffer. Note, the number of bytes is always rounded
617  * to page size.
618  *
619  * This number is purposely set to a low number of 16384.
620  * If the dump on oops happens, it will be much appreciated
621  * to not have to wait for all that output. Anyway this can be
622  * boot time and run time configurable.
623  */
624 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
625
626 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
627
628 /* trace_types holds a link list of available tracers. */
629 static struct tracer            *trace_types __read_mostly;
630
631 /*
632  * trace_types_lock is used to protect the trace_types list.
633  */
634 DEFINE_MUTEX(trace_types_lock);
635
636 /*
637  * serialize the access of the ring buffer
638  *
639  * ring buffer serializes readers, but it is low level protection.
640  * The validity of the events (which returns by ring_buffer_peek() ..etc)
641  * are not protected by ring buffer.
642  *
643  * The content of events may become garbage if we allow other process consumes
644  * these events concurrently:
645  *   A) the page of the consumed events may become a normal page
646  *      (not reader page) in ring buffer, and this page will be rewrited
647  *      by events producer.
648  *   B) The page of the consumed events may become a page for splice_read,
649  *      and this page will be returned to system.
650  *
651  * These primitives allow multi process access to different cpu ring buffer
652  * concurrently.
653  *
654  * These primitives don't distinguish read-only and read-consume access.
655  * Multi read-only access are also serialized.
656  */
657
658 #ifdef CONFIG_SMP
659 static DECLARE_RWSEM(all_cpu_access_lock);
660 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
661
662 static inline void trace_access_lock(int cpu)
663 {
664         if (cpu == RING_BUFFER_ALL_CPUS) {
665                 /* gain it for accessing the whole ring buffer. */
666                 down_write(&all_cpu_access_lock);
667         } else {
668                 /* gain it for accessing a cpu ring buffer. */
669
670                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
671                 down_read(&all_cpu_access_lock);
672
673                 /* Secondly block other access to this @cpu ring buffer. */
674                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
675         }
676 }
677
678 static inline void trace_access_unlock(int cpu)
679 {
680         if (cpu == RING_BUFFER_ALL_CPUS) {
681                 up_write(&all_cpu_access_lock);
682         } else {
683                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
684                 up_read(&all_cpu_access_lock);
685         }
686 }
687
688 static inline void trace_access_lock_init(void)
689 {
690         int cpu;
691
692         for_each_possible_cpu(cpu)
693                 mutex_init(&per_cpu(cpu_access_lock, cpu));
694 }
695
696 #else
697
698 static DEFINE_MUTEX(access_lock);
699
700 static inline void trace_access_lock(int cpu)
701 {
702         (void)cpu;
703         mutex_lock(&access_lock);
704 }
705
706 static inline void trace_access_unlock(int cpu)
707 {
708         (void)cpu;
709         mutex_unlock(&access_lock);
710 }
711
712 static inline void trace_access_lock_init(void)
713 {
714 }
715
716 #endif
717
718 #ifdef CONFIG_STACKTRACE
719 static void __ftrace_trace_stack(struct ring_buffer *buffer,
720                                  unsigned long flags,
721                                  int skip, int pc, struct pt_regs *regs);
722 static inline void ftrace_trace_stack(struct trace_array *tr,
723                                       struct ring_buffer *buffer,
724                                       unsigned long flags,
725                                       int skip, int pc, struct pt_regs *regs);
726
727 #else
728 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
729                                         unsigned long flags,
730                                         int skip, int pc, struct pt_regs *regs)
731 {
732 }
733 static inline void ftrace_trace_stack(struct trace_array *tr,
734                                       struct ring_buffer *buffer,
735                                       unsigned long flags,
736                                       int skip, int pc, struct pt_regs *regs)
737 {
738 }
739
740 #endif
741
742 static __always_inline void
743 trace_event_setup(struct ring_buffer_event *event,
744                   int type, unsigned long flags, int pc)
745 {
746         struct trace_entry *ent = ring_buffer_event_data(event);
747
748         tracing_generic_entry_update(ent, flags, pc);
749         ent->type = type;
750 }
751
752 static __always_inline struct ring_buffer_event *
753 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
754                           int type,
755                           unsigned long len,
756                           unsigned long flags, int pc)
757 {
758         struct ring_buffer_event *event;
759
760         event = ring_buffer_lock_reserve(buffer, len);
761         if (event != NULL)
762                 trace_event_setup(event, type, flags, pc);
763
764         return event;
765 }
766
767 void tracer_tracing_on(struct trace_array *tr)
768 {
769         if (tr->trace_buffer.buffer)
770                 ring_buffer_record_on(tr->trace_buffer.buffer);
771         /*
772          * This flag is looked at when buffers haven't been allocated
773          * yet, or by some tracers (like irqsoff), that just want to
774          * know if the ring buffer has been disabled, but it can handle
775          * races of where it gets disabled but we still do a record.
776          * As the check is in the fast path of the tracers, it is more
777          * important to be fast than accurate.
778          */
779         tr->buffer_disabled = 0;
780         /* Make the flag seen by readers */
781         smp_wmb();
782 }
783
784 /**
785  * tracing_on - enable tracing buffers
786  *
787  * This function enables tracing buffers that may have been
788  * disabled with tracing_off.
789  */
790 void tracing_on(void)
791 {
792         tracer_tracing_on(&global_trace);
793 }
794 EXPORT_SYMBOL_GPL(tracing_on);
795
796
797 static __always_inline void
798 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
799 {
800         __this_cpu_write(trace_taskinfo_save, true);
801
802         /* If this is the temp buffer, we need to commit fully */
803         if (this_cpu_read(trace_buffered_event) == event) {
804                 /* Length is in event->array[0] */
805                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
806                 /* Release the temp buffer */
807                 this_cpu_dec(trace_buffered_event_cnt);
808         } else
809                 ring_buffer_unlock_commit(buffer, event);
810 }
811
812 /**
813  * __trace_puts - write a constant string into the trace buffer.
814  * @ip:    The address of the caller
815  * @str:   The constant string to write
816  * @size:  The size of the string.
817  */
818 int __trace_puts(unsigned long ip, const char *str, int size)
819 {
820         struct ring_buffer_event *event;
821         struct ring_buffer *buffer;
822         struct print_entry *entry;
823         unsigned long irq_flags;
824         int alloc;
825         int pc;
826
827         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
828                 return 0;
829
830         pc = preempt_count();
831
832         if (unlikely(tracing_selftest_running || tracing_disabled))
833                 return 0;
834
835         alloc = sizeof(*entry) + size + 2; /* possible \n added */
836
837         local_save_flags(irq_flags);
838         buffer = global_trace.trace_buffer.buffer;
839         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
840                                             irq_flags, pc);
841         if (!event)
842                 return 0;
843
844         entry = ring_buffer_event_data(event);
845         entry->ip = ip;
846
847         memcpy(&entry->buf, str, size);
848
849         /* Add a newline if necessary */
850         if (entry->buf[size - 1] != '\n') {
851                 entry->buf[size] = '\n';
852                 entry->buf[size + 1] = '\0';
853         } else
854                 entry->buf[size] = '\0';
855
856         __buffer_unlock_commit(buffer, event);
857         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
858
859         return size;
860 }
861 EXPORT_SYMBOL_GPL(__trace_puts);
862
863 /**
864  * __trace_bputs - write the pointer to a constant string into trace buffer
865  * @ip:    The address of the caller
866  * @str:   The constant string to write to the buffer to
867  */
868 int __trace_bputs(unsigned long ip, const char *str)
869 {
870         struct ring_buffer_event *event;
871         struct ring_buffer *buffer;
872         struct bputs_entry *entry;
873         unsigned long irq_flags;
874         int size = sizeof(struct bputs_entry);
875         int pc;
876
877         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
878                 return 0;
879
880         pc = preempt_count();
881
882         if (unlikely(tracing_selftest_running || tracing_disabled))
883                 return 0;
884
885         local_save_flags(irq_flags);
886         buffer = global_trace.trace_buffer.buffer;
887         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
888                                             irq_flags, pc);
889         if (!event)
890                 return 0;
891
892         entry = ring_buffer_event_data(event);
893         entry->ip                       = ip;
894         entry->str                      = str;
895
896         __buffer_unlock_commit(buffer, event);
897         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
898
899         return 1;
900 }
901 EXPORT_SYMBOL_GPL(__trace_bputs);
902
903 #ifdef CONFIG_TRACER_SNAPSHOT
904 void tracing_snapshot_instance(struct trace_array *tr)
905 {
906         struct tracer *tracer = tr->current_trace;
907         unsigned long flags;
908
909         if (in_nmi()) {
910                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
911                 internal_trace_puts("*** snapshot is being ignored        ***\n");
912                 return;
913         }
914
915         if (!tr->allocated_snapshot) {
916                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
917                 internal_trace_puts("*** stopping trace here!   ***\n");
918                 tracing_off();
919                 return;
920         }
921
922         /* Note, snapshot can not be used when the tracer uses it */
923         if (tracer->use_max_tr) {
924                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
925                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
926                 return;
927         }
928
929         local_irq_save(flags);
930         update_max_tr(tr, current, smp_processor_id());
931         local_irq_restore(flags);
932 }
933
934 /**
935  * tracing_snapshot - take a snapshot of the current buffer.
936  *
937  * This causes a swap between the snapshot buffer and the current live
938  * tracing buffer. You can use this to take snapshots of the live
939  * trace when some condition is triggered, but continue to trace.
940  *
941  * Note, make sure to allocate the snapshot with either
942  * a tracing_snapshot_alloc(), or by doing it manually
943  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
944  *
945  * If the snapshot buffer is not allocated, it will stop tracing.
946  * Basically making a permanent snapshot.
947  */
948 void tracing_snapshot(void)
949 {
950         struct trace_array *tr = &global_trace;
951
952         tracing_snapshot_instance(tr);
953 }
954 EXPORT_SYMBOL_GPL(tracing_snapshot);
955
956 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
957                                         struct trace_buffer *size_buf, int cpu_id);
958 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
959
960 int tracing_alloc_snapshot_instance(struct trace_array *tr)
961 {
962         int ret;
963
964         if (!tr->allocated_snapshot) {
965
966                 /* allocate spare buffer */
967                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
968                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
969                 if (ret < 0)
970                         return ret;
971
972                 tr->allocated_snapshot = true;
973         }
974
975         return 0;
976 }
977
978 static void free_snapshot(struct trace_array *tr)
979 {
980         /*
981          * We don't free the ring buffer. instead, resize it because
982          * The max_tr ring buffer has some state (e.g. ring->clock) and
983          * we want preserve it.
984          */
985         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
986         set_buffer_entries(&tr->max_buffer, 1);
987         tracing_reset_online_cpus(&tr->max_buffer);
988         tr->allocated_snapshot = false;
989 }
990
991 /**
992  * tracing_alloc_snapshot - allocate snapshot buffer.
993  *
994  * This only allocates the snapshot buffer if it isn't already
995  * allocated - it doesn't also take a snapshot.
996  *
997  * This is meant to be used in cases where the snapshot buffer needs
998  * to be set up for events that can't sleep but need to be able to
999  * trigger a snapshot.
1000  */
1001 int tracing_alloc_snapshot(void)
1002 {
1003         struct trace_array *tr = &global_trace;
1004         int ret;
1005
1006         ret = tracing_alloc_snapshot_instance(tr);
1007         WARN_ON(ret < 0);
1008
1009         return ret;
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1012
1013 /**
1014  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1015  *
1016  * This is similar to tracing_snapshot(), but it will allocate the
1017  * snapshot buffer if it isn't already allocated. Use this only
1018  * where it is safe to sleep, as the allocation may sleep.
1019  *
1020  * This causes a swap between the snapshot buffer and the current live
1021  * tracing buffer. You can use this to take snapshots of the live
1022  * trace when some condition is triggered, but continue to trace.
1023  */
1024 void tracing_snapshot_alloc(void)
1025 {
1026         int ret;
1027
1028         ret = tracing_alloc_snapshot();
1029         if (ret < 0)
1030                 return;
1031
1032         tracing_snapshot();
1033 }
1034 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1035 #else
1036 void tracing_snapshot(void)
1037 {
1038         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1039 }
1040 EXPORT_SYMBOL_GPL(tracing_snapshot);
1041 int tracing_alloc_snapshot(void)
1042 {
1043         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1044         return -ENODEV;
1045 }
1046 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1047 void tracing_snapshot_alloc(void)
1048 {
1049         /* Give warning */
1050         tracing_snapshot();
1051 }
1052 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1053 #endif /* CONFIG_TRACER_SNAPSHOT */
1054
1055 void tracer_tracing_off(struct trace_array *tr)
1056 {
1057         if (tr->trace_buffer.buffer)
1058                 ring_buffer_record_off(tr->trace_buffer.buffer);
1059         /*
1060          * This flag is looked at when buffers haven't been allocated
1061          * yet, or by some tracers (like irqsoff), that just want to
1062          * know if the ring buffer has been disabled, but it can handle
1063          * races of where it gets disabled but we still do a record.
1064          * As the check is in the fast path of the tracers, it is more
1065          * important to be fast than accurate.
1066          */
1067         tr->buffer_disabled = 1;
1068         /* Make the flag seen by readers */
1069         smp_wmb();
1070 }
1071
1072 /**
1073  * tracing_off - turn off tracing buffers
1074  *
1075  * This function stops the tracing buffers from recording data.
1076  * It does not disable any overhead the tracers themselves may
1077  * be causing. This function simply causes all recording to
1078  * the ring buffers to fail.
1079  */
1080 void tracing_off(void)
1081 {
1082         tracer_tracing_off(&global_trace);
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_off);
1085
1086 void disable_trace_on_warning(void)
1087 {
1088         if (__disable_trace_on_warning)
1089                 tracing_off();
1090 }
1091
1092 /**
1093  * tracer_tracing_is_on - show real state of ring buffer enabled
1094  * @tr : the trace array to know if ring buffer is enabled
1095  *
1096  * Shows real state of the ring buffer if it is enabled or not.
1097  */
1098 bool tracer_tracing_is_on(struct trace_array *tr)
1099 {
1100         if (tr->trace_buffer.buffer)
1101                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1102         return !tr->buffer_disabled;
1103 }
1104
1105 /**
1106  * tracing_is_on - show state of ring buffers enabled
1107  */
1108 int tracing_is_on(void)
1109 {
1110         return tracer_tracing_is_on(&global_trace);
1111 }
1112 EXPORT_SYMBOL_GPL(tracing_is_on);
1113
1114 static int __init set_buf_size(char *str)
1115 {
1116         unsigned long buf_size;
1117
1118         if (!str)
1119                 return 0;
1120         buf_size = memparse(str, &str);
1121         /*
1122          * nr_entries can not be zero and the startup
1123          * tests require some buffer space. Therefore
1124          * ensure we have at least 4096 bytes of buffer.
1125          */
1126         trace_buf_size = max(4096UL, buf_size);
1127         return 1;
1128 }
1129 __setup("trace_buf_size=", set_buf_size);
1130
1131 static int __init set_tracing_thresh(char *str)
1132 {
1133         unsigned long threshold;
1134         int ret;
1135
1136         if (!str)
1137                 return 0;
1138         ret = kstrtoul(str, 0, &threshold);
1139         if (ret < 0)
1140                 return 0;
1141         tracing_thresh = threshold * 1000;
1142         return 1;
1143 }
1144 __setup("tracing_thresh=", set_tracing_thresh);
1145
1146 unsigned long nsecs_to_usecs(unsigned long nsecs)
1147 {
1148         return nsecs / 1000;
1149 }
1150
1151 /*
1152  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1153  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1154  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1155  * of strings in the order that the evals (enum) were defined.
1156  */
1157 #undef C
1158 #define C(a, b) b
1159
1160 /* These must match the bit postions in trace_iterator_flags */
1161 static const char *trace_options[] = {
1162         TRACE_FLAGS
1163         NULL
1164 };
1165
1166 static struct {
1167         u64 (*func)(void);
1168         const char *name;
1169         int in_ns;              /* is this clock in nanoseconds? */
1170 } trace_clocks[] = {
1171         { trace_clock_local,            "local",        1 },
1172         { trace_clock_global,           "global",       1 },
1173         { trace_clock_counter,          "counter",      0 },
1174         { trace_clock_jiffies,          "uptime",       0 },
1175         { trace_clock,                  "perf",         1 },
1176         { ktime_get_mono_fast_ns,       "mono",         1 },
1177         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1178         { ktime_get_boot_fast_ns,       "boot",         1 },
1179         ARCH_TRACE_CLOCKS
1180 };
1181
1182 bool trace_clock_in_ns(struct trace_array *tr)
1183 {
1184         if (trace_clocks[tr->clock_id].in_ns)
1185                 return true;
1186
1187         return false;
1188 }
1189
1190 /*
1191  * trace_parser_get_init - gets the buffer for trace parser
1192  */
1193 int trace_parser_get_init(struct trace_parser *parser, int size)
1194 {
1195         memset(parser, 0, sizeof(*parser));
1196
1197         parser->buffer = kmalloc(size, GFP_KERNEL);
1198         if (!parser->buffer)
1199                 return 1;
1200
1201         parser->size = size;
1202         return 0;
1203 }
1204
1205 /*
1206  * trace_parser_put - frees the buffer for trace parser
1207  */
1208 void trace_parser_put(struct trace_parser *parser)
1209 {
1210         kfree(parser->buffer);
1211         parser->buffer = NULL;
1212 }
1213
1214 /*
1215  * trace_get_user - reads the user input string separated by  space
1216  * (matched by isspace(ch))
1217  *
1218  * For each string found the 'struct trace_parser' is updated,
1219  * and the function returns.
1220  *
1221  * Returns number of bytes read.
1222  *
1223  * See kernel/trace/trace.h for 'struct trace_parser' details.
1224  */
1225 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1226         size_t cnt, loff_t *ppos)
1227 {
1228         char ch;
1229         size_t read = 0;
1230         ssize_t ret;
1231
1232         if (!*ppos)
1233                 trace_parser_clear(parser);
1234
1235         ret = get_user(ch, ubuf++);
1236         if (ret)
1237                 goto out;
1238
1239         read++;
1240         cnt--;
1241
1242         /*
1243          * The parser is not finished with the last write,
1244          * continue reading the user input without skipping spaces.
1245          */
1246         if (!parser->cont) {
1247                 /* skip white space */
1248                 while (cnt && isspace(ch)) {
1249                         ret = get_user(ch, ubuf++);
1250                         if (ret)
1251                                 goto out;
1252                         read++;
1253                         cnt--;
1254                 }
1255
1256                 parser->idx = 0;
1257
1258                 /* only spaces were written */
1259                 if (isspace(ch) || !ch) {
1260                         *ppos += read;
1261                         ret = read;
1262                         goto out;
1263                 }
1264         }
1265
1266         /* read the non-space input */
1267         while (cnt && !isspace(ch) && ch) {
1268                 if (parser->idx < parser->size - 1)
1269                         parser->buffer[parser->idx++] = ch;
1270                 else {
1271                         ret = -EINVAL;
1272                         goto out;
1273                 }
1274                 ret = get_user(ch, ubuf++);
1275                 if (ret)
1276                         goto out;
1277                 read++;
1278                 cnt--;
1279         }
1280
1281         /* We either got finished input or we have to wait for another call. */
1282         if (isspace(ch) || !ch) {
1283                 parser->buffer[parser->idx] = 0;
1284                 parser->cont = false;
1285         } else if (parser->idx < parser->size - 1) {
1286                 parser->cont = true;
1287                 parser->buffer[parser->idx++] = ch;
1288                 /* Make sure the parsed string always terminates with '\0'. */
1289                 parser->buffer[parser->idx] = 0;
1290         } else {
1291                 ret = -EINVAL;
1292                 goto out;
1293         }
1294
1295         *ppos += read;
1296         ret = read;
1297
1298 out:
1299         return ret;
1300 }
1301
1302 /* TODO add a seq_buf_to_buffer() */
1303 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1304 {
1305         int len;
1306
1307         if (trace_seq_used(s) <= s->seq.readpos)
1308                 return -EBUSY;
1309
1310         len = trace_seq_used(s) - s->seq.readpos;
1311         if (cnt > len)
1312                 cnt = len;
1313         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1314
1315         s->seq.readpos += cnt;
1316         return cnt;
1317 }
1318
1319 unsigned long __read_mostly     tracing_thresh;
1320
1321 #ifdef CONFIG_TRACER_MAX_TRACE
1322 /*
1323  * Copy the new maximum trace into the separate maximum-trace
1324  * structure. (this way the maximum trace is permanently saved,
1325  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1326  */
1327 static void
1328 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1329 {
1330         struct trace_buffer *trace_buf = &tr->trace_buffer;
1331         struct trace_buffer *max_buf = &tr->max_buffer;
1332         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1333         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1334
1335         max_buf->cpu = cpu;
1336         max_buf->time_start = data->preempt_timestamp;
1337
1338         max_data->saved_latency = tr->max_latency;
1339         max_data->critical_start = data->critical_start;
1340         max_data->critical_end = data->critical_end;
1341
1342         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1343         max_data->pid = tsk->pid;
1344         /*
1345          * If tsk == current, then use current_uid(), as that does not use
1346          * RCU. The irq tracer can be called out of RCU scope.
1347          */
1348         if (tsk == current)
1349                 max_data->uid = current_uid();
1350         else
1351                 max_data->uid = task_uid(tsk);
1352
1353         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1354         max_data->policy = tsk->policy;
1355         max_data->rt_priority = tsk->rt_priority;
1356
1357         /* record this tasks comm */
1358         tracing_record_cmdline(tsk);
1359 }
1360
1361 /**
1362  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1363  * @tr: tracer
1364  * @tsk: the task with the latency
1365  * @cpu: The cpu that initiated the trace.
1366  *
1367  * Flip the buffers between the @tr and the max_tr and record information
1368  * about which task was the cause of this latency.
1369  */
1370 void
1371 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1372 {
1373         if (tr->stop_count)
1374                 return;
1375
1376         WARN_ON_ONCE(!irqs_disabled());
1377
1378         if (!tr->allocated_snapshot) {
1379                 /* Only the nop tracer should hit this when disabling */
1380                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1381                 return;
1382         }
1383
1384         arch_spin_lock(&tr->max_lock);
1385
1386         /* Inherit the recordable setting from trace_buffer */
1387         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1388                 ring_buffer_record_on(tr->max_buffer.buffer);
1389         else
1390                 ring_buffer_record_off(tr->max_buffer.buffer);
1391
1392         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1393
1394         __update_max_tr(tr, tsk, cpu);
1395         arch_spin_unlock(&tr->max_lock);
1396 }
1397
1398 /**
1399  * update_max_tr_single - only copy one trace over, and reset the rest
1400  * @tr - tracer
1401  * @tsk - task with the latency
1402  * @cpu - the cpu of the buffer to copy.
1403  *
1404  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1405  */
1406 void
1407 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1408 {
1409         int ret;
1410
1411         if (tr->stop_count)
1412                 return;
1413
1414         WARN_ON_ONCE(!irqs_disabled());
1415         if (!tr->allocated_snapshot) {
1416                 /* Only the nop tracer should hit this when disabling */
1417                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1418                 return;
1419         }
1420
1421         arch_spin_lock(&tr->max_lock);
1422
1423         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1424
1425         if (ret == -EBUSY) {
1426                 /*
1427                  * We failed to swap the buffer due to a commit taking
1428                  * place on this CPU. We fail to record, but we reset
1429                  * the max trace buffer (no one writes directly to it)
1430                  * and flag that it failed.
1431                  */
1432                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1433                         "Failed to swap buffers due to commit in progress\n");
1434         }
1435
1436         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1437
1438         __update_max_tr(tr, tsk, cpu);
1439         arch_spin_unlock(&tr->max_lock);
1440 }
1441 #endif /* CONFIG_TRACER_MAX_TRACE */
1442
1443 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1444 {
1445         /* Iterators are static, they should be filled or empty */
1446         if (trace_buffer_iter(iter, iter->cpu_file))
1447                 return 0;
1448
1449         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1450                                 full);
1451 }
1452
1453 #ifdef CONFIG_FTRACE_STARTUP_TEST
1454 static bool selftests_can_run;
1455
1456 struct trace_selftests {
1457         struct list_head                list;
1458         struct tracer                   *type;
1459 };
1460
1461 static LIST_HEAD(postponed_selftests);
1462
1463 static int save_selftest(struct tracer *type)
1464 {
1465         struct trace_selftests *selftest;
1466
1467         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1468         if (!selftest)
1469                 return -ENOMEM;
1470
1471         selftest->type = type;
1472         list_add(&selftest->list, &postponed_selftests);
1473         return 0;
1474 }
1475
1476 static int run_tracer_selftest(struct tracer *type)
1477 {
1478         struct trace_array *tr = &global_trace;
1479         struct tracer *saved_tracer = tr->current_trace;
1480         int ret;
1481
1482         if (!type->selftest || tracing_selftest_disabled)
1483                 return 0;
1484
1485         /*
1486          * If a tracer registers early in boot up (before scheduling is
1487          * initialized and such), then do not run its selftests yet.
1488          * Instead, run it a little later in the boot process.
1489          */
1490         if (!selftests_can_run)
1491                 return save_selftest(type);
1492
1493         /*
1494          * Run a selftest on this tracer.
1495          * Here we reset the trace buffer, and set the current
1496          * tracer to be this tracer. The tracer can then run some
1497          * internal tracing to verify that everything is in order.
1498          * If we fail, we do not register this tracer.
1499          */
1500         tracing_reset_online_cpus(&tr->trace_buffer);
1501
1502         tr->current_trace = type;
1503
1504 #ifdef CONFIG_TRACER_MAX_TRACE
1505         if (type->use_max_tr) {
1506                 /* If we expanded the buffers, make sure the max is expanded too */
1507                 if (ring_buffer_expanded)
1508                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1509                                            RING_BUFFER_ALL_CPUS);
1510                 tr->allocated_snapshot = true;
1511         }
1512 #endif
1513
1514         /* the test is responsible for initializing and enabling */
1515         pr_info("Testing tracer %s: ", type->name);
1516         ret = type->selftest(type, tr);
1517         /* the test is responsible for resetting too */
1518         tr->current_trace = saved_tracer;
1519         if (ret) {
1520                 printk(KERN_CONT "FAILED!\n");
1521                 /* Add the warning after printing 'FAILED' */
1522                 WARN_ON(1);
1523                 return -1;
1524         }
1525         /* Only reset on passing, to avoid touching corrupted buffers */
1526         tracing_reset_online_cpus(&tr->trace_buffer);
1527
1528 #ifdef CONFIG_TRACER_MAX_TRACE
1529         if (type->use_max_tr) {
1530                 tr->allocated_snapshot = false;
1531
1532                 /* Shrink the max buffer again */
1533                 if (ring_buffer_expanded)
1534                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1535                                            RING_BUFFER_ALL_CPUS);
1536         }
1537 #endif
1538
1539         printk(KERN_CONT "PASSED\n");
1540         return 0;
1541 }
1542
1543 static __init int init_trace_selftests(void)
1544 {
1545         struct trace_selftests *p, *n;
1546         struct tracer *t, **last;
1547         int ret;
1548
1549         selftests_can_run = true;
1550
1551         mutex_lock(&trace_types_lock);
1552
1553         if (list_empty(&postponed_selftests))
1554                 goto out;
1555
1556         pr_info("Running postponed tracer tests:\n");
1557
1558         tracing_selftest_running = true;
1559         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1560                 ret = run_tracer_selftest(p->type);
1561                 /* If the test fails, then warn and remove from available_tracers */
1562                 if (ret < 0) {
1563                         WARN(1, "tracer: %s failed selftest, disabling\n",
1564                              p->type->name);
1565                         last = &trace_types;
1566                         for (t = trace_types; t; t = t->next) {
1567                                 if (t == p->type) {
1568                                         *last = t->next;
1569                                         break;
1570                                 }
1571                                 last = &t->next;
1572                         }
1573                 }
1574                 list_del(&p->list);
1575                 kfree(p);
1576         }
1577         tracing_selftest_running = false;
1578
1579  out:
1580         mutex_unlock(&trace_types_lock);
1581
1582         return 0;
1583 }
1584 core_initcall(init_trace_selftests);
1585 #else
1586 static inline int run_tracer_selftest(struct tracer *type)
1587 {
1588         return 0;
1589 }
1590 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1591
1592 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1593
1594 static void __init apply_trace_boot_options(void);
1595
1596 /**
1597  * register_tracer - register a tracer with the ftrace system.
1598  * @type - the plugin for the tracer
1599  *
1600  * Register a new plugin tracer.
1601  */
1602 int __init register_tracer(struct tracer *type)
1603 {
1604         struct tracer *t;
1605         int ret = 0;
1606
1607         if (!type->name) {
1608                 pr_info("Tracer must have a name\n");
1609                 return -1;
1610         }
1611
1612         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1613                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1614                 return -1;
1615         }
1616
1617         mutex_lock(&trace_types_lock);
1618
1619         tracing_selftest_running = true;
1620
1621         for (t = trace_types; t; t = t->next) {
1622                 if (strcmp(type->name, t->name) == 0) {
1623                         /* already found */
1624                         pr_info("Tracer %s already registered\n",
1625                                 type->name);
1626                         ret = -1;
1627                         goto out;
1628                 }
1629         }
1630
1631         if (!type->set_flag)
1632                 type->set_flag = &dummy_set_flag;
1633         if (!type->flags) {
1634                 /*allocate a dummy tracer_flags*/
1635                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1636                 if (!type->flags) {
1637                         ret = -ENOMEM;
1638                         goto out;
1639                 }
1640                 type->flags->val = 0;
1641                 type->flags->opts = dummy_tracer_opt;
1642         } else
1643                 if (!type->flags->opts)
1644                         type->flags->opts = dummy_tracer_opt;
1645
1646         /* store the tracer for __set_tracer_option */
1647         type->flags->trace = type;
1648
1649         ret = run_tracer_selftest(type);
1650         if (ret < 0)
1651                 goto out;
1652
1653         type->next = trace_types;
1654         trace_types = type;
1655         add_tracer_options(&global_trace, type);
1656
1657  out:
1658         tracing_selftest_running = false;
1659         mutex_unlock(&trace_types_lock);
1660
1661         if (ret || !default_bootup_tracer)
1662                 goto out_unlock;
1663
1664         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1665                 goto out_unlock;
1666
1667         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1668         /* Do we want this tracer to start on bootup? */
1669         tracing_set_tracer(&global_trace, type->name);
1670         default_bootup_tracer = NULL;
1671
1672         apply_trace_boot_options();
1673
1674         /* disable other selftests, since this will break it. */
1675         tracing_selftest_disabled = true;
1676 #ifdef CONFIG_FTRACE_STARTUP_TEST
1677         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1678                type->name);
1679 #endif
1680
1681  out_unlock:
1682         return ret;
1683 }
1684
1685 void tracing_reset(struct trace_buffer *buf, int cpu)
1686 {
1687         struct ring_buffer *buffer = buf->buffer;
1688
1689         if (!buffer)
1690                 return;
1691
1692         ring_buffer_record_disable(buffer);
1693
1694         /* Make sure all commits have finished */
1695         synchronize_sched();
1696         ring_buffer_reset_cpu(buffer, cpu);
1697
1698         ring_buffer_record_enable(buffer);
1699 }
1700
1701 void tracing_reset_online_cpus(struct trace_buffer *buf)
1702 {
1703         struct ring_buffer *buffer = buf->buffer;
1704         int cpu;
1705
1706         if (!buffer)
1707                 return;
1708
1709         ring_buffer_record_disable(buffer);
1710
1711         /* Make sure all commits have finished */
1712         synchronize_sched();
1713
1714         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1715
1716         for_each_online_cpu(cpu)
1717                 ring_buffer_reset_cpu(buffer, cpu);
1718
1719         ring_buffer_record_enable(buffer);
1720 }
1721
1722 /* Must have trace_types_lock held */
1723 void tracing_reset_all_online_cpus(void)
1724 {
1725         struct trace_array *tr;
1726
1727         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1728                 if (!tr->clear_trace)
1729                         continue;
1730                 tr->clear_trace = false;
1731                 tracing_reset_online_cpus(&tr->trace_buffer);
1732 #ifdef CONFIG_TRACER_MAX_TRACE
1733                 tracing_reset_online_cpus(&tr->max_buffer);
1734 #endif
1735         }
1736 }
1737
1738 /*
1739  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1740  * is the tgid last observed corresponding to pid=i.
1741  */
1742 static int *tgid_map;
1743
1744 /* The maximum valid index into tgid_map. */
1745 static size_t tgid_map_max;
1746
1747 #define SAVED_CMDLINES_DEFAULT 128
1748 #define NO_CMDLINE_MAP UINT_MAX
1749 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1750 struct saved_cmdlines_buffer {
1751         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1752         unsigned *map_cmdline_to_pid;
1753         unsigned cmdline_num;
1754         int cmdline_idx;
1755         char *saved_cmdlines;
1756 };
1757 static struct saved_cmdlines_buffer *savedcmd;
1758
1759 static inline char *get_saved_cmdlines(int idx)
1760 {
1761         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1762 }
1763
1764 static inline void set_cmdline(int idx, const char *cmdline)
1765 {
1766         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1767 }
1768
1769 static int allocate_cmdlines_buffer(unsigned int val,
1770                                     struct saved_cmdlines_buffer *s)
1771 {
1772         s->map_cmdline_to_pid = kmalloc_array(val,
1773                                               sizeof(*s->map_cmdline_to_pid),
1774                                               GFP_KERNEL);
1775         if (!s->map_cmdline_to_pid)
1776                 return -ENOMEM;
1777
1778         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1779         if (!s->saved_cmdlines) {
1780                 kfree(s->map_cmdline_to_pid);
1781                 return -ENOMEM;
1782         }
1783
1784         s->cmdline_idx = 0;
1785         s->cmdline_num = val;
1786         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1787                sizeof(s->map_pid_to_cmdline));
1788         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1789                val * sizeof(*s->map_cmdline_to_pid));
1790
1791         return 0;
1792 }
1793
1794 static int trace_create_savedcmd(void)
1795 {
1796         int ret;
1797
1798         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1799         if (!savedcmd)
1800                 return -ENOMEM;
1801
1802         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1803         if (ret < 0) {
1804                 kfree(savedcmd);
1805                 savedcmd = NULL;
1806                 return -ENOMEM;
1807         }
1808
1809         return 0;
1810 }
1811
1812 int is_tracing_stopped(void)
1813 {
1814         return global_trace.stop_count;
1815 }
1816
1817 /**
1818  * tracing_start - quick start of the tracer
1819  *
1820  * If tracing is enabled but was stopped by tracing_stop,
1821  * this will start the tracer back up.
1822  */
1823 void tracing_start(void)
1824 {
1825         struct ring_buffer *buffer;
1826         unsigned long flags;
1827
1828         if (tracing_disabled)
1829                 return;
1830
1831         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1832         if (--global_trace.stop_count) {
1833                 if (global_trace.stop_count < 0) {
1834                         /* Someone screwed up their debugging */
1835                         WARN_ON_ONCE(1);
1836                         global_trace.stop_count = 0;
1837                 }
1838                 goto out;
1839         }
1840
1841         /* Prevent the buffers from switching */
1842         arch_spin_lock(&global_trace.max_lock);
1843
1844         buffer = global_trace.trace_buffer.buffer;
1845         if (buffer)
1846                 ring_buffer_record_enable(buffer);
1847
1848 #ifdef CONFIG_TRACER_MAX_TRACE
1849         buffer = global_trace.max_buffer.buffer;
1850         if (buffer)
1851                 ring_buffer_record_enable(buffer);
1852 #endif
1853
1854         arch_spin_unlock(&global_trace.max_lock);
1855
1856  out:
1857         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1858 }
1859
1860 static void tracing_start_tr(struct trace_array *tr)
1861 {
1862         struct ring_buffer *buffer;
1863         unsigned long flags;
1864
1865         if (tracing_disabled)
1866                 return;
1867
1868         /* If global, we need to also start the max tracer */
1869         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1870                 return tracing_start();
1871
1872         raw_spin_lock_irqsave(&tr->start_lock, flags);
1873
1874         if (--tr->stop_count) {
1875                 if (tr->stop_count < 0) {
1876                         /* Someone screwed up their debugging */
1877                         WARN_ON_ONCE(1);
1878                         tr->stop_count = 0;
1879                 }
1880                 goto out;
1881         }
1882
1883         buffer = tr->trace_buffer.buffer;
1884         if (buffer)
1885                 ring_buffer_record_enable(buffer);
1886
1887  out:
1888         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1889 }
1890
1891 /**
1892  * tracing_stop - quick stop of the tracer
1893  *
1894  * Light weight way to stop tracing. Use in conjunction with
1895  * tracing_start.
1896  */
1897 void tracing_stop(void)
1898 {
1899         struct ring_buffer *buffer;
1900         unsigned long flags;
1901
1902         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1903         if (global_trace.stop_count++)
1904                 goto out;
1905
1906         /* Prevent the buffers from switching */
1907         arch_spin_lock(&global_trace.max_lock);
1908
1909         buffer = global_trace.trace_buffer.buffer;
1910         if (buffer)
1911                 ring_buffer_record_disable(buffer);
1912
1913 #ifdef CONFIG_TRACER_MAX_TRACE
1914         buffer = global_trace.max_buffer.buffer;
1915         if (buffer)
1916                 ring_buffer_record_disable(buffer);
1917 #endif
1918
1919         arch_spin_unlock(&global_trace.max_lock);
1920
1921  out:
1922         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1923 }
1924
1925 static void tracing_stop_tr(struct trace_array *tr)
1926 {
1927         struct ring_buffer *buffer;
1928         unsigned long flags;
1929
1930         /* If global, we need to also stop the max tracer */
1931         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1932                 return tracing_stop();
1933
1934         raw_spin_lock_irqsave(&tr->start_lock, flags);
1935         if (tr->stop_count++)
1936                 goto out;
1937
1938         buffer = tr->trace_buffer.buffer;
1939         if (buffer)
1940                 ring_buffer_record_disable(buffer);
1941
1942  out:
1943         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1944 }
1945
1946 static int trace_save_cmdline(struct task_struct *tsk)
1947 {
1948         unsigned tpid, idx;
1949
1950         /* treat recording of idle task as a success */
1951         if (!tsk->pid)
1952                 return 1;
1953
1954         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1955
1956         /*
1957          * It's not the end of the world if we don't get
1958          * the lock, but we also don't want to spin
1959          * nor do we want to disable interrupts,
1960          * so if we miss here, then better luck next time.
1961          */
1962         if (!arch_spin_trylock(&trace_cmdline_lock))
1963                 return 0;
1964
1965         idx = savedcmd->map_pid_to_cmdline[tpid];
1966         if (idx == NO_CMDLINE_MAP) {
1967                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1968
1969                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1970                 savedcmd->cmdline_idx = idx;
1971         }
1972
1973         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1974         set_cmdline(idx, tsk->comm);
1975
1976         arch_spin_unlock(&trace_cmdline_lock);
1977
1978         return 1;
1979 }
1980
1981 static void __trace_find_cmdline(int pid, char comm[])
1982 {
1983         unsigned map;
1984         int tpid;
1985
1986         if (!pid) {
1987                 strcpy(comm, "<idle>");
1988                 return;
1989         }
1990
1991         if (WARN_ON_ONCE(pid < 0)) {
1992                 strcpy(comm, "<XXX>");
1993                 return;
1994         }
1995
1996         tpid = pid & (PID_MAX_DEFAULT - 1);
1997         map = savedcmd->map_pid_to_cmdline[tpid];
1998         if (map != NO_CMDLINE_MAP) {
1999                 tpid = savedcmd->map_cmdline_to_pid[map];
2000                 if (tpid == pid) {
2001                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2002                         return;
2003                 }
2004         }
2005         strcpy(comm, "<...>");
2006 }
2007
2008 void trace_find_cmdline(int pid, char comm[])
2009 {
2010         preempt_disable();
2011         arch_spin_lock(&trace_cmdline_lock);
2012
2013         __trace_find_cmdline(pid, comm);
2014
2015         arch_spin_unlock(&trace_cmdline_lock);
2016         preempt_enable();
2017 }
2018
2019 static int *trace_find_tgid_ptr(int pid)
2020 {
2021         /*
2022          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2023          * if we observe a non-NULL tgid_map then we also observe the correct
2024          * tgid_map_max.
2025          */
2026         int *map = smp_load_acquire(&tgid_map);
2027
2028         if (unlikely(!map || pid > tgid_map_max))
2029                 return NULL;
2030
2031         return &map[pid];
2032 }
2033
2034 int trace_find_tgid(int pid)
2035 {
2036         int *ptr = trace_find_tgid_ptr(pid);
2037
2038         return ptr ? *ptr : 0;
2039 }
2040
2041 static int trace_save_tgid(struct task_struct *tsk)
2042 {
2043         int *ptr;
2044
2045         /* treat recording of idle task as a success */
2046         if (!tsk->pid)
2047                 return 1;
2048
2049         ptr = trace_find_tgid_ptr(tsk->pid);
2050         if (!ptr)
2051                 return 0;
2052
2053         *ptr = tsk->tgid;
2054         return 1;
2055 }
2056
2057 static bool tracing_record_taskinfo_skip(int flags)
2058 {
2059         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2060                 return true;
2061         if (!__this_cpu_read(trace_taskinfo_save))
2062                 return true;
2063         return false;
2064 }
2065
2066 /**
2067  * tracing_record_taskinfo - record the task info of a task
2068  *
2069  * @task  - task to record
2070  * @flags - TRACE_RECORD_CMDLINE for recording comm
2071  *        - TRACE_RECORD_TGID for recording tgid
2072  */
2073 void tracing_record_taskinfo(struct task_struct *task, int flags)
2074 {
2075         bool done;
2076
2077         if (tracing_record_taskinfo_skip(flags))
2078                 return;
2079
2080         /*
2081          * Record as much task information as possible. If some fail, continue
2082          * to try to record the others.
2083          */
2084         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2085         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2086
2087         /* If recording any information failed, retry again soon. */
2088         if (!done)
2089                 return;
2090
2091         __this_cpu_write(trace_taskinfo_save, false);
2092 }
2093
2094 /**
2095  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2096  *
2097  * @prev - previous task during sched_switch
2098  * @next - next task during sched_switch
2099  * @flags - TRACE_RECORD_CMDLINE for recording comm
2100  *          TRACE_RECORD_TGID for recording tgid
2101  */
2102 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2103                                           struct task_struct *next, int flags)
2104 {
2105         bool done;
2106
2107         if (tracing_record_taskinfo_skip(flags))
2108                 return;
2109
2110         /*
2111          * Record as much task information as possible. If some fail, continue
2112          * to try to record the others.
2113          */
2114         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2115         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2116         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2117         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2118
2119         /* If recording any information failed, retry again soon. */
2120         if (!done)
2121                 return;
2122
2123         __this_cpu_write(trace_taskinfo_save, false);
2124 }
2125
2126 /* Helpers to record a specific task information */
2127 void tracing_record_cmdline(struct task_struct *task)
2128 {
2129         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2130 }
2131
2132 void tracing_record_tgid(struct task_struct *task)
2133 {
2134         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2135 }
2136
2137 /*
2138  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2139  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2140  * simplifies those functions and keeps them in sync.
2141  */
2142 enum print_line_t trace_handle_return(struct trace_seq *s)
2143 {
2144         return trace_seq_has_overflowed(s) ?
2145                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2146 }
2147 EXPORT_SYMBOL_GPL(trace_handle_return);
2148
2149 void
2150 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2151                              int pc)
2152 {
2153         struct task_struct *tsk = current;
2154
2155         entry->preempt_count            = pc & 0xff;
2156         entry->pid                      = (tsk) ? tsk->pid : 0;
2157         entry->flags =
2158 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2159                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2160 #else
2161                 TRACE_FLAG_IRQS_NOSUPPORT |
2162 #endif
2163                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2164                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2165                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2166                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2167                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2168 }
2169 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2170
2171 struct ring_buffer_event *
2172 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2173                           int type,
2174                           unsigned long len,
2175                           unsigned long flags, int pc)
2176 {
2177         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2178 }
2179
2180 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2181 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2182 static int trace_buffered_event_ref;
2183
2184 /**
2185  * trace_buffered_event_enable - enable buffering events
2186  *
2187  * When events are being filtered, it is quicker to use a temporary
2188  * buffer to write the event data into if there's a likely chance
2189  * that it will not be committed. The discard of the ring buffer
2190  * is not as fast as committing, and is much slower than copying
2191  * a commit.
2192  *
2193  * When an event is to be filtered, allocate per cpu buffers to
2194  * write the event data into, and if the event is filtered and discarded
2195  * it is simply dropped, otherwise, the entire data is to be committed
2196  * in one shot.
2197  */
2198 void trace_buffered_event_enable(void)
2199 {
2200         struct ring_buffer_event *event;
2201         struct page *page;
2202         int cpu;
2203
2204         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2205
2206         if (trace_buffered_event_ref++)
2207                 return;
2208
2209         for_each_tracing_cpu(cpu) {
2210                 page = alloc_pages_node(cpu_to_node(cpu),
2211                                         GFP_KERNEL | __GFP_NORETRY, 0);
2212                 if (!page)
2213                         goto failed;
2214
2215                 event = page_address(page);
2216                 memset(event, 0, sizeof(*event));
2217
2218                 per_cpu(trace_buffered_event, cpu) = event;
2219
2220                 preempt_disable();
2221                 if (cpu == smp_processor_id() &&
2222                     this_cpu_read(trace_buffered_event) !=
2223                     per_cpu(trace_buffered_event, cpu))
2224                         WARN_ON_ONCE(1);
2225                 preempt_enable();
2226         }
2227
2228         return;
2229  failed:
2230         trace_buffered_event_disable();
2231 }
2232
2233 static void enable_trace_buffered_event(void *data)
2234 {
2235         /* Probably not needed, but do it anyway */
2236         smp_rmb();
2237         this_cpu_dec(trace_buffered_event_cnt);
2238 }
2239
2240 static void disable_trace_buffered_event(void *data)
2241 {
2242         this_cpu_inc(trace_buffered_event_cnt);
2243 }
2244
2245 /**
2246  * trace_buffered_event_disable - disable buffering events
2247  *
2248  * When a filter is removed, it is faster to not use the buffered
2249  * events, and to commit directly into the ring buffer. Free up
2250  * the temp buffers when there are no more users. This requires
2251  * special synchronization with current events.
2252  */
2253 void trace_buffered_event_disable(void)
2254 {
2255         int cpu;
2256
2257         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2258
2259         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2260                 return;
2261
2262         if (--trace_buffered_event_ref)
2263                 return;
2264
2265         preempt_disable();
2266         /* For each CPU, set the buffer as used. */
2267         smp_call_function_many(tracing_buffer_mask,
2268                                disable_trace_buffered_event, NULL, 1);
2269         preempt_enable();
2270
2271         /* Wait for all current users to finish */
2272         synchronize_sched();
2273
2274         for_each_tracing_cpu(cpu) {
2275                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2276                 per_cpu(trace_buffered_event, cpu) = NULL;
2277         }
2278         /*
2279          * Make sure trace_buffered_event is NULL before clearing
2280          * trace_buffered_event_cnt.
2281          */
2282         smp_wmb();
2283
2284         preempt_disable();
2285         /* Do the work on each cpu */
2286         smp_call_function_many(tracing_buffer_mask,
2287                                enable_trace_buffered_event, NULL, 1);
2288         preempt_enable();
2289 }
2290
2291 static struct ring_buffer *temp_buffer;
2292
2293 struct ring_buffer_event *
2294 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2295                           struct trace_event_file *trace_file,
2296                           int type, unsigned long len,
2297                           unsigned long flags, int pc)
2298 {
2299         struct ring_buffer_event *entry;
2300         int val;
2301
2302         *current_rb = trace_file->tr->trace_buffer.buffer;
2303
2304         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2305              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2306             (entry = this_cpu_read(trace_buffered_event))) {
2307                 /* Try to use the per cpu buffer first */
2308                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2309                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2310                         trace_event_setup(entry, type, flags, pc);
2311                         entry->array[0] = len;
2312                         return entry;
2313                 }
2314                 this_cpu_dec(trace_buffered_event_cnt);
2315         }
2316
2317         entry = __trace_buffer_lock_reserve(*current_rb,
2318                                             type, len, flags, pc);
2319         /*
2320          * If tracing is off, but we have triggers enabled
2321          * we still need to look at the event data. Use the temp_buffer
2322          * to store the trace event for the tigger to use. It's recusive
2323          * safe and will not be recorded anywhere.
2324          */
2325         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2326                 *current_rb = temp_buffer;
2327                 entry = __trace_buffer_lock_reserve(*current_rb,
2328                                                     type, len, flags, pc);
2329         }
2330         return entry;
2331 }
2332 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2333
2334 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2335 static DEFINE_MUTEX(tracepoint_printk_mutex);
2336
2337 static void output_printk(struct trace_event_buffer *fbuffer)
2338 {
2339         struct trace_event_call *event_call;
2340         struct trace_event *event;
2341         unsigned long flags;
2342         struct trace_iterator *iter = tracepoint_print_iter;
2343
2344         /* We should never get here if iter is NULL */
2345         if (WARN_ON_ONCE(!iter))
2346                 return;
2347
2348         event_call = fbuffer->trace_file->event_call;
2349         if (!event_call || !event_call->event.funcs ||
2350             !event_call->event.funcs->trace)
2351                 return;
2352
2353         event = &fbuffer->trace_file->event_call->event;
2354
2355         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2356         trace_seq_init(&iter->seq);
2357         iter->ent = fbuffer->entry;
2358         event_call->event.funcs->trace(iter, 0, event);
2359         trace_seq_putc(&iter->seq, 0);
2360         printk("%s", iter->seq.buffer);
2361
2362         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2363 }
2364
2365 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2366                              void __user *buffer, size_t *lenp,
2367                              loff_t *ppos)
2368 {
2369         int save_tracepoint_printk;
2370         int ret;
2371
2372         mutex_lock(&tracepoint_printk_mutex);
2373         save_tracepoint_printk = tracepoint_printk;
2374
2375         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2376
2377         /*
2378          * This will force exiting early, as tracepoint_printk
2379          * is always zero when tracepoint_printk_iter is not allocated
2380          */
2381         if (!tracepoint_print_iter)
2382                 tracepoint_printk = 0;
2383
2384         if (save_tracepoint_printk == tracepoint_printk)
2385                 goto out;
2386
2387         if (tracepoint_printk)
2388                 static_key_enable(&tracepoint_printk_key.key);
2389         else
2390                 static_key_disable(&tracepoint_printk_key.key);
2391
2392  out:
2393         mutex_unlock(&tracepoint_printk_mutex);
2394
2395         return ret;
2396 }
2397
2398 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2399 {
2400         if (static_key_false(&tracepoint_printk_key.key))
2401                 output_printk(fbuffer);
2402
2403         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2404                                     fbuffer->event, fbuffer->entry,
2405                                     fbuffer->flags, fbuffer->pc);
2406 }
2407 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2408
2409 /*
2410  * Skip 3:
2411  *
2412  *   trace_buffer_unlock_commit_regs()
2413  *   trace_event_buffer_commit()
2414  *   trace_event_raw_event_xxx()
2415  */
2416 # define STACK_SKIP 3
2417
2418 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2419                                      struct ring_buffer *buffer,
2420                                      struct ring_buffer_event *event,
2421                                      unsigned long flags, int pc,
2422                                      struct pt_regs *regs)
2423 {
2424         __buffer_unlock_commit(buffer, event);
2425
2426         /*
2427          * If regs is not set, then skip the necessary functions.
2428          * Note, we can still get here via blktrace, wakeup tracer
2429          * and mmiotrace, but that's ok if they lose a function or
2430          * two. They are not that meaningful.
2431          */
2432         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2433         ftrace_trace_userstack(tr, buffer, flags, pc);
2434 }
2435
2436 /*
2437  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2438  */
2439 void
2440 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2441                                    struct ring_buffer_event *event)
2442 {
2443         __buffer_unlock_commit(buffer, event);
2444 }
2445
2446 static void
2447 trace_process_export(struct trace_export *export,
2448                struct ring_buffer_event *event)
2449 {
2450         struct trace_entry *entry;
2451         unsigned int size = 0;
2452
2453         entry = ring_buffer_event_data(event);
2454         size = ring_buffer_event_length(event);
2455         export->write(export, entry, size);
2456 }
2457
2458 static DEFINE_MUTEX(ftrace_export_lock);
2459
2460 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2461
2462 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2463
2464 static inline void ftrace_exports_enable(void)
2465 {
2466         static_branch_enable(&ftrace_exports_enabled);
2467 }
2468
2469 static inline void ftrace_exports_disable(void)
2470 {
2471         static_branch_disable(&ftrace_exports_enabled);
2472 }
2473
2474 void ftrace_exports(struct ring_buffer_event *event)
2475 {
2476         struct trace_export *export;
2477
2478         preempt_disable_notrace();
2479
2480         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2481         while (export) {
2482                 trace_process_export(export, event);
2483                 export = rcu_dereference_raw_notrace(export->next);
2484         }
2485
2486         preempt_enable_notrace();
2487 }
2488
2489 static inline void
2490 add_trace_export(struct trace_export **list, struct trace_export *export)
2491 {
2492         rcu_assign_pointer(export->next, *list);
2493         /*
2494          * We are entering export into the list but another
2495          * CPU might be walking that list. We need to make sure
2496          * the export->next pointer is valid before another CPU sees
2497          * the export pointer included into the list.
2498          */
2499         rcu_assign_pointer(*list, export);
2500 }
2501
2502 static inline int
2503 rm_trace_export(struct trace_export **list, struct trace_export *export)
2504 {
2505         struct trace_export **p;
2506
2507         for (p = list; *p != NULL; p = &(*p)->next)
2508                 if (*p == export)
2509                         break;
2510
2511         if (*p != export)
2512                 return -1;
2513
2514         rcu_assign_pointer(*p, (*p)->next);
2515
2516         return 0;
2517 }
2518
2519 static inline void
2520 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2521 {
2522         if (*list == NULL)
2523                 ftrace_exports_enable();
2524
2525         add_trace_export(list, export);
2526 }
2527
2528 static inline int
2529 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2530 {
2531         int ret;
2532
2533         ret = rm_trace_export(list, export);
2534         if (*list == NULL)
2535                 ftrace_exports_disable();
2536
2537         return ret;
2538 }
2539
2540 int register_ftrace_export(struct trace_export *export)
2541 {
2542         if (WARN_ON_ONCE(!export->write))
2543                 return -1;
2544
2545         mutex_lock(&ftrace_export_lock);
2546
2547         add_ftrace_export(&ftrace_exports_list, export);
2548
2549         mutex_unlock(&ftrace_export_lock);
2550
2551         return 0;
2552 }
2553 EXPORT_SYMBOL_GPL(register_ftrace_export);
2554
2555 int unregister_ftrace_export(struct trace_export *export)
2556 {
2557         int ret;
2558
2559         mutex_lock(&ftrace_export_lock);
2560
2561         ret = rm_ftrace_export(&ftrace_exports_list, export);
2562
2563         mutex_unlock(&ftrace_export_lock);
2564
2565         return ret;
2566 }
2567 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2568
2569 void
2570 trace_function(struct trace_array *tr,
2571                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2572                int pc)
2573 {
2574         struct trace_event_call *call = &event_function;
2575         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2576         struct ring_buffer_event *event;
2577         struct ftrace_entry *entry;
2578
2579         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2580                                             flags, pc);
2581         if (!event)
2582                 return;
2583         entry   = ring_buffer_event_data(event);
2584         entry->ip                       = ip;
2585         entry->parent_ip                = parent_ip;
2586
2587         if (!call_filter_check_discard(call, entry, buffer, event)) {
2588                 if (static_branch_unlikely(&ftrace_exports_enabled))
2589                         ftrace_exports(event);
2590                 __buffer_unlock_commit(buffer, event);
2591         }
2592 }
2593
2594 #ifdef CONFIG_STACKTRACE
2595
2596 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2597 struct ftrace_stack {
2598         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2599 };
2600
2601 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2602 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2603
2604 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2605                                  unsigned long flags,
2606                                  int skip, int pc, struct pt_regs *regs)
2607 {
2608         struct trace_event_call *call = &event_kernel_stack;
2609         struct ring_buffer_event *event;
2610         struct stack_entry *entry;
2611         struct stack_trace trace;
2612         int use_stack;
2613         int size = FTRACE_STACK_ENTRIES;
2614
2615         trace.nr_entries        = 0;
2616         trace.skip              = skip;
2617
2618         /*
2619          * Add one, for this function and the call to save_stack_trace()
2620          * If regs is set, then these functions will not be in the way.
2621          */
2622 #ifndef CONFIG_UNWINDER_ORC
2623         if (!regs)
2624                 trace.skip++;
2625 #endif
2626
2627         /*
2628          * Since events can happen in NMIs there's no safe way to
2629          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2630          * or NMI comes in, it will just have to use the default
2631          * FTRACE_STACK_SIZE.
2632          */
2633         preempt_disable_notrace();
2634
2635         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2636         /*
2637          * We don't need any atomic variables, just a barrier.
2638          * If an interrupt comes in, we don't care, because it would
2639          * have exited and put the counter back to what we want.
2640          * We just need a barrier to keep gcc from moving things
2641          * around.
2642          */
2643         barrier();
2644         if (use_stack == 1) {
2645                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2646                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2647
2648                 if (regs)
2649                         save_stack_trace_regs(regs, &trace);
2650                 else
2651                         save_stack_trace(&trace);
2652
2653                 if (trace.nr_entries > size)
2654                         size = trace.nr_entries;
2655         } else
2656                 /* From now on, use_stack is a boolean */
2657                 use_stack = 0;
2658
2659         size *= sizeof(unsigned long);
2660
2661         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2662                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2663                                     flags, pc);
2664         if (!event)
2665                 goto out;
2666         entry = ring_buffer_event_data(event);
2667
2668         memset(&entry->caller, 0, size);
2669
2670         if (use_stack)
2671                 memcpy(&entry->caller, trace.entries,
2672                        trace.nr_entries * sizeof(unsigned long));
2673         else {
2674                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2675                 trace.entries           = entry->caller;
2676                 if (regs)
2677                         save_stack_trace_regs(regs, &trace);
2678                 else
2679                         save_stack_trace(&trace);
2680         }
2681
2682         entry->size = trace.nr_entries;
2683
2684         if (!call_filter_check_discard(call, entry, buffer, event))
2685                 __buffer_unlock_commit(buffer, event);
2686
2687  out:
2688         /* Again, don't let gcc optimize things here */
2689         barrier();
2690         __this_cpu_dec(ftrace_stack_reserve);
2691         preempt_enable_notrace();
2692
2693 }
2694
2695 static inline void ftrace_trace_stack(struct trace_array *tr,
2696                                       struct ring_buffer *buffer,
2697                                       unsigned long flags,
2698                                       int skip, int pc, struct pt_regs *regs)
2699 {
2700         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2701                 return;
2702
2703         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2704 }
2705
2706 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2707                    int pc)
2708 {
2709         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2710
2711         if (rcu_is_watching()) {
2712                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2713                 return;
2714         }
2715
2716         /*
2717          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2718          * but if the above rcu_is_watching() failed, then the NMI
2719          * triggered someplace critical, and rcu_irq_enter() should
2720          * not be called from NMI.
2721          */
2722         if (unlikely(in_nmi()))
2723                 return;
2724
2725         rcu_irq_enter_irqson();
2726         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2727         rcu_irq_exit_irqson();
2728 }
2729
2730 /**
2731  * trace_dump_stack - record a stack back trace in the trace buffer
2732  * @skip: Number of functions to skip (helper handlers)
2733  */
2734 void trace_dump_stack(int skip)
2735 {
2736         unsigned long flags;
2737
2738         if (tracing_disabled || tracing_selftest_running)
2739                 return;
2740
2741         local_save_flags(flags);
2742
2743 #ifndef CONFIG_UNWINDER_ORC
2744         /* Skip 1 to skip this function. */
2745         skip++;
2746 #endif
2747         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2748                              flags, skip, preempt_count(), NULL);
2749 }
2750
2751 static DEFINE_PER_CPU(int, user_stack_count);
2752
2753 void
2754 ftrace_trace_userstack(struct trace_array *tr,
2755                        struct ring_buffer *buffer, unsigned long flags, int pc)
2756 {
2757         struct trace_event_call *call = &event_user_stack;
2758         struct ring_buffer_event *event;
2759         struct userstack_entry *entry;
2760         struct stack_trace trace;
2761
2762         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2763                 return;
2764
2765         /*
2766          * NMIs can not handle page faults, even with fix ups.
2767          * The save user stack can (and often does) fault.
2768          */
2769         if (unlikely(in_nmi()))
2770                 return;
2771
2772         /*
2773          * prevent recursion, since the user stack tracing may
2774          * trigger other kernel events.
2775          */
2776         preempt_disable();
2777         if (__this_cpu_read(user_stack_count))
2778                 goto out;
2779
2780         __this_cpu_inc(user_stack_count);
2781
2782         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2783                                             sizeof(*entry), flags, pc);
2784         if (!event)
2785                 goto out_drop_count;
2786         entry   = ring_buffer_event_data(event);
2787
2788         entry->tgid             = current->tgid;
2789         memset(&entry->caller, 0, sizeof(entry->caller));
2790
2791         trace.nr_entries        = 0;
2792         trace.max_entries       = FTRACE_STACK_ENTRIES;
2793         trace.skip              = 0;
2794         trace.entries           = entry->caller;
2795
2796         save_stack_trace_user(&trace);
2797         if (!call_filter_check_discard(call, entry, buffer, event))
2798                 __buffer_unlock_commit(buffer, event);
2799
2800  out_drop_count:
2801         __this_cpu_dec(user_stack_count);
2802  out:
2803         preempt_enable();
2804 }
2805
2806 #ifdef UNUSED
2807 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2808 {
2809         ftrace_trace_userstack(tr, flags, preempt_count());
2810 }
2811 #endif /* UNUSED */
2812
2813 #endif /* CONFIG_STACKTRACE */
2814
2815 /* created for use with alloc_percpu */
2816 struct trace_buffer_struct {
2817         int nesting;
2818         char buffer[4][TRACE_BUF_SIZE];
2819 };
2820
2821 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2822
2823 /*
2824  * Thise allows for lockless recording.  If we're nested too deeply, then
2825  * this returns NULL.
2826  */
2827 static char *get_trace_buf(void)
2828 {
2829         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2830
2831         if (!trace_percpu_buffer || buffer->nesting >= 4)
2832                 return NULL;
2833
2834         buffer->nesting++;
2835
2836         /* Interrupts must see nesting incremented before we use the buffer */
2837         barrier();
2838         return &buffer->buffer[buffer->nesting - 1][0];
2839 }
2840
2841 static void put_trace_buf(void)
2842 {
2843         /* Don't let the decrement of nesting leak before this */
2844         barrier();
2845         this_cpu_dec(trace_percpu_buffer->nesting);
2846 }
2847
2848 static int alloc_percpu_trace_buffer(void)
2849 {
2850         struct trace_buffer_struct __percpu *buffers;
2851
2852         buffers = alloc_percpu(struct trace_buffer_struct);
2853         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2854                 return -ENOMEM;
2855
2856         trace_percpu_buffer = buffers;
2857         return 0;
2858 }
2859
2860 static int buffers_allocated;
2861
2862 void trace_printk_init_buffers(void)
2863 {
2864         if (buffers_allocated)
2865                 return;
2866
2867         if (alloc_percpu_trace_buffer())
2868                 return;
2869
2870         /* trace_printk() is for debug use only. Don't use it in production. */
2871
2872         pr_warn("\n");
2873         pr_warn("**********************************************************\n");
2874         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2875         pr_warn("**                                                      **\n");
2876         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2877         pr_warn("**                                                      **\n");
2878         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2879         pr_warn("** unsafe for production use.                           **\n");
2880         pr_warn("**                                                      **\n");
2881         pr_warn("** If you see this message and you are not debugging    **\n");
2882         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2883         pr_warn("**                                                      **\n");
2884         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2885         pr_warn("**********************************************************\n");
2886
2887         /* Expand the buffers to set size */
2888         tracing_update_buffers();
2889
2890         buffers_allocated = 1;
2891
2892         /*
2893          * trace_printk_init_buffers() can be called by modules.
2894          * If that happens, then we need to start cmdline recording
2895          * directly here. If the global_trace.buffer is already
2896          * allocated here, then this was called by module code.
2897          */
2898         if (global_trace.trace_buffer.buffer)
2899                 tracing_start_cmdline_record();
2900 }
2901
2902 void trace_printk_start_comm(void)
2903 {
2904         /* Start tracing comms if trace printk is set */
2905         if (!buffers_allocated)
2906                 return;
2907         tracing_start_cmdline_record();
2908 }
2909
2910 static void trace_printk_start_stop_comm(int enabled)
2911 {
2912         if (!buffers_allocated)
2913                 return;
2914
2915         if (enabled)
2916                 tracing_start_cmdline_record();
2917         else
2918                 tracing_stop_cmdline_record();
2919 }
2920
2921 /**
2922  * trace_vbprintk - write binary msg to tracing buffer
2923  *
2924  */
2925 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2926 {
2927         struct trace_event_call *call = &event_bprint;
2928         struct ring_buffer_event *event;
2929         struct ring_buffer *buffer;
2930         struct trace_array *tr = &global_trace;
2931         struct bprint_entry *entry;
2932         unsigned long flags;
2933         char *tbuffer;
2934         int len = 0, size, pc;
2935
2936         if (unlikely(tracing_selftest_running || tracing_disabled))
2937                 return 0;
2938
2939         /* Don't pollute graph traces with trace_vprintk internals */
2940         pause_graph_tracing();
2941
2942         pc = preempt_count();
2943         preempt_disable_notrace();
2944
2945         tbuffer = get_trace_buf();
2946         if (!tbuffer) {
2947                 len = 0;
2948                 goto out_nobuffer;
2949         }
2950
2951         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2952
2953         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2954                 goto out;
2955
2956         local_save_flags(flags);
2957         size = sizeof(*entry) + sizeof(u32) * len;
2958         buffer = tr->trace_buffer.buffer;
2959         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2960                                             flags, pc);
2961         if (!event)
2962                 goto out;
2963         entry = ring_buffer_event_data(event);
2964         entry->ip                       = ip;
2965         entry->fmt                      = fmt;
2966
2967         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2968         if (!call_filter_check_discard(call, entry, buffer, event)) {
2969                 __buffer_unlock_commit(buffer, event);
2970                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2971         }
2972
2973 out:
2974         put_trace_buf();
2975
2976 out_nobuffer:
2977         preempt_enable_notrace();
2978         unpause_graph_tracing();
2979
2980         return len;
2981 }
2982 EXPORT_SYMBOL_GPL(trace_vbprintk);
2983
2984 __printf(3, 0)
2985 static int
2986 __trace_array_vprintk(struct ring_buffer *buffer,
2987                       unsigned long ip, const char *fmt, va_list args)
2988 {
2989         struct trace_event_call *call = &event_print;
2990         struct ring_buffer_event *event;
2991         int len = 0, size, pc;
2992         struct print_entry *entry;
2993         unsigned long flags;
2994         char *tbuffer;
2995
2996         if (tracing_disabled || tracing_selftest_running)
2997                 return 0;
2998
2999         /* Don't pollute graph traces with trace_vprintk internals */
3000         pause_graph_tracing();
3001
3002         pc = preempt_count();
3003         preempt_disable_notrace();
3004
3005
3006         tbuffer = get_trace_buf();
3007         if (!tbuffer) {
3008                 len = 0;
3009                 goto out_nobuffer;
3010         }
3011
3012         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3013
3014         local_save_flags(flags);
3015         size = sizeof(*entry) + len + 1;
3016         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3017                                             flags, pc);
3018         if (!event)
3019                 goto out;
3020         entry = ring_buffer_event_data(event);
3021         entry->ip = ip;
3022
3023         memcpy(&entry->buf, tbuffer, len + 1);
3024         if (!call_filter_check_discard(call, entry, buffer, event)) {
3025                 __buffer_unlock_commit(buffer, event);
3026                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3027         }
3028
3029 out:
3030         put_trace_buf();
3031
3032 out_nobuffer:
3033         preempt_enable_notrace();
3034         unpause_graph_tracing();
3035
3036         return len;
3037 }
3038
3039 __printf(3, 0)
3040 int trace_array_vprintk(struct trace_array *tr,
3041                         unsigned long ip, const char *fmt, va_list args)
3042 {
3043         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3044 }
3045
3046 __printf(3, 0)
3047 int trace_array_printk(struct trace_array *tr,
3048                        unsigned long ip, const char *fmt, ...)
3049 {
3050         int ret;
3051         va_list ap;
3052
3053         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3054                 return 0;
3055
3056         if (!tr)
3057                 return -ENOENT;
3058
3059         va_start(ap, fmt);
3060         ret = trace_array_vprintk(tr, ip, fmt, ap);
3061         va_end(ap);
3062         return ret;
3063 }
3064
3065 __printf(3, 4)
3066 int trace_array_printk_buf(struct ring_buffer *buffer,
3067                            unsigned long ip, const char *fmt, ...)
3068 {
3069         int ret;
3070         va_list ap;
3071
3072         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3073                 return 0;
3074
3075         va_start(ap, fmt);
3076         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3077         va_end(ap);
3078         return ret;
3079 }
3080
3081 __printf(2, 0)
3082 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3083 {
3084         return trace_array_vprintk(&global_trace, ip, fmt, args);
3085 }
3086 EXPORT_SYMBOL_GPL(trace_vprintk);
3087
3088 static void trace_iterator_increment(struct trace_iterator *iter)
3089 {
3090         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3091
3092         iter->idx++;
3093         if (buf_iter)
3094                 ring_buffer_read(buf_iter, NULL);
3095 }
3096
3097 static struct trace_entry *
3098 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3099                 unsigned long *lost_events)
3100 {
3101         struct ring_buffer_event *event;
3102         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3103
3104         if (buf_iter)
3105                 event = ring_buffer_iter_peek(buf_iter, ts);
3106         else
3107                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3108                                          lost_events);
3109
3110         if (event) {
3111                 iter->ent_size = ring_buffer_event_length(event);
3112                 return ring_buffer_event_data(event);
3113         }
3114         iter->ent_size = 0;
3115         return NULL;
3116 }
3117
3118 static struct trace_entry *
3119 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3120                   unsigned long *missing_events, u64 *ent_ts)
3121 {
3122         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3123         struct trace_entry *ent, *next = NULL;
3124         unsigned long lost_events = 0, next_lost = 0;
3125         int cpu_file = iter->cpu_file;
3126         u64 next_ts = 0, ts;
3127         int next_cpu = -1;
3128         int next_size = 0;
3129         int cpu;
3130
3131         /*
3132          * If we are in a per_cpu trace file, don't bother by iterating over
3133          * all cpu and peek directly.
3134          */
3135         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3136                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3137                         return NULL;
3138                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3139                 if (ent_cpu)
3140                         *ent_cpu = cpu_file;
3141
3142                 return ent;
3143         }
3144
3145         for_each_tracing_cpu(cpu) {
3146
3147                 if (ring_buffer_empty_cpu(buffer, cpu))
3148                         continue;
3149
3150                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3151
3152                 /*
3153                  * Pick the entry with the smallest timestamp:
3154                  */
3155                 if (ent && (!next || ts < next_ts)) {
3156                         next = ent;
3157                         next_cpu = cpu;
3158                         next_ts = ts;
3159                         next_lost = lost_events;
3160                         next_size = iter->ent_size;
3161                 }
3162         }
3163
3164         iter->ent_size = next_size;
3165
3166         if (ent_cpu)
3167                 *ent_cpu = next_cpu;
3168
3169         if (ent_ts)
3170                 *ent_ts = next_ts;
3171
3172         if (missing_events)
3173                 *missing_events = next_lost;
3174
3175         return next;
3176 }
3177
3178 /* Find the next real entry, without updating the iterator itself */
3179 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3180                                           int *ent_cpu, u64 *ent_ts)
3181 {
3182         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3183 }
3184
3185 /* Find the next real entry, and increment the iterator to the next entry */
3186 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3187 {
3188         iter->ent = __find_next_entry(iter, &iter->cpu,
3189                                       &iter->lost_events, &iter->ts);
3190
3191         if (iter->ent)
3192                 trace_iterator_increment(iter);
3193
3194         return iter->ent ? iter : NULL;
3195 }
3196
3197 static void trace_consume(struct trace_iterator *iter)
3198 {
3199         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3200                             &iter->lost_events);
3201 }
3202
3203 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3204 {
3205         struct trace_iterator *iter = m->private;
3206         int i = (int)*pos;
3207         void *ent;
3208
3209         WARN_ON_ONCE(iter->leftover);
3210
3211         (*pos)++;
3212
3213         /* can't go backwards */
3214         if (iter->idx > i)
3215                 return NULL;
3216
3217         if (iter->idx < 0)
3218                 ent = trace_find_next_entry_inc(iter);
3219         else
3220                 ent = iter;
3221
3222         while (ent && iter->idx < i)
3223                 ent = trace_find_next_entry_inc(iter);
3224
3225         iter->pos = *pos;
3226
3227         return ent;
3228 }
3229
3230 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3231 {
3232         struct ring_buffer_event *event;
3233         struct ring_buffer_iter *buf_iter;
3234         unsigned long entries = 0;
3235         u64 ts;
3236
3237         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3238
3239         buf_iter = trace_buffer_iter(iter, cpu);
3240         if (!buf_iter)
3241                 return;
3242
3243         ring_buffer_iter_reset(buf_iter);
3244
3245         /*
3246          * We could have the case with the max latency tracers
3247          * that a reset never took place on a cpu. This is evident
3248          * by the timestamp being before the start of the buffer.
3249          */
3250         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3251                 if (ts >= iter->trace_buffer->time_start)
3252                         break;
3253                 entries++;
3254                 ring_buffer_read(buf_iter, NULL);
3255         }
3256
3257         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3258 }
3259
3260 /*
3261  * The current tracer is copied to avoid a global locking
3262  * all around.
3263  */
3264 static void *s_start(struct seq_file *m, loff_t *pos)
3265 {
3266         struct trace_iterator *iter = m->private;
3267         struct trace_array *tr = iter->tr;
3268         int cpu_file = iter->cpu_file;
3269         void *p = NULL;
3270         loff_t l = 0;
3271         int cpu;
3272
3273         /*
3274          * copy the tracer to avoid using a global lock all around.
3275          * iter->trace is a copy of current_trace, the pointer to the
3276          * name may be used instead of a strcmp(), as iter->trace->name
3277          * will point to the same string as current_trace->name.
3278          */
3279         mutex_lock(&trace_types_lock);
3280         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3281                 *iter->trace = *tr->current_trace;
3282         mutex_unlock(&trace_types_lock);
3283
3284 #ifdef CONFIG_TRACER_MAX_TRACE
3285         if (iter->snapshot && iter->trace->use_max_tr)
3286                 return ERR_PTR(-EBUSY);
3287 #endif
3288
3289         if (*pos != iter->pos) {
3290                 iter->ent = NULL;
3291                 iter->cpu = 0;
3292                 iter->idx = -1;
3293
3294                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3295                         for_each_tracing_cpu(cpu)
3296                                 tracing_iter_reset(iter, cpu);
3297                 } else
3298                         tracing_iter_reset(iter, cpu_file);
3299
3300                 iter->leftover = 0;
3301                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3302                         ;
3303
3304         } else {
3305                 /*
3306                  * If we overflowed the seq_file before, then we want
3307                  * to just reuse the trace_seq buffer again.
3308                  */
3309                 if (iter->leftover)
3310                         p = iter;
3311                 else {
3312                         l = *pos - 1;
3313                         p = s_next(m, p, &l);
3314                 }
3315         }
3316
3317         trace_event_read_lock();
3318         trace_access_lock(cpu_file);
3319         return p;
3320 }
3321
3322 static void s_stop(struct seq_file *m, void *p)
3323 {
3324         struct trace_iterator *iter = m->private;
3325
3326 #ifdef CONFIG_TRACER_MAX_TRACE
3327         if (iter->snapshot && iter->trace->use_max_tr)
3328                 return;
3329 #endif
3330
3331         trace_access_unlock(iter->cpu_file);
3332         trace_event_read_unlock();
3333 }
3334
3335 static void
3336 get_total_entries(struct trace_buffer *buf,
3337                   unsigned long *total, unsigned long *entries)
3338 {
3339         unsigned long count;
3340         int cpu;
3341
3342         *total = 0;
3343         *entries = 0;
3344
3345         for_each_tracing_cpu(cpu) {
3346                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3347                 /*
3348                  * If this buffer has skipped entries, then we hold all
3349                  * entries for the trace and we need to ignore the
3350                  * ones before the time stamp.
3351                  */
3352                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3353                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3354                         /* total is the same as the entries */
3355                         *total += count;
3356                 } else
3357                         *total += count +
3358                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3359                 *entries += count;
3360         }
3361 }
3362
3363 static void print_lat_help_header(struct seq_file *m)
3364 {
3365         seq_puts(m, "#                  _------=> CPU#            \n"
3366                     "#                 / _-----=> irqs-off        \n"
3367                     "#                | / _----=> need-resched    \n"
3368                     "#                || / _---=> hardirq/softirq \n"
3369                     "#                ||| / _--=> preempt-depth   \n"
3370                     "#                |||| /     delay            \n"
3371                     "#  cmd     pid   ||||| time  |   caller      \n"
3372                     "#     \\   /      |||||  \\    |   /         \n");
3373 }
3374
3375 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3376 {
3377         unsigned long total;
3378         unsigned long entries;
3379
3380         get_total_entries(buf, &total, &entries);
3381         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3382                    entries, total, num_online_cpus());
3383         seq_puts(m, "#\n");
3384 }
3385
3386 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3387                                    unsigned int flags)
3388 {
3389         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3390
3391         print_event_info(buf, m);
3392
3393         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3394         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3395 }
3396
3397 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3398                                        unsigned int flags)
3399 {
3400         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3401         const char tgid_space[] = "          ";
3402         const char space[] = "  ";
3403
3404         print_event_info(buf, m);
3405
3406         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3407                    tgid ? tgid_space : space);
3408         seq_printf(m, "#                          %s / _----=> need-resched\n",
3409                    tgid ? tgid_space : space);
3410         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3411                    tgid ? tgid_space : space);
3412         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3413                    tgid ? tgid_space : space);
3414         seq_printf(m, "#                          %s||| /     delay\n",
3415                    tgid ? tgid_space : space);
3416         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3417                    tgid ? "   TGID   " : space);
3418         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3419                    tgid ? "     |    " : space);
3420 }
3421
3422 void
3423 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3424 {
3425         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3426         struct trace_buffer *buf = iter->trace_buffer;
3427         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3428         struct tracer *type = iter->trace;
3429         unsigned long entries;
3430         unsigned long total;
3431         const char *name = "preemption";
3432
3433         name = type->name;
3434
3435         get_total_entries(buf, &total, &entries);
3436
3437         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3438                    name, UTS_RELEASE);
3439         seq_puts(m, "# -----------------------------------"
3440                  "---------------------------------\n");
3441         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3442                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3443                    nsecs_to_usecs(data->saved_latency),
3444                    entries,
3445                    total,
3446                    buf->cpu,
3447 #if defined(CONFIG_PREEMPT_NONE)
3448                    "server",
3449 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3450                    "desktop",
3451 #elif defined(CONFIG_PREEMPT)
3452                    "preempt",
3453 #else
3454                    "unknown",
3455 #endif
3456                    /* These are reserved for later use */
3457                    0, 0, 0, 0);
3458 #ifdef CONFIG_SMP
3459         seq_printf(m, " #P:%d)\n", num_online_cpus());
3460 #else
3461         seq_puts(m, ")\n");
3462 #endif
3463         seq_puts(m, "#    -----------------\n");
3464         seq_printf(m, "#    | task: %.16s-%d "
3465                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3466                    data->comm, data->pid,
3467                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3468                    data->policy, data->rt_priority);
3469         seq_puts(m, "#    -----------------\n");
3470
3471         if (data->critical_start) {
3472                 seq_puts(m, "#  => started at: ");
3473                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3474                 trace_print_seq(m, &iter->seq);
3475                 seq_puts(m, "\n#  => ended at:   ");
3476                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3477                 trace_print_seq(m, &iter->seq);
3478                 seq_puts(m, "\n#\n");
3479         }
3480
3481         seq_puts(m, "#\n");
3482 }
3483
3484 static void test_cpu_buff_start(struct trace_iterator *iter)
3485 {
3486         struct trace_seq *s = &iter->seq;
3487         struct trace_array *tr = iter->tr;
3488
3489         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3490                 return;
3491
3492         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3493                 return;
3494
3495         if (cpumask_available(iter->started) &&
3496             cpumask_test_cpu(iter->cpu, iter->started))
3497                 return;
3498
3499         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3500                 return;
3501
3502         if (cpumask_available(iter->started))
3503                 cpumask_set_cpu(iter->cpu, iter->started);
3504
3505         /* Don't print started cpu buffer for the first entry of the trace */
3506         if (iter->idx > 1)
3507                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3508                                 iter->cpu);
3509 }
3510
3511 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3512 {
3513         struct trace_array *tr = iter->tr;
3514         struct trace_seq *s = &iter->seq;
3515         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3516         struct trace_entry *entry;
3517         struct trace_event *event;
3518
3519         entry = iter->ent;
3520
3521         test_cpu_buff_start(iter);
3522
3523         event = ftrace_find_event(entry->type);
3524
3525         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3526                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3527                         trace_print_lat_context(iter);
3528                 else
3529                         trace_print_context(iter);
3530         }
3531
3532         if (trace_seq_has_overflowed(s))
3533                 return TRACE_TYPE_PARTIAL_LINE;
3534
3535         if (event)
3536                 return event->funcs->trace(iter, sym_flags, event);
3537
3538         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3539
3540         return trace_handle_return(s);
3541 }
3542
3543 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3544 {
3545         struct trace_array *tr = iter->tr;
3546         struct trace_seq *s = &iter->seq;
3547         struct trace_entry *entry;
3548         struct trace_event *event;
3549
3550         entry = iter->ent;
3551
3552         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3553                 trace_seq_printf(s, "%d %d %llu ",
3554                                  entry->pid, iter->cpu, iter->ts);
3555
3556         if (trace_seq_has_overflowed(s))
3557                 return TRACE_TYPE_PARTIAL_LINE;
3558
3559         event = ftrace_find_event(entry->type);
3560         if (event)
3561                 return event->funcs->raw(iter, 0, event);
3562
3563         trace_seq_printf(s, "%d ?\n", entry->type);
3564
3565         return trace_handle_return(s);
3566 }
3567
3568 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3569 {
3570         struct trace_array *tr = iter->tr;
3571         struct trace_seq *s = &iter->seq;
3572         unsigned char newline = '\n';
3573         struct trace_entry *entry;
3574         struct trace_event *event;
3575
3576         entry = iter->ent;
3577
3578         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3579                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3580                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3581                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3582                 if (trace_seq_has_overflowed(s))
3583                         return TRACE_TYPE_PARTIAL_LINE;
3584         }
3585
3586         event = ftrace_find_event(entry->type);
3587         if (event) {
3588                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3589                 if (ret != TRACE_TYPE_HANDLED)
3590                         return ret;
3591         }
3592
3593         SEQ_PUT_FIELD(s, newline);
3594
3595         return trace_handle_return(s);
3596 }
3597
3598 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3599 {
3600         struct trace_array *tr = iter->tr;
3601         struct trace_seq *s = &iter->seq;
3602         struct trace_entry *entry;
3603         struct trace_event *event;
3604
3605         entry = iter->ent;
3606
3607         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3608                 SEQ_PUT_FIELD(s, entry->pid);
3609                 SEQ_PUT_FIELD(s, iter->cpu);
3610                 SEQ_PUT_FIELD(s, iter->ts);
3611                 if (trace_seq_has_overflowed(s))
3612                         return TRACE_TYPE_PARTIAL_LINE;
3613         }
3614
3615         event = ftrace_find_event(entry->type);
3616         return event ? event->funcs->binary(iter, 0, event) :
3617                 TRACE_TYPE_HANDLED;
3618 }
3619
3620 int trace_empty(struct trace_iterator *iter)
3621 {
3622         struct ring_buffer_iter *buf_iter;
3623         int cpu;
3624
3625         /* If we are looking at one CPU buffer, only check that one */
3626         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3627                 cpu = iter->cpu_file;
3628                 buf_iter = trace_buffer_iter(iter, cpu);
3629                 if (buf_iter) {
3630                         if (!ring_buffer_iter_empty(buf_iter))
3631                                 return 0;
3632                 } else {
3633                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3634                                 return 0;
3635                 }
3636                 return 1;
3637         }
3638
3639         for_each_tracing_cpu(cpu) {
3640                 buf_iter = trace_buffer_iter(iter, cpu);
3641                 if (buf_iter) {
3642                         if (!ring_buffer_iter_empty(buf_iter))
3643                                 return 0;
3644                 } else {
3645                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3646                                 return 0;
3647                 }
3648         }
3649
3650         return 1;
3651 }
3652
3653 /*  Called with trace_event_read_lock() held. */
3654 enum print_line_t print_trace_line(struct trace_iterator *iter)
3655 {
3656         struct trace_array *tr = iter->tr;
3657         unsigned long trace_flags = tr->trace_flags;
3658         enum print_line_t ret;
3659
3660         if (iter->lost_events) {
3661                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3662                                  iter->cpu, iter->lost_events);
3663                 if (trace_seq_has_overflowed(&iter->seq))
3664                         return TRACE_TYPE_PARTIAL_LINE;
3665         }
3666
3667         if (iter->trace && iter->trace->print_line) {
3668                 ret = iter->trace->print_line(iter);
3669                 if (ret != TRACE_TYPE_UNHANDLED)
3670                         return ret;
3671         }
3672
3673         if (iter->ent->type == TRACE_BPUTS &&
3674                         trace_flags & TRACE_ITER_PRINTK &&
3675                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3676                 return trace_print_bputs_msg_only(iter);
3677
3678         if (iter->ent->type == TRACE_BPRINT &&
3679                         trace_flags & TRACE_ITER_PRINTK &&
3680                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3681                 return trace_print_bprintk_msg_only(iter);
3682
3683         if (iter->ent->type == TRACE_PRINT &&
3684                         trace_flags & TRACE_ITER_PRINTK &&
3685                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3686                 return trace_print_printk_msg_only(iter);
3687
3688         if (trace_flags & TRACE_ITER_BIN)
3689                 return print_bin_fmt(iter);
3690
3691         if (trace_flags & TRACE_ITER_HEX)
3692                 return print_hex_fmt(iter);
3693
3694         if (trace_flags & TRACE_ITER_RAW)
3695                 return print_raw_fmt(iter);
3696
3697         return print_trace_fmt(iter);
3698 }
3699
3700 void trace_latency_header(struct seq_file *m)
3701 {
3702         struct trace_iterator *iter = m->private;
3703         struct trace_array *tr = iter->tr;
3704
3705         /* print nothing if the buffers are empty */
3706         if (trace_empty(iter))
3707                 return;
3708
3709         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3710                 print_trace_header(m, iter);
3711
3712         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3713                 print_lat_help_header(m);
3714 }
3715
3716 void trace_default_header(struct seq_file *m)
3717 {
3718         struct trace_iterator *iter = m->private;
3719         struct trace_array *tr = iter->tr;
3720         unsigned long trace_flags = tr->trace_flags;
3721
3722         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3723                 return;
3724
3725         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3726                 /* print nothing if the buffers are empty */
3727                 if (trace_empty(iter))
3728                         return;
3729                 print_trace_header(m, iter);
3730                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3731                         print_lat_help_header(m);
3732         } else {
3733                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3734                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3735                                 print_func_help_header_irq(iter->trace_buffer,
3736                                                            m, trace_flags);
3737                         else
3738                                 print_func_help_header(iter->trace_buffer, m,
3739                                                        trace_flags);
3740                 }
3741         }
3742 }
3743
3744 static void test_ftrace_alive(struct seq_file *m)
3745 {
3746         if (!ftrace_is_dead())
3747                 return;
3748         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3749                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3750 }
3751
3752 #ifdef CONFIG_TRACER_MAX_TRACE
3753 static void show_snapshot_main_help(struct seq_file *m)
3754 {
3755         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3756                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3757                     "#                      Takes a snapshot of the main buffer.\n"
3758                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3759                     "#                      (Doesn't have to be '2' works with any number that\n"
3760                     "#                       is not a '0' or '1')\n");
3761 }
3762
3763 static void show_snapshot_percpu_help(struct seq_file *m)
3764 {
3765         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3766 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3767         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3768                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3769 #else
3770         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3771                     "#                     Must use main snapshot file to allocate.\n");
3772 #endif
3773         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3774                     "#                      (Doesn't have to be '2' works with any number that\n"
3775                     "#                       is not a '0' or '1')\n");
3776 }
3777
3778 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3779 {
3780         if (iter->tr->allocated_snapshot)
3781                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3782         else
3783                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3784
3785         seq_puts(m, "# Snapshot commands:\n");
3786         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3787                 show_snapshot_main_help(m);
3788         else
3789                 show_snapshot_percpu_help(m);
3790 }
3791 #else
3792 /* Should never be called */
3793 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3794 #endif
3795
3796 static int s_show(struct seq_file *m, void *v)
3797 {
3798         struct trace_iterator *iter = v;
3799         int ret;
3800
3801         if (iter->ent == NULL) {
3802                 if (iter->tr) {
3803                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3804                         seq_puts(m, "#\n");
3805                         test_ftrace_alive(m);
3806                 }
3807                 if (iter->snapshot && trace_empty(iter))
3808                         print_snapshot_help(m, iter);
3809                 else if (iter->trace && iter->trace->print_header)
3810                         iter->trace->print_header(m);
3811                 else
3812                         trace_default_header(m);
3813
3814         } else if (iter->leftover) {
3815                 /*
3816                  * If we filled the seq_file buffer earlier, we
3817                  * want to just show it now.
3818                  */
3819                 ret = trace_print_seq(m, &iter->seq);
3820
3821                 /* ret should this time be zero, but you never know */
3822                 iter->leftover = ret;
3823
3824         } else {
3825                 print_trace_line(iter);
3826                 ret = trace_print_seq(m, &iter->seq);
3827                 /*
3828                  * If we overflow the seq_file buffer, then it will
3829                  * ask us for this data again at start up.
3830                  * Use that instead.
3831                  *  ret is 0 if seq_file write succeeded.
3832                  *        -1 otherwise.
3833                  */
3834                 iter->leftover = ret;
3835         }
3836
3837         return 0;
3838 }
3839
3840 /*
3841  * Should be used after trace_array_get(), trace_types_lock
3842  * ensures that i_cdev was already initialized.
3843  */
3844 static inline int tracing_get_cpu(struct inode *inode)
3845 {
3846         if (inode->i_cdev) /* See trace_create_cpu_file() */
3847                 return (long)inode->i_cdev - 1;
3848         return RING_BUFFER_ALL_CPUS;
3849 }
3850
3851 static const struct seq_operations tracer_seq_ops = {
3852         .start          = s_start,
3853         .next           = s_next,
3854         .stop           = s_stop,
3855         .show           = s_show,
3856 };
3857
3858 static struct trace_iterator *
3859 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3860 {
3861         struct trace_array *tr = inode->i_private;
3862         struct trace_iterator *iter;
3863         int cpu;
3864
3865         if (tracing_disabled)
3866                 return ERR_PTR(-ENODEV);
3867
3868         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3869         if (!iter)
3870                 return ERR_PTR(-ENOMEM);
3871
3872         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3873                                     GFP_KERNEL);
3874         if (!iter->buffer_iter)
3875                 goto release;
3876
3877         /*
3878          * We make a copy of the current tracer to avoid concurrent
3879          * changes on it while we are reading.
3880          */
3881         mutex_lock(&trace_types_lock);
3882         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3883         if (!iter->trace)
3884                 goto fail;
3885
3886         *iter->trace = *tr->current_trace;
3887
3888         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3889                 goto fail;
3890
3891         iter->tr = tr;
3892
3893 #ifdef CONFIG_TRACER_MAX_TRACE
3894         /* Currently only the top directory has a snapshot */
3895         if (tr->current_trace->print_max || snapshot)
3896                 iter->trace_buffer = &tr->max_buffer;
3897         else
3898 #endif
3899                 iter->trace_buffer = &tr->trace_buffer;
3900         iter->snapshot = snapshot;
3901         iter->pos = -1;
3902         iter->cpu_file = tracing_get_cpu(inode);
3903         mutex_init(&iter->mutex);
3904
3905         /* Notify the tracer early; before we stop tracing. */
3906         if (iter->trace && iter->trace->open)
3907                 iter->trace->open(iter);
3908
3909         /* Annotate start of buffers if we had overruns */
3910         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3911                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3912
3913         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3914         if (trace_clocks[tr->clock_id].in_ns)
3915                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3916
3917         /* stop the trace while dumping if we are not opening "snapshot" */
3918         if (!iter->snapshot)
3919                 tracing_stop_tr(tr);
3920
3921         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3922                 for_each_tracing_cpu(cpu) {
3923                         iter->buffer_iter[cpu] =
3924                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3925                                                          cpu, GFP_KERNEL);
3926                 }
3927                 ring_buffer_read_prepare_sync();
3928                 for_each_tracing_cpu(cpu) {
3929                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3930                         tracing_iter_reset(iter, cpu);
3931                 }
3932         } else {
3933                 cpu = iter->cpu_file;
3934                 iter->buffer_iter[cpu] =
3935                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3936                                                  cpu, GFP_KERNEL);
3937                 ring_buffer_read_prepare_sync();
3938                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3939                 tracing_iter_reset(iter, cpu);
3940         }
3941
3942         mutex_unlock(&trace_types_lock);
3943
3944         return iter;
3945
3946  fail:
3947         mutex_unlock(&trace_types_lock);
3948         kfree(iter->trace);
3949         kfree(iter->buffer_iter);
3950 release:
3951         seq_release_private(inode, file);
3952         return ERR_PTR(-ENOMEM);
3953 }
3954
3955 int tracing_open_generic(struct inode *inode, struct file *filp)
3956 {
3957         if (tracing_disabled)
3958                 return -ENODEV;
3959
3960         filp->private_data = inode->i_private;
3961         return 0;
3962 }
3963
3964 bool tracing_is_disabled(void)
3965 {
3966         return (tracing_disabled) ? true: false;
3967 }
3968
3969 /*
3970  * Open and update trace_array ref count.
3971  * Must have the current trace_array passed to it.
3972  */
3973 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3974 {
3975         struct trace_array *tr = inode->i_private;
3976
3977         if (tracing_disabled)
3978                 return -ENODEV;
3979
3980         if (trace_array_get(tr) < 0)
3981                 return -ENODEV;
3982
3983         filp->private_data = inode->i_private;
3984
3985         return 0;
3986 }
3987
3988 static int tracing_release(struct inode *inode, struct file *file)
3989 {
3990         struct trace_array *tr = inode->i_private;
3991         struct seq_file *m = file->private_data;
3992         struct trace_iterator *iter;
3993         int cpu;
3994
3995         if (!(file->f_mode & FMODE_READ)) {
3996                 trace_array_put(tr);
3997                 return 0;
3998         }
3999
4000         /* Writes do not use seq_file */
4001         iter = m->private;
4002         mutex_lock(&trace_types_lock);
4003
4004         for_each_tracing_cpu(cpu) {
4005                 if (iter->buffer_iter[cpu])
4006                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4007         }
4008
4009         if (iter->trace && iter->trace->close)
4010                 iter->trace->close(iter);
4011
4012         if (!iter->snapshot)
4013                 /* reenable tracing if it was previously enabled */
4014                 tracing_start_tr(tr);
4015
4016         __trace_array_put(tr);
4017
4018         mutex_unlock(&trace_types_lock);
4019
4020         mutex_destroy(&iter->mutex);
4021         free_cpumask_var(iter->started);
4022         kfree(iter->trace);
4023         kfree(iter->buffer_iter);
4024         seq_release_private(inode, file);
4025
4026         return 0;
4027 }
4028
4029 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4030 {
4031         struct trace_array *tr = inode->i_private;
4032
4033         trace_array_put(tr);
4034         return 0;
4035 }
4036
4037 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4038 {
4039         struct trace_array *tr = inode->i_private;
4040
4041         trace_array_put(tr);
4042
4043         return single_release(inode, file);
4044 }
4045
4046 static int tracing_open(struct inode *inode, struct file *file)
4047 {
4048         struct trace_array *tr = inode->i_private;
4049         struct trace_iterator *iter;
4050         int ret = 0;
4051
4052         if (trace_array_get(tr) < 0)
4053                 return -ENODEV;
4054
4055         /* If this file was open for write, then erase contents */
4056         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4057                 int cpu = tracing_get_cpu(inode);
4058                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4059
4060 #ifdef CONFIG_TRACER_MAX_TRACE
4061                 if (tr->current_trace->print_max)
4062                         trace_buf = &tr->max_buffer;
4063 #endif
4064
4065                 if (cpu == RING_BUFFER_ALL_CPUS)
4066                         tracing_reset_online_cpus(trace_buf);
4067                 else
4068                         tracing_reset(trace_buf, cpu);
4069         }
4070
4071         if (file->f_mode & FMODE_READ) {
4072                 iter = __tracing_open(inode, file, false);
4073                 if (IS_ERR(iter))
4074                         ret = PTR_ERR(iter);
4075                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4076                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4077         }
4078
4079         if (ret < 0)
4080                 trace_array_put(tr);
4081
4082         return ret;
4083 }
4084
4085 /*
4086  * Some tracers are not suitable for instance buffers.
4087  * A tracer is always available for the global array (toplevel)
4088  * or if it explicitly states that it is.
4089  */
4090 static bool
4091 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4092 {
4093         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4094 }
4095
4096 /* Find the next tracer that this trace array may use */
4097 static struct tracer *
4098 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4099 {
4100         while (t && !trace_ok_for_array(t, tr))
4101                 t = t->next;
4102
4103         return t;
4104 }
4105
4106 static void *
4107 t_next(struct seq_file *m, void *v, loff_t *pos)
4108 {
4109         struct trace_array *tr = m->private;
4110         struct tracer *t = v;
4111
4112         (*pos)++;
4113
4114         if (t)
4115                 t = get_tracer_for_array(tr, t->next);
4116
4117         return t;
4118 }
4119
4120 static void *t_start(struct seq_file *m, loff_t *pos)
4121 {
4122         struct trace_array *tr = m->private;
4123         struct tracer *t;
4124         loff_t l = 0;
4125
4126         mutex_lock(&trace_types_lock);
4127
4128         t = get_tracer_for_array(tr, trace_types);
4129         for (; t && l < *pos; t = t_next(m, t, &l))
4130                         ;
4131
4132         return t;
4133 }
4134
4135 static void t_stop(struct seq_file *m, void *p)
4136 {
4137         mutex_unlock(&trace_types_lock);
4138 }
4139
4140 static int t_show(struct seq_file *m, void *v)
4141 {
4142         struct tracer *t = v;
4143
4144         if (!t)
4145                 return 0;
4146
4147         seq_puts(m, t->name);
4148         if (t->next)
4149                 seq_putc(m, ' ');
4150         else
4151                 seq_putc(m, '\n');
4152
4153         return 0;
4154 }
4155
4156 static const struct seq_operations show_traces_seq_ops = {
4157         .start          = t_start,
4158         .next           = t_next,
4159         .stop           = t_stop,
4160         .show           = t_show,
4161 };
4162
4163 static int show_traces_open(struct inode *inode, struct file *file)
4164 {
4165         struct trace_array *tr = inode->i_private;
4166         struct seq_file *m;
4167         int ret;
4168
4169         if (tracing_disabled)
4170                 return -ENODEV;
4171
4172         if (trace_array_get(tr) < 0)
4173                 return -ENODEV;
4174
4175         ret = seq_open(file, &show_traces_seq_ops);
4176         if (ret) {
4177                 trace_array_put(tr);
4178                 return ret;
4179         }
4180
4181         m = file->private_data;
4182         m->private = tr;
4183
4184         return 0;
4185 }
4186
4187 static int show_traces_release(struct inode *inode, struct file *file)
4188 {
4189         struct trace_array *tr = inode->i_private;
4190
4191         trace_array_put(tr);
4192         return seq_release(inode, file);
4193 }
4194
4195 static ssize_t
4196 tracing_write_stub(struct file *filp, const char __user *ubuf,
4197                    size_t count, loff_t *ppos)
4198 {
4199         return count;
4200 }
4201
4202 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4203 {
4204         int ret;
4205
4206         if (file->f_mode & FMODE_READ)
4207                 ret = seq_lseek(file, offset, whence);
4208         else
4209                 file->f_pos = ret = 0;
4210
4211         return ret;
4212 }
4213
4214 static const struct file_operations tracing_fops = {
4215         .open           = tracing_open,
4216         .read           = seq_read,
4217         .write          = tracing_write_stub,
4218         .llseek         = tracing_lseek,
4219         .release        = tracing_release,
4220 };
4221
4222 static const struct file_operations show_traces_fops = {
4223         .open           = show_traces_open,
4224         .read           = seq_read,
4225         .llseek         = seq_lseek,
4226         .release        = show_traces_release,
4227 };
4228
4229 static ssize_t
4230 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4231                      size_t count, loff_t *ppos)
4232 {
4233         struct trace_array *tr = file_inode(filp)->i_private;
4234         char *mask_str;
4235         int len;
4236
4237         len = snprintf(NULL, 0, "%*pb\n",
4238                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4239         mask_str = kmalloc(len, GFP_KERNEL);
4240         if (!mask_str)
4241                 return -ENOMEM;
4242
4243         len = snprintf(mask_str, len, "%*pb\n",
4244                        cpumask_pr_args(tr->tracing_cpumask));
4245         if (len >= count) {
4246                 count = -EINVAL;
4247                 goto out_err;
4248         }
4249         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4250
4251 out_err:
4252         kfree(mask_str);
4253
4254         return count;
4255 }
4256
4257 static ssize_t
4258 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4259                       size_t count, loff_t *ppos)
4260 {
4261         struct trace_array *tr = file_inode(filp)->i_private;
4262         cpumask_var_t tracing_cpumask_new;
4263         int err, cpu;
4264
4265         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4266                 return -ENOMEM;
4267
4268         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4269         if (err)
4270                 goto err_unlock;
4271
4272         local_irq_disable();
4273         arch_spin_lock(&tr->max_lock);
4274         for_each_tracing_cpu(cpu) {
4275                 /*
4276                  * Increase/decrease the disabled counter if we are
4277                  * about to flip a bit in the cpumask:
4278                  */
4279                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4280                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4281                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4282                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4283                 }
4284                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4285                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4286                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4287                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4288                 }
4289         }
4290         arch_spin_unlock(&tr->max_lock);
4291         local_irq_enable();
4292
4293         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4294         free_cpumask_var(tracing_cpumask_new);
4295
4296         return count;
4297
4298 err_unlock:
4299         free_cpumask_var(tracing_cpumask_new);
4300
4301         return err;
4302 }
4303
4304 static const struct file_operations tracing_cpumask_fops = {
4305         .open           = tracing_open_generic_tr,
4306         .read           = tracing_cpumask_read,
4307         .write          = tracing_cpumask_write,
4308         .release        = tracing_release_generic_tr,
4309         .llseek         = generic_file_llseek,
4310 };
4311
4312 static int tracing_trace_options_show(struct seq_file *m, void *v)
4313 {
4314         struct tracer_opt *trace_opts;
4315         struct trace_array *tr = m->private;
4316         u32 tracer_flags;
4317         int i;
4318
4319         mutex_lock(&trace_types_lock);
4320         tracer_flags = tr->current_trace->flags->val;
4321         trace_opts = tr->current_trace->flags->opts;
4322
4323         for (i = 0; trace_options[i]; i++) {
4324                 if (tr->trace_flags & (1 << i))
4325                         seq_printf(m, "%s\n", trace_options[i]);
4326                 else
4327                         seq_printf(m, "no%s\n", trace_options[i]);
4328         }
4329
4330         for (i = 0; trace_opts[i].name; i++) {
4331                 if (tracer_flags & trace_opts[i].bit)
4332                         seq_printf(m, "%s\n", trace_opts[i].name);
4333                 else
4334                         seq_printf(m, "no%s\n", trace_opts[i].name);
4335         }
4336         mutex_unlock(&trace_types_lock);
4337
4338         return 0;
4339 }
4340
4341 static int __set_tracer_option(struct trace_array *tr,
4342                                struct tracer_flags *tracer_flags,
4343                                struct tracer_opt *opts, int neg)
4344 {
4345         struct tracer *trace = tracer_flags->trace;
4346         int ret;
4347
4348         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4349         if (ret)
4350                 return ret;
4351
4352         if (neg)
4353                 tracer_flags->val &= ~opts->bit;
4354         else
4355                 tracer_flags->val |= opts->bit;
4356         return 0;
4357 }
4358
4359 /* Try to assign a tracer specific option */
4360 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4361 {
4362         struct tracer *trace = tr->current_trace;
4363         struct tracer_flags *tracer_flags = trace->flags;
4364         struct tracer_opt *opts = NULL;
4365         int i;
4366
4367         for (i = 0; tracer_flags->opts[i].name; i++) {
4368                 opts = &tracer_flags->opts[i];
4369
4370                 if (strcmp(cmp, opts->name) == 0)
4371                         return __set_tracer_option(tr, trace->flags, opts, neg);
4372         }
4373
4374         return -EINVAL;
4375 }
4376
4377 /* Some tracers require overwrite to stay enabled */
4378 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4379 {
4380         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4381                 return -1;
4382
4383         return 0;
4384 }
4385
4386 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4387 {
4388         int *map;
4389
4390         if ((mask == TRACE_ITER_RECORD_TGID) ||
4391             (mask == TRACE_ITER_RECORD_CMD))
4392                 lockdep_assert_held(&event_mutex);
4393
4394         /* do nothing if flag is already set */
4395         if (!!(tr->trace_flags & mask) == !!enabled)
4396                 return 0;
4397
4398         /* Give the tracer a chance to approve the change */
4399         if (tr->current_trace->flag_changed)
4400                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4401                         return -EINVAL;
4402
4403         if (enabled)
4404                 tr->trace_flags |= mask;
4405         else
4406                 tr->trace_flags &= ~mask;
4407
4408         if (mask == TRACE_ITER_RECORD_CMD)
4409                 trace_event_enable_cmd_record(enabled);
4410
4411         if (mask == TRACE_ITER_RECORD_TGID) {
4412                 if (!tgid_map) {
4413                         tgid_map_max = pid_max;
4414                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4415                                        GFP_KERNEL);
4416
4417                         /*
4418                          * Pairs with smp_load_acquire() in
4419                          * trace_find_tgid_ptr() to ensure that if it observes
4420                          * the tgid_map we just allocated then it also observes
4421                          * the corresponding tgid_map_max value.
4422                          */
4423                         smp_store_release(&tgid_map, map);
4424                 }
4425                 if (!tgid_map) {
4426                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4427                         return -ENOMEM;
4428                 }
4429
4430                 trace_event_enable_tgid_record(enabled);
4431         }
4432
4433         if (mask == TRACE_ITER_EVENT_FORK)
4434                 trace_event_follow_fork(tr, enabled);
4435
4436         if (mask == TRACE_ITER_FUNC_FORK)
4437                 ftrace_pid_follow_fork(tr, enabled);
4438
4439         if (mask == TRACE_ITER_OVERWRITE) {
4440                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4441 #ifdef CONFIG_TRACER_MAX_TRACE
4442                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4443 #endif
4444         }
4445
4446         if (mask == TRACE_ITER_PRINTK) {
4447                 trace_printk_start_stop_comm(enabled);
4448                 trace_printk_control(enabled);
4449         }
4450
4451         return 0;
4452 }
4453
4454 static int trace_set_options(struct trace_array *tr, char *option)
4455 {
4456         char *cmp;
4457         int neg = 0;
4458         int ret;
4459         size_t orig_len = strlen(option);
4460
4461         cmp = strstrip(option);
4462
4463         if (strncmp(cmp, "no", 2) == 0) {
4464                 neg = 1;
4465                 cmp += 2;
4466         }
4467
4468         mutex_lock(&event_mutex);
4469         mutex_lock(&trace_types_lock);
4470
4471         ret = match_string(trace_options, -1, cmp);
4472         /* If no option could be set, test the specific tracer options */
4473         if (ret < 0)
4474                 ret = set_tracer_option(tr, cmp, neg);
4475         else
4476                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4477
4478         mutex_unlock(&trace_types_lock);
4479         mutex_unlock(&event_mutex);
4480
4481         /*
4482          * If the first trailing whitespace is replaced with '\0' by strstrip,
4483          * turn it back into a space.
4484          */
4485         if (orig_len > strlen(option))
4486                 option[strlen(option)] = ' ';
4487
4488         return ret;
4489 }
4490
4491 static void __init apply_trace_boot_options(void)
4492 {
4493         char *buf = trace_boot_options_buf;
4494         char *option;
4495
4496         while (true) {
4497                 option = strsep(&buf, ",");
4498
4499                 if (!option)
4500                         break;
4501
4502                 if (*option)
4503                         trace_set_options(&global_trace, option);
4504
4505                 /* Put back the comma to allow this to be called again */
4506                 if (buf)
4507                         *(buf - 1) = ',';
4508         }
4509 }
4510
4511 static ssize_t
4512 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4513                         size_t cnt, loff_t *ppos)
4514 {
4515         struct seq_file *m = filp->private_data;
4516         struct trace_array *tr = m->private;
4517         char buf[64];
4518         int ret;
4519
4520         if (cnt >= sizeof(buf))
4521                 return -EINVAL;
4522
4523         if (copy_from_user(buf, ubuf, cnt))
4524                 return -EFAULT;
4525
4526         buf[cnt] = 0;
4527
4528         ret = trace_set_options(tr, buf);
4529         if (ret < 0)
4530                 return ret;
4531
4532         *ppos += cnt;
4533
4534         return cnt;
4535 }
4536
4537 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4538 {
4539         struct trace_array *tr = inode->i_private;
4540         int ret;
4541
4542         if (tracing_disabled)
4543                 return -ENODEV;
4544
4545         if (trace_array_get(tr) < 0)
4546                 return -ENODEV;
4547
4548         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4549         if (ret < 0)
4550                 trace_array_put(tr);
4551
4552         return ret;
4553 }
4554
4555 static const struct file_operations tracing_iter_fops = {
4556         .open           = tracing_trace_options_open,
4557         .read           = seq_read,
4558         .llseek         = seq_lseek,
4559         .release        = tracing_single_release_tr,
4560         .write          = tracing_trace_options_write,
4561 };
4562
4563 static const char readme_msg[] =
4564         "tracing mini-HOWTO:\n\n"
4565         "# echo 0 > tracing_on : quick way to disable tracing\n"
4566         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4567         " Important files:\n"
4568         "  trace\t\t\t- The static contents of the buffer\n"
4569         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4570         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4571         "  current_tracer\t- function and latency tracers\n"
4572         "  available_tracers\t- list of configured tracers for current_tracer\n"
4573         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4574         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4575         "  trace_clock\t\t-change the clock used to order events\n"
4576         "       local:   Per cpu clock but may not be synced across CPUs\n"
4577         "      global:   Synced across CPUs but slows tracing down.\n"
4578         "     counter:   Not a clock, but just an increment\n"
4579         "      uptime:   Jiffy counter from time of boot\n"
4580         "        perf:   Same clock that perf events use\n"
4581 #ifdef CONFIG_X86_64
4582         "     x86-tsc:   TSC cycle counter\n"
4583 #endif
4584         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
4585         "       delta:   Delta difference against a buffer-wide timestamp\n"
4586         "    absolute:   Absolute (standalone) timestamp\n"
4587         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4588         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4589         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4590         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4591         "\t\t\t  Remove sub-buffer with rmdir\n"
4592         "  trace_options\t\t- Set format or modify how tracing happens\n"
4593         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4594         "\t\t\t  option name\n"
4595         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4596 #ifdef CONFIG_DYNAMIC_FTRACE
4597         "\n  available_filter_functions - list of functions that can be filtered on\n"
4598         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4599         "\t\t\t  functions\n"
4600         "\t     accepts: func_full_name or glob-matching-pattern\n"
4601         "\t     modules: Can select a group via module\n"
4602         "\t      Format: :mod:<module-name>\n"
4603         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4604         "\t    triggers: a command to perform when function is hit\n"
4605         "\t      Format: <function>:<trigger>[:count]\n"
4606         "\t     trigger: traceon, traceoff\n"
4607         "\t\t      enable_event:<system>:<event>\n"
4608         "\t\t      disable_event:<system>:<event>\n"
4609 #ifdef CONFIG_STACKTRACE
4610         "\t\t      stacktrace\n"
4611 #endif
4612 #ifdef CONFIG_TRACER_SNAPSHOT
4613         "\t\t      snapshot\n"
4614 #endif
4615         "\t\t      dump\n"
4616         "\t\t      cpudump\n"
4617         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4618         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4619         "\t     The first one will disable tracing every time do_fault is hit\n"
4620         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4621         "\t       The first time do trap is hit and it disables tracing, the\n"
4622         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4623         "\t       the counter will not decrement. It only decrements when the\n"
4624         "\t       trigger did work\n"
4625         "\t     To remove trigger without count:\n"
4626         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4627         "\t     To remove trigger with a count:\n"
4628         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4629         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4630         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4631         "\t    modules: Can select a group via module command :mod:\n"
4632         "\t    Does not accept triggers\n"
4633 #endif /* CONFIG_DYNAMIC_FTRACE */
4634 #ifdef CONFIG_FUNCTION_TRACER
4635         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4636         "\t\t    (function)\n"
4637 #endif
4638 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4639         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4640         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4641         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4642 #endif
4643 #ifdef CONFIG_TRACER_SNAPSHOT
4644         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4645         "\t\t\t  snapshot buffer. Read the contents for more\n"
4646         "\t\t\t  information\n"
4647 #endif
4648 #ifdef CONFIG_STACK_TRACER
4649         "  stack_trace\t\t- Shows the max stack trace when active\n"
4650         "  stack_max_size\t- Shows current max stack size that was traced\n"
4651         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4652         "\t\t\t  new trace)\n"
4653 #ifdef CONFIG_DYNAMIC_FTRACE
4654         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4655         "\t\t\t  traces\n"
4656 #endif
4657 #endif /* CONFIG_STACK_TRACER */
4658 #ifdef CONFIG_KPROBE_EVENTS
4659         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4660         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4661 #endif
4662 #ifdef CONFIG_UPROBE_EVENTS
4663         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4664         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4665 #endif
4666 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4667         "\t  accepts: event-definitions (one definition per line)\n"
4668         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4669         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4670         "\t           -:[<group>/]<event>\n"
4671 #ifdef CONFIG_KPROBE_EVENTS
4672         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4673   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4674 #endif
4675 #ifdef CONFIG_UPROBE_EVENTS
4676         "\t    place: <path>:<offset>\n"
4677 #endif
4678         "\t     args: <name>=fetcharg[:type]\n"
4679         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4680         "\t           $stack<index>, $stack, $retval, $comm\n"
4681         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4682         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4683 #endif
4684         "  events/\t\t- Directory containing all trace event subsystems:\n"
4685         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4686         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4687         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4688         "\t\t\t  events\n"
4689         "      filter\t\t- If set, only events passing filter are traced\n"
4690         "  events/<system>/<event>/\t- Directory containing control files for\n"
4691         "\t\t\t  <event>:\n"
4692         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4693         "      filter\t\t- If set, only events passing filter are traced\n"
4694         "      trigger\t\t- If set, a command to perform when event is hit\n"
4695         "\t    Format: <trigger>[:count][if <filter>]\n"
4696         "\t   trigger: traceon, traceoff\n"
4697         "\t            enable_event:<system>:<event>\n"
4698         "\t            disable_event:<system>:<event>\n"
4699 #ifdef CONFIG_HIST_TRIGGERS
4700         "\t            enable_hist:<system>:<event>\n"
4701         "\t            disable_hist:<system>:<event>\n"
4702 #endif
4703 #ifdef CONFIG_STACKTRACE
4704         "\t\t    stacktrace\n"
4705 #endif
4706 #ifdef CONFIG_TRACER_SNAPSHOT
4707         "\t\t    snapshot\n"
4708 #endif
4709 #ifdef CONFIG_HIST_TRIGGERS
4710         "\t\t    hist (see below)\n"
4711 #endif
4712         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4713         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4714         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4715         "\t                  events/block/block_unplug/trigger\n"
4716         "\t   The first disables tracing every time block_unplug is hit.\n"
4717         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4718         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4719         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4720         "\t   Like function triggers, the counter is only decremented if it\n"
4721         "\t    enabled or disabled tracing.\n"
4722         "\t   To remove a trigger without a count:\n"
4723         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4724         "\t   To remove a trigger with a count:\n"
4725         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4726         "\t   Filters can be ignored when removing a trigger.\n"
4727 #ifdef CONFIG_HIST_TRIGGERS
4728         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4729         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4730         "\t            [:values=<field1[,field2,...]>]\n"
4731         "\t            [:sort=<field1[,field2,...]>]\n"
4732         "\t            [:size=#entries]\n"
4733         "\t            [:pause][:continue][:clear]\n"
4734         "\t            [:name=histname1]\n"
4735         "\t            [if <filter>]\n\n"
4736         "\t    Note, special fields can be used as well:\n"
4737         "\t            common_timestamp - to record current timestamp\n"
4738         "\t            common_cpu - to record the CPU the event happened on\n"
4739         "\n"
4740         "\t    When a matching event is hit, an entry is added to a hash\n"
4741         "\t    table using the key(s) and value(s) named, and the value of a\n"
4742         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4743         "\t    correspond to fields in the event's format description.  Keys\n"
4744         "\t    can be any field, or the special string 'stacktrace'.\n"
4745         "\t    Compound keys consisting of up to two fields can be specified\n"
4746         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4747         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4748         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4749         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4750         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4751         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4752         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4753         "\t    its histogram data will be shared with other triggers of the\n"
4754         "\t    same name, and trigger hits will update this common data.\n\n"
4755         "\t    Reading the 'hist' file for the event will dump the hash\n"
4756         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4757         "\t    triggers attached to an event, there will be a table for each\n"
4758         "\t    trigger in the output.  The table displayed for a named\n"
4759         "\t    trigger will be the same as any other instance having the\n"
4760         "\t    same name.  The default format used to display a given field\n"
4761         "\t    can be modified by appending any of the following modifiers\n"
4762         "\t    to the field name, as applicable:\n\n"
4763         "\t            .hex        display a number as a hex value\n"
4764         "\t            .sym        display an address as a symbol\n"
4765         "\t            .sym-offset display an address as a symbol and offset\n"
4766         "\t            .execname   display a common_pid as a program name\n"
4767         "\t            .syscall    display a syscall id as a syscall name\n"
4768         "\t            .log2       display log2 value rather than raw number\n"
4769         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4770         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4771         "\t    trigger or to start a hist trigger but not log any events\n"
4772         "\t    until told to do so.  'continue' can be used to start or\n"
4773         "\t    restart a paused hist trigger.\n\n"
4774         "\t    The 'clear' parameter will clear the contents of a running\n"
4775         "\t    hist trigger and leave its current paused/active state\n"
4776         "\t    unchanged.\n\n"
4777         "\t    The enable_hist and disable_hist triggers can be used to\n"
4778         "\t    have one event conditionally start and stop another event's\n"
4779         "\t    already-attached hist trigger.  The syntax is analagous to\n"
4780         "\t    the enable_event and disable_event triggers.\n"
4781 #endif
4782 ;
4783
4784 static ssize_t
4785 tracing_readme_read(struct file *filp, char __user *ubuf,
4786                        size_t cnt, loff_t *ppos)
4787 {
4788         return simple_read_from_buffer(ubuf, cnt, ppos,
4789                                         readme_msg, strlen(readme_msg));
4790 }
4791
4792 static const struct file_operations tracing_readme_fops = {
4793         .open           = tracing_open_generic,
4794         .read           = tracing_readme_read,
4795         .llseek         = generic_file_llseek,
4796 };
4797
4798 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4799 {
4800         int pid = ++(*pos);
4801
4802         return trace_find_tgid_ptr(pid);
4803 }
4804
4805 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4806 {
4807         int pid = *pos;
4808
4809         return trace_find_tgid_ptr(pid);
4810 }
4811
4812 static void saved_tgids_stop(struct seq_file *m, void *v)
4813 {
4814 }
4815
4816 static int saved_tgids_show(struct seq_file *m, void *v)
4817 {
4818         int *entry = (int *)v;
4819         int pid = entry - tgid_map;
4820         int tgid = *entry;
4821
4822         if (tgid == 0)
4823                 return SEQ_SKIP;
4824
4825         seq_printf(m, "%d %d\n", pid, tgid);
4826         return 0;
4827 }
4828
4829 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4830         .start          = saved_tgids_start,
4831         .stop           = saved_tgids_stop,
4832         .next           = saved_tgids_next,
4833         .show           = saved_tgids_show,
4834 };
4835
4836 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4837 {
4838         if (tracing_disabled)
4839                 return -ENODEV;
4840
4841         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4842 }
4843
4844
4845 static const struct file_operations tracing_saved_tgids_fops = {
4846         .open           = tracing_saved_tgids_open,
4847         .read           = seq_read,
4848         .llseek         = seq_lseek,
4849         .release        = seq_release,
4850 };
4851
4852 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4853 {
4854         unsigned int *ptr = v;
4855
4856         if (*pos || m->count)
4857                 ptr++;
4858
4859         (*pos)++;
4860
4861         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4862              ptr++) {
4863                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4864                         continue;
4865
4866                 return ptr;
4867         }
4868
4869         return NULL;
4870 }
4871
4872 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4873 {
4874         void *v;
4875         loff_t l = 0;
4876
4877         preempt_disable();
4878         arch_spin_lock(&trace_cmdline_lock);
4879
4880         v = &savedcmd->map_cmdline_to_pid[0];
4881         while (l <= *pos) {
4882                 v = saved_cmdlines_next(m, v, &l);
4883                 if (!v)
4884                         return NULL;
4885         }
4886
4887         return v;
4888 }
4889
4890 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4891 {
4892         arch_spin_unlock(&trace_cmdline_lock);
4893         preempt_enable();
4894 }
4895
4896 static int saved_cmdlines_show(struct seq_file *m, void *v)
4897 {
4898         char buf[TASK_COMM_LEN];
4899         unsigned int *pid = v;
4900
4901         __trace_find_cmdline(*pid, buf);
4902         seq_printf(m, "%d %s\n", *pid, buf);
4903         return 0;
4904 }
4905
4906 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4907         .start          = saved_cmdlines_start,
4908         .next           = saved_cmdlines_next,
4909         .stop           = saved_cmdlines_stop,
4910         .show           = saved_cmdlines_show,
4911 };
4912
4913 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4914 {
4915         if (tracing_disabled)
4916                 return -ENODEV;
4917
4918         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4919 }
4920
4921 static const struct file_operations tracing_saved_cmdlines_fops = {
4922         .open           = tracing_saved_cmdlines_open,
4923         .read           = seq_read,
4924         .llseek         = seq_lseek,
4925         .release        = seq_release,
4926 };
4927
4928 static ssize_t
4929 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4930                                  size_t cnt, loff_t *ppos)
4931 {
4932         char buf[64];
4933         int r;
4934
4935         arch_spin_lock(&trace_cmdline_lock);
4936         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4937         arch_spin_unlock(&trace_cmdline_lock);
4938
4939         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4940 }
4941
4942 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4943 {
4944         kfree(s->saved_cmdlines);
4945         kfree(s->map_cmdline_to_pid);
4946         kfree(s);
4947 }
4948
4949 static int tracing_resize_saved_cmdlines(unsigned int val)
4950 {
4951         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4952
4953         s = kmalloc(sizeof(*s), GFP_KERNEL);
4954         if (!s)
4955                 return -ENOMEM;
4956
4957         if (allocate_cmdlines_buffer(val, s) < 0) {
4958                 kfree(s);
4959                 return -ENOMEM;
4960         }
4961
4962         arch_spin_lock(&trace_cmdline_lock);
4963         savedcmd_temp = savedcmd;
4964         savedcmd = s;
4965         arch_spin_unlock(&trace_cmdline_lock);
4966         free_saved_cmdlines_buffer(savedcmd_temp);
4967
4968         return 0;
4969 }
4970
4971 static ssize_t
4972 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4973                                   size_t cnt, loff_t *ppos)
4974 {
4975         unsigned long val;
4976         int ret;
4977
4978         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4979         if (ret)
4980                 return ret;
4981
4982         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
4983         if (!val || val > PID_MAX_DEFAULT)
4984                 return -EINVAL;
4985
4986         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4987         if (ret < 0)
4988                 return ret;
4989
4990         *ppos += cnt;
4991
4992         return cnt;
4993 }
4994
4995 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4996         .open           = tracing_open_generic,
4997         .read           = tracing_saved_cmdlines_size_read,
4998         .write          = tracing_saved_cmdlines_size_write,
4999 };
5000
5001 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5002 static union trace_eval_map_item *
5003 update_eval_map(union trace_eval_map_item *ptr)
5004 {
5005         if (!ptr->map.eval_string) {
5006                 if (ptr->tail.next) {
5007                         ptr = ptr->tail.next;
5008                         /* Set ptr to the next real item (skip head) */
5009                         ptr++;
5010                 } else
5011                         return NULL;
5012         }
5013         return ptr;
5014 }
5015
5016 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5017 {
5018         union trace_eval_map_item *ptr = v;
5019
5020         /*
5021          * Paranoid! If ptr points to end, we don't want to increment past it.
5022          * This really should never happen.
5023          */
5024         ptr = update_eval_map(ptr);
5025         if (WARN_ON_ONCE(!ptr))
5026                 return NULL;
5027
5028         ptr++;
5029
5030         (*pos)++;
5031
5032         ptr = update_eval_map(ptr);
5033
5034         return ptr;
5035 }
5036
5037 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5038 {
5039         union trace_eval_map_item *v;
5040         loff_t l = 0;
5041
5042         mutex_lock(&trace_eval_mutex);
5043
5044         v = trace_eval_maps;
5045         if (v)
5046                 v++;
5047
5048         while (v && l < *pos) {
5049                 v = eval_map_next(m, v, &l);
5050         }
5051
5052         return v;
5053 }
5054
5055 static void eval_map_stop(struct seq_file *m, void *v)
5056 {
5057         mutex_unlock(&trace_eval_mutex);
5058 }
5059
5060 static int eval_map_show(struct seq_file *m, void *v)
5061 {
5062         union trace_eval_map_item *ptr = v;
5063
5064         seq_printf(m, "%s %ld (%s)\n",
5065                    ptr->map.eval_string, ptr->map.eval_value,
5066                    ptr->map.system);
5067
5068         return 0;
5069 }
5070
5071 static const struct seq_operations tracing_eval_map_seq_ops = {
5072         .start          = eval_map_start,
5073         .next           = eval_map_next,
5074         .stop           = eval_map_stop,
5075         .show           = eval_map_show,
5076 };
5077
5078 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5079 {
5080         if (tracing_disabled)
5081                 return -ENODEV;
5082
5083         return seq_open(filp, &tracing_eval_map_seq_ops);
5084 }
5085
5086 static const struct file_operations tracing_eval_map_fops = {
5087         .open           = tracing_eval_map_open,
5088         .read           = seq_read,
5089         .llseek         = seq_lseek,
5090         .release        = seq_release,
5091 };
5092
5093 static inline union trace_eval_map_item *
5094 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5095 {
5096         /* Return tail of array given the head */
5097         return ptr + ptr->head.length + 1;
5098 }
5099
5100 static void
5101 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5102                            int len)
5103 {
5104         struct trace_eval_map **stop;
5105         struct trace_eval_map **map;
5106         union trace_eval_map_item *map_array;
5107         union trace_eval_map_item *ptr;
5108
5109         stop = start + len;
5110
5111         /*
5112          * The trace_eval_maps contains the map plus a head and tail item,
5113          * where the head holds the module and length of array, and the
5114          * tail holds a pointer to the next list.
5115          */
5116         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5117         if (!map_array) {
5118                 pr_warn("Unable to allocate trace eval mapping\n");
5119                 return;
5120         }
5121
5122         mutex_lock(&trace_eval_mutex);
5123
5124         if (!trace_eval_maps)
5125                 trace_eval_maps = map_array;
5126         else {
5127                 ptr = trace_eval_maps;
5128                 for (;;) {
5129                         ptr = trace_eval_jmp_to_tail(ptr);
5130                         if (!ptr->tail.next)
5131                                 break;
5132                         ptr = ptr->tail.next;
5133
5134                 }
5135                 ptr->tail.next = map_array;
5136         }
5137         map_array->head.mod = mod;
5138         map_array->head.length = len;
5139         map_array++;
5140
5141         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5142                 map_array->map = **map;
5143                 map_array++;
5144         }
5145         memset(map_array, 0, sizeof(*map_array));
5146
5147         mutex_unlock(&trace_eval_mutex);
5148 }
5149
5150 static void trace_create_eval_file(struct dentry *d_tracer)
5151 {
5152         trace_create_file("eval_map", 0444, d_tracer,
5153                           NULL, &tracing_eval_map_fops);
5154 }
5155
5156 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5157 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5158 static inline void trace_insert_eval_map_file(struct module *mod,
5159                               struct trace_eval_map **start, int len) { }
5160 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5161
5162 static void trace_insert_eval_map(struct module *mod,
5163                                   struct trace_eval_map **start, int len)
5164 {
5165         struct trace_eval_map **map;
5166
5167         if (len <= 0)
5168                 return;
5169
5170         map = start;
5171
5172         trace_event_eval_update(map, len);
5173
5174         trace_insert_eval_map_file(mod, start, len);
5175 }
5176
5177 static ssize_t
5178 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5179                        size_t cnt, loff_t *ppos)
5180 {
5181         struct trace_array *tr = filp->private_data;
5182         char buf[MAX_TRACER_SIZE+2];
5183         int r;
5184
5185         mutex_lock(&trace_types_lock);
5186         r = sprintf(buf, "%s\n", tr->current_trace->name);
5187         mutex_unlock(&trace_types_lock);
5188
5189         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5190 }
5191
5192 int tracer_init(struct tracer *t, struct trace_array *tr)
5193 {
5194         tracing_reset_online_cpus(&tr->trace_buffer);
5195         return t->init(tr);
5196 }
5197
5198 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5199 {
5200         int cpu;
5201
5202         for_each_tracing_cpu(cpu)
5203                 per_cpu_ptr(buf->data, cpu)->entries = val;
5204 }
5205
5206 #ifdef CONFIG_TRACER_MAX_TRACE
5207 /* resize @tr's buffer to the size of @size_tr's entries */
5208 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5209                                         struct trace_buffer *size_buf, int cpu_id)
5210 {
5211         int cpu, ret = 0;
5212
5213         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5214                 for_each_tracing_cpu(cpu) {
5215                         ret = ring_buffer_resize(trace_buf->buffer,
5216                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5217                         if (ret < 0)
5218                                 break;
5219                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5220                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5221                 }
5222         } else {
5223                 ret = ring_buffer_resize(trace_buf->buffer,
5224                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5225                 if (ret == 0)
5226                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5227                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5228         }
5229
5230         return ret;
5231 }
5232 #endif /* CONFIG_TRACER_MAX_TRACE */
5233
5234 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5235                                         unsigned long size, int cpu)
5236 {
5237         int ret;
5238
5239         /*
5240          * If kernel or user changes the size of the ring buffer
5241          * we use the size that was given, and we can forget about
5242          * expanding it later.
5243          */
5244         ring_buffer_expanded = true;
5245
5246         /* May be called before buffers are initialized */
5247         if (!tr->trace_buffer.buffer)
5248                 return 0;
5249
5250         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5251         if (ret < 0)
5252                 return ret;
5253
5254 #ifdef CONFIG_TRACER_MAX_TRACE
5255         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5256             !tr->current_trace->use_max_tr)
5257                 goto out;
5258
5259         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5260         if (ret < 0) {
5261                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5262                                                      &tr->trace_buffer, cpu);
5263                 if (r < 0) {
5264                         /*
5265                          * AARGH! We are left with different
5266                          * size max buffer!!!!
5267                          * The max buffer is our "snapshot" buffer.
5268                          * When a tracer needs a snapshot (one of the
5269                          * latency tracers), it swaps the max buffer
5270                          * with the saved snap shot. We succeeded to
5271                          * update the size of the main buffer, but failed to
5272                          * update the size of the max buffer. But when we tried
5273                          * to reset the main buffer to the original size, we
5274                          * failed there too. This is very unlikely to
5275                          * happen, but if it does, warn and kill all
5276                          * tracing.
5277                          */
5278                         WARN_ON(1);
5279                         tracing_disabled = 1;
5280                 }
5281                 return ret;
5282         }
5283
5284         if (cpu == RING_BUFFER_ALL_CPUS)
5285                 set_buffer_entries(&tr->max_buffer, size);
5286         else
5287                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5288
5289  out:
5290 #endif /* CONFIG_TRACER_MAX_TRACE */
5291
5292         if (cpu == RING_BUFFER_ALL_CPUS)
5293                 set_buffer_entries(&tr->trace_buffer, size);
5294         else
5295                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5296
5297         return ret;
5298 }
5299
5300 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5301                                           unsigned long size, int cpu_id)
5302 {
5303         int ret = size;
5304
5305         mutex_lock(&trace_types_lock);
5306
5307         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5308                 /* make sure, this cpu is enabled in the mask */
5309                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5310                         ret = -EINVAL;
5311                         goto out;
5312                 }
5313         }
5314
5315         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5316         if (ret < 0)
5317                 ret = -ENOMEM;
5318
5319 out:
5320         mutex_unlock(&trace_types_lock);
5321
5322         return ret;
5323 }
5324
5325
5326 /**
5327  * tracing_update_buffers - used by tracing facility to expand ring buffers
5328  *
5329  * To save on memory when the tracing is never used on a system with it
5330  * configured in. The ring buffers are set to a minimum size. But once
5331  * a user starts to use the tracing facility, then they need to grow
5332  * to their default size.
5333  *
5334  * This function is to be called when a tracer is about to be used.
5335  */
5336 int tracing_update_buffers(void)
5337 {
5338         int ret = 0;
5339
5340         mutex_lock(&trace_types_lock);
5341         if (!ring_buffer_expanded)
5342                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5343                                                 RING_BUFFER_ALL_CPUS);
5344         mutex_unlock(&trace_types_lock);
5345
5346         return ret;
5347 }
5348
5349 struct trace_option_dentry;
5350
5351 static void
5352 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5353
5354 /*
5355  * Used to clear out the tracer before deletion of an instance.
5356  * Must have trace_types_lock held.
5357  */
5358 static void tracing_set_nop(struct trace_array *tr)
5359 {
5360         if (tr->current_trace == &nop_trace)
5361                 return;
5362         
5363         tr->current_trace->enabled--;
5364
5365         if (tr->current_trace->reset)
5366                 tr->current_trace->reset(tr);
5367
5368         tr->current_trace = &nop_trace;
5369 }
5370
5371 static bool tracer_options_updated;
5372
5373 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5374 {
5375         /* Only enable if the directory has been created already. */
5376         if (!tr->dir)
5377                 return;
5378
5379         /* Only create trace option files after update_tracer_options finish */
5380         if (!tracer_options_updated)
5381                 return;
5382
5383         create_trace_option_files(tr, t);
5384 }
5385
5386 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5387 {
5388         struct tracer *t;
5389 #ifdef CONFIG_TRACER_MAX_TRACE
5390         bool had_max_tr;
5391 #endif
5392         int ret = 0;
5393
5394         mutex_lock(&trace_types_lock);
5395
5396         if (!ring_buffer_expanded) {
5397                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5398                                                 RING_BUFFER_ALL_CPUS);
5399                 if (ret < 0)
5400                         goto out;
5401                 ret = 0;
5402         }
5403
5404         for (t = trace_types; t; t = t->next) {
5405                 if (strcmp(t->name, buf) == 0)
5406                         break;
5407         }
5408         if (!t) {
5409                 ret = -EINVAL;
5410                 goto out;
5411         }
5412         if (t == tr->current_trace)
5413                 goto out;
5414
5415         /* Some tracers won't work on kernel command line */
5416         if (system_state < SYSTEM_RUNNING && t->noboot) {
5417                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5418                         t->name);
5419                 goto out;
5420         }
5421
5422         /* Some tracers are only allowed for the top level buffer */
5423         if (!trace_ok_for_array(t, tr)) {
5424                 ret = -EINVAL;
5425                 goto out;
5426         }
5427
5428         /* If trace pipe files are being read, we can't change the tracer */
5429         if (tr->current_trace->ref) {
5430                 ret = -EBUSY;
5431                 goto out;
5432         }
5433
5434         trace_branch_disable();
5435
5436         tr->current_trace->enabled--;
5437
5438         if (tr->current_trace->reset)
5439                 tr->current_trace->reset(tr);
5440
5441         /* Current trace needs to be nop_trace before synchronize_sched */
5442         tr->current_trace = &nop_trace;
5443
5444 #ifdef CONFIG_TRACER_MAX_TRACE
5445         had_max_tr = tr->allocated_snapshot;
5446
5447         if (had_max_tr && !t->use_max_tr) {
5448                 /*
5449                  * We need to make sure that the update_max_tr sees that
5450                  * current_trace changed to nop_trace to keep it from
5451                  * swapping the buffers after we resize it.
5452                  * The update_max_tr is called from interrupts disabled
5453                  * so a synchronized_sched() is sufficient.
5454                  */
5455                 synchronize_sched();
5456                 free_snapshot(tr);
5457         }
5458 #endif
5459
5460 #ifdef CONFIG_TRACER_MAX_TRACE
5461         if (t->use_max_tr && !had_max_tr) {
5462                 ret = tracing_alloc_snapshot_instance(tr);
5463                 if (ret < 0)
5464                         goto out;
5465         }
5466 #endif
5467
5468         if (t->init) {
5469                 ret = tracer_init(t, tr);
5470                 if (ret)
5471                         goto out;
5472         }
5473
5474         tr->current_trace = t;
5475         tr->current_trace->enabled++;
5476         trace_branch_enable(tr);
5477  out:
5478         mutex_unlock(&trace_types_lock);
5479
5480         return ret;
5481 }
5482
5483 static ssize_t
5484 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5485                         size_t cnt, loff_t *ppos)
5486 {
5487         struct trace_array *tr = filp->private_data;
5488         char buf[MAX_TRACER_SIZE+1];
5489         int i;
5490         size_t ret;
5491         int err;
5492
5493         ret = cnt;
5494
5495         if (cnt > MAX_TRACER_SIZE)
5496                 cnt = MAX_TRACER_SIZE;
5497
5498         if (copy_from_user(buf, ubuf, cnt))
5499                 return -EFAULT;
5500
5501         buf[cnt] = 0;
5502
5503         /* strip ending whitespace. */
5504         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5505                 buf[i] = 0;
5506
5507         err = tracing_set_tracer(tr, buf);
5508         if (err)
5509                 return err;
5510
5511         *ppos += ret;
5512
5513         return ret;
5514 }
5515
5516 static ssize_t
5517 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5518                    size_t cnt, loff_t *ppos)
5519 {
5520         char buf[64];
5521         int r;
5522
5523         r = snprintf(buf, sizeof(buf), "%ld\n",
5524                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5525         if (r > sizeof(buf))
5526                 r = sizeof(buf);
5527         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5528 }
5529
5530 static ssize_t
5531 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5532                     size_t cnt, loff_t *ppos)
5533 {
5534         unsigned long val;
5535         int ret;
5536
5537         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5538         if (ret)
5539                 return ret;
5540
5541         *ptr = val * 1000;
5542
5543         return cnt;
5544 }
5545
5546 static ssize_t
5547 tracing_thresh_read(struct file *filp, char __user *ubuf,
5548                     size_t cnt, loff_t *ppos)
5549 {
5550         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5551 }
5552
5553 static ssize_t
5554 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5555                      size_t cnt, loff_t *ppos)
5556 {
5557         struct trace_array *tr = filp->private_data;
5558         int ret;
5559
5560         mutex_lock(&trace_types_lock);
5561         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5562         if (ret < 0)
5563                 goto out;
5564
5565         if (tr->current_trace->update_thresh) {
5566                 ret = tr->current_trace->update_thresh(tr);
5567                 if (ret < 0)
5568                         goto out;
5569         }
5570
5571         ret = cnt;
5572 out:
5573         mutex_unlock(&trace_types_lock);
5574
5575         return ret;
5576 }
5577
5578 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5579
5580 static ssize_t
5581 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5582                      size_t cnt, loff_t *ppos)
5583 {
5584         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5585 }
5586
5587 static ssize_t
5588 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5589                       size_t cnt, loff_t *ppos)
5590 {
5591         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5592 }
5593
5594 #endif
5595
5596 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5597 {
5598         struct trace_array *tr = inode->i_private;
5599         struct trace_iterator *iter;
5600         int ret = 0;
5601
5602         if (tracing_disabled)
5603                 return -ENODEV;
5604
5605         if (trace_array_get(tr) < 0)
5606                 return -ENODEV;
5607
5608         mutex_lock(&trace_types_lock);
5609
5610         /* create a buffer to store the information to pass to userspace */
5611         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5612         if (!iter) {
5613                 ret = -ENOMEM;
5614                 __trace_array_put(tr);
5615                 goto out;
5616         }
5617
5618         trace_seq_init(&iter->seq);
5619         iter->trace = tr->current_trace;
5620
5621         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5622                 ret = -ENOMEM;
5623                 goto fail;
5624         }
5625
5626         /* trace pipe does not show start of buffer */
5627         cpumask_setall(iter->started);
5628
5629         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5630                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5631
5632         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5633         if (trace_clocks[tr->clock_id].in_ns)
5634                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5635
5636         iter->tr = tr;
5637         iter->trace_buffer = &tr->trace_buffer;
5638         iter->cpu_file = tracing_get_cpu(inode);
5639         mutex_init(&iter->mutex);
5640         filp->private_data = iter;
5641
5642         if (iter->trace->pipe_open)
5643                 iter->trace->pipe_open(iter);
5644
5645         nonseekable_open(inode, filp);
5646
5647         tr->current_trace->ref++;
5648 out:
5649         mutex_unlock(&trace_types_lock);
5650         return ret;
5651
5652 fail:
5653         kfree(iter);
5654         __trace_array_put(tr);
5655         mutex_unlock(&trace_types_lock);
5656         return ret;
5657 }
5658
5659 static int tracing_release_pipe(struct inode *inode, struct file *file)
5660 {
5661         struct trace_iterator *iter = file->private_data;
5662         struct trace_array *tr = inode->i_private;
5663
5664         mutex_lock(&trace_types_lock);
5665
5666         tr->current_trace->ref--;
5667
5668         if (iter->trace->pipe_close)
5669                 iter->trace->pipe_close(iter);
5670
5671         mutex_unlock(&trace_types_lock);
5672
5673         free_cpumask_var(iter->started);
5674         mutex_destroy(&iter->mutex);
5675         kfree(iter);
5676
5677         trace_array_put(tr);
5678
5679         return 0;
5680 }
5681
5682 static __poll_t
5683 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5684 {
5685         struct trace_array *tr = iter->tr;
5686
5687         /* Iterators are static, they should be filled or empty */
5688         if (trace_buffer_iter(iter, iter->cpu_file))
5689                 return EPOLLIN | EPOLLRDNORM;
5690
5691         if (tr->trace_flags & TRACE_ITER_BLOCK)
5692                 /*
5693                  * Always select as readable when in blocking mode
5694                  */
5695                 return EPOLLIN | EPOLLRDNORM;
5696         else
5697                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5698                                              filp, poll_table);
5699 }
5700
5701 static __poll_t
5702 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5703 {
5704         struct trace_iterator *iter = filp->private_data;
5705
5706         return trace_poll(iter, filp, poll_table);
5707 }
5708
5709 /* Must be called with iter->mutex held. */
5710 static int tracing_wait_pipe(struct file *filp)
5711 {
5712         struct trace_iterator *iter = filp->private_data;
5713         int ret;
5714
5715         while (trace_empty(iter)) {
5716
5717                 if ((filp->f_flags & O_NONBLOCK)) {
5718                         return -EAGAIN;
5719                 }
5720
5721                 /*
5722                  * We block until we read something and tracing is disabled.
5723                  * We still block if tracing is disabled, but we have never
5724                  * read anything. This allows a user to cat this file, and
5725                  * then enable tracing. But after we have read something,
5726                  * we give an EOF when tracing is again disabled.
5727                  *
5728                  * iter->pos will be 0 if we haven't read anything.
5729                  */
5730                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5731                         break;
5732
5733                 mutex_unlock(&iter->mutex);
5734
5735                 ret = wait_on_pipe(iter, false);
5736
5737                 mutex_lock(&iter->mutex);
5738
5739                 if (ret)
5740                         return ret;
5741         }
5742
5743         return 1;
5744 }
5745
5746 /*
5747  * Consumer reader.
5748  */
5749 static ssize_t
5750 tracing_read_pipe(struct file *filp, char __user *ubuf,
5751                   size_t cnt, loff_t *ppos)
5752 {
5753         struct trace_iterator *iter = filp->private_data;
5754         ssize_t sret;
5755
5756         /*
5757          * Avoid more than one consumer on a single file descriptor
5758          * This is just a matter of traces coherency, the ring buffer itself
5759          * is protected.
5760          */
5761         mutex_lock(&iter->mutex);
5762
5763         /* return any leftover data */
5764         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5765         if (sret != -EBUSY)
5766                 goto out;
5767
5768         trace_seq_init(&iter->seq);
5769
5770         if (iter->trace->read) {
5771                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5772                 if (sret)
5773                         goto out;
5774         }
5775
5776 waitagain:
5777         sret = tracing_wait_pipe(filp);
5778         if (sret <= 0)
5779                 goto out;
5780
5781         /* stop when tracing is finished */
5782         if (trace_empty(iter)) {
5783                 sret = 0;
5784                 goto out;
5785         }
5786
5787         if (cnt >= PAGE_SIZE)
5788                 cnt = PAGE_SIZE - 1;
5789
5790         /* reset all but tr, trace, and overruns */
5791         memset(&iter->seq, 0,
5792                sizeof(struct trace_iterator) -
5793                offsetof(struct trace_iterator, seq));
5794         cpumask_clear(iter->started);
5795         trace_seq_init(&iter->seq);
5796         iter->pos = -1;
5797
5798         trace_event_read_lock();
5799         trace_access_lock(iter->cpu_file);
5800         while (trace_find_next_entry_inc(iter) != NULL) {
5801                 enum print_line_t ret;
5802                 int save_len = iter->seq.seq.len;
5803
5804                 ret = print_trace_line(iter);
5805                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5806                         /* don't print partial lines */
5807                         iter->seq.seq.len = save_len;
5808                         break;
5809                 }
5810                 if (ret != TRACE_TYPE_NO_CONSUME)
5811                         trace_consume(iter);
5812
5813                 if (trace_seq_used(&iter->seq) >= cnt)
5814                         break;
5815
5816                 /*
5817                  * Setting the full flag means we reached the trace_seq buffer
5818                  * size and we should leave by partial output condition above.
5819                  * One of the trace_seq_* functions is not used properly.
5820                  */
5821                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5822                           iter->ent->type);
5823         }
5824         trace_access_unlock(iter->cpu_file);
5825         trace_event_read_unlock();
5826
5827         /* Now copy what we have to the user */
5828         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5829         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5830                 trace_seq_init(&iter->seq);
5831
5832         /*
5833          * If there was nothing to send to user, in spite of consuming trace
5834          * entries, go back to wait for more entries.
5835          */
5836         if (sret == -EBUSY)
5837                 goto waitagain;
5838
5839 out:
5840         mutex_unlock(&iter->mutex);
5841
5842         return sret;
5843 }
5844
5845 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5846                                      unsigned int idx)
5847 {
5848         __free_page(spd->pages[idx]);
5849 }
5850
5851 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5852         .can_merge              = 0,
5853         .confirm                = generic_pipe_buf_confirm,
5854         .release                = generic_pipe_buf_release,
5855         .steal                  = generic_pipe_buf_steal,
5856         .get                    = generic_pipe_buf_get,
5857 };
5858
5859 static size_t
5860 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5861 {
5862         size_t count;
5863         int save_len;
5864         int ret;
5865
5866         /* Seq buffer is page-sized, exactly what we need. */
5867         for (;;) {
5868                 save_len = iter->seq.seq.len;
5869                 ret = print_trace_line(iter);
5870
5871                 if (trace_seq_has_overflowed(&iter->seq)) {
5872                         iter->seq.seq.len = save_len;
5873                         break;
5874                 }
5875
5876                 /*
5877                  * This should not be hit, because it should only
5878                  * be set if the iter->seq overflowed. But check it
5879                  * anyway to be safe.
5880                  */
5881                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5882                         iter->seq.seq.len = save_len;
5883                         break;
5884                 }
5885
5886                 count = trace_seq_used(&iter->seq) - save_len;
5887                 if (rem < count) {
5888                         rem = 0;
5889                         iter->seq.seq.len = save_len;
5890                         break;
5891                 }
5892
5893                 if (ret != TRACE_TYPE_NO_CONSUME)
5894                         trace_consume(iter);
5895                 rem -= count;
5896                 if (!trace_find_next_entry_inc(iter))   {
5897                         rem = 0;
5898                         iter->ent = NULL;
5899                         break;
5900                 }
5901         }
5902
5903         return rem;
5904 }
5905
5906 static ssize_t tracing_splice_read_pipe(struct file *filp,
5907                                         loff_t *ppos,
5908                                         struct pipe_inode_info *pipe,
5909                                         size_t len,
5910                                         unsigned int flags)
5911 {
5912         struct page *pages_def[PIPE_DEF_BUFFERS];
5913         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5914         struct trace_iterator *iter = filp->private_data;
5915         struct splice_pipe_desc spd = {
5916                 .pages          = pages_def,
5917                 .partial        = partial_def,
5918                 .nr_pages       = 0, /* This gets updated below. */
5919                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5920                 .ops            = &tracing_pipe_buf_ops,
5921                 .spd_release    = tracing_spd_release_pipe,
5922         };
5923         ssize_t ret;
5924         size_t rem;
5925         unsigned int i;
5926
5927         if (splice_grow_spd(pipe, &spd))
5928                 return -ENOMEM;
5929
5930         mutex_lock(&iter->mutex);
5931
5932         if (iter->trace->splice_read) {
5933                 ret = iter->trace->splice_read(iter, filp,
5934                                                ppos, pipe, len, flags);
5935                 if (ret)
5936                         goto out_err;
5937         }
5938
5939         ret = tracing_wait_pipe(filp);
5940         if (ret <= 0)
5941                 goto out_err;
5942
5943         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5944                 ret = -EFAULT;
5945                 goto out_err;
5946         }
5947
5948         trace_event_read_lock();
5949         trace_access_lock(iter->cpu_file);
5950
5951         /* Fill as many pages as possible. */
5952         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5953                 spd.pages[i] = alloc_page(GFP_KERNEL);
5954                 if (!spd.pages[i])
5955                         break;
5956
5957                 rem = tracing_fill_pipe_page(rem, iter);
5958
5959                 /* Copy the data into the page, so we can start over. */
5960                 ret = trace_seq_to_buffer(&iter->seq,
5961                                           page_address(spd.pages[i]),
5962                                           trace_seq_used(&iter->seq));
5963                 if (ret < 0) {
5964                         __free_page(spd.pages[i]);
5965                         break;
5966                 }
5967                 spd.partial[i].offset = 0;
5968                 spd.partial[i].len = trace_seq_used(&iter->seq);
5969
5970                 trace_seq_init(&iter->seq);
5971         }
5972
5973         trace_access_unlock(iter->cpu_file);
5974         trace_event_read_unlock();
5975         mutex_unlock(&iter->mutex);
5976
5977         spd.nr_pages = i;
5978
5979         if (i)
5980                 ret = splice_to_pipe(pipe, &spd);
5981         else
5982                 ret = 0;
5983 out:
5984         splice_shrink_spd(&spd);
5985         return ret;
5986
5987 out_err:
5988         mutex_unlock(&iter->mutex);
5989         goto out;
5990 }
5991
5992 static ssize_t
5993 tracing_entries_read(struct file *filp, char __user *ubuf,
5994                      size_t cnt, loff_t *ppos)
5995 {
5996         struct inode *inode = file_inode(filp);
5997         struct trace_array *tr = inode->i_private;
5998         int cpu = tracing_get_cpu(inode);
5999         char buf[64];
6000         int r = 0;
6001         ssize_t ret;
6002
6003         mutex_lock(&trace_types_lock);
6004
6005         if (cpu == RING_BUFFER_ALL_CPUS) {
6006                 int cpu, buf_size_same;
6007                 unsigned long size;
6008
6009                 size = 0;
6010                 buf_size_same = 1;
6011                 /* check if all cpu sizes are same */
6012                 for_each_tracing_cpu(cpu) {
6013                         /* fill in the size from first enabled cpu */
6014                         if (size == 0)
6015                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6016                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6017                                 buf_size_same = 0;
6018                                 break;
6019                         }
6020                 }
6021
6022                 if (buf_size_same) {
6023                         if (!ring_buffer_expanded)
6024                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6025                                             size >> 10,
6026                                             trace_buf_size >> 10);
6027                         else
6028                                 r = sprintf(buf, "%lu\n", size >> 10);
6029                 } else
6030                         r = sprintf(buf, "X\n");
6031         } else
6032                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6033
6034         mutex_unlock(&trace_types_lock);
6035
6036         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6037         return ret;
6038 }
6039
6040 static ssize_t
6041 tracing_entries_write(struct file *filp, const char __user *ubuf,
6042                       size_t cnt, loff_t *ppos)
6043 {
6044         struct inode *inode = file_inode(filp);
6045         struct trace_array *tr = inode->i_private;
6046         unsigned long val;
6047         int ret;
6048
6049         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6050         if (ret)
6051                 return ret;
6052
6053         /* must have at least 1 entry */
6054         if (!val)
6055                 return -EINVAL;
6056
6057         /* value is in KB */
6058         val <<= 10;
6059         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6060         if (ret < 0)
6061                 return ret;
6062
6063         *ppos += cnt;
6064
6065         return cnt;
6066 }
6067
6068 static ssize_t
6069 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6070                                 size_t cnt, loff_t *ppos)
6071 {
6072         struct trace_array *tr = filp->private_data;
6073         char buf[64];
6074         int r, cpu;
6075         unsigned long size = 0, expanded_size = 0;
6076
6077         mutex_lock(&trace_types_lock);
6078         for_each_tracing_cpu(cpu) {
6079                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6080                 if (!ring_buffer_expanded)
6081                         expanded_size += trace_buf_size >> 10;
6082         }
6083         if (ring_buffer_expanded)
6084                 r = sprintf(buf, "%lu\n", size);
6085         else
6086                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6087         mutex_unlock(&trace_types_lock);
6088
6089         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6090 }
6091
6092 static ssize_t
6093 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6094                           size_t cnt, loff_t *ppos)
6095 {
6096         /*
6097          * There is no need to read what the user has written, this function
6098          * is just to make sure that there is no error when "echo" is used
6099          */
6100
6101         *ppos += cnt;
6102
6103         return cnt;
6104 }
6105
6106 static int
6107 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6108 {
6109         struct trace_array *tr = inode->i_private;
6110
6111         /* disable tracing ? */
6112         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6113                 tracer_tracing_off(tr);
6114         /* resize the ring buffer to 0 */
6115         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6116
6117         trace_array_put(tr);
6118
6119         return 0;
6120 }
6121
6122 static ssize_t
6123 tracing_mark_write(struct file *filp, const char __user *ubuf,
6124                                         size_t cnt, loff_t *fpos)
6125 {
6126         struct trace_array *tr = filp->private_data;
6127         struct ring_buffer_event *event;
6128         enum event_trigger_type tt = ETT_NONE;
6129         struct ring_buffer *buffer;
6130         struct print_entry *entry;
6131         unsigned long irq_flags;
6132         const char faulted[] = "<faulted>";
6133         ssize_t written;
6134         int size;
6135         int len;
6136
6137 /* Used in tracing_mark_raw_write() as well */
6138 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6139
6140         if (tracing_disabled)
6141                 return -EINVAL;
6142
6143         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6144                 return -EINVAL;
6145
6146         if (cnt > TRACE_BUF_SIZE)
6147                 cnt = TRACE_BUF_SIZE;
6148
6149         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6150
6151         local_save_flags(irq_flags);
6152         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6153
6154         /* If less than "<faulted>", then make sure we can still add that */
6155         if (cnt < FAULTED_SIZE)
6156                 size += FAULTED_SIZE - cnt;
6157
6158         buffer = tr->trace_buffer.buffer;
6159         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6160                                             irq_flags, preempt_count());
6161         if (unlikely(!event))
6162                 /* Ring buffer disabled, return as if not open for write */
6163                 return -EBADF;
6164
6165         entry = ring_buffer_event_data(event);
6166         entry->ip = _THIS_IP_;
6167
6168         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6169         if (len) {
6170                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6171                 cnt = FAULTED_SIZE;
6172                 written = -EFAULT;
6173         } else
6174                 written = cnt;
6175         len = cnt;
6176
6177         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6178                 /* do not add \n before testing triggers, but add \0 */
6179                 entry->buf[cnt] = '\0';
6180                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6181         }
6182
6183         if (entry->buf[cnt - 1] != '\n') {
6184                 entry->buf[cnt] = '\n';
6185                 entry->buf[cnt + 1] = '\0';
6186         } else
6187                 entry->buf[cnt] = '\0';
6188
6189         __buffer_unlock_commit(buffer, event);
6190
6191         if (tt)
6192                 event_triggers_post_call(tr->trace_marker_file, tt);
6193
6194         if (written > 0)
6195                 *fpos += written;
6196
6197         return written;
6198 }
6199
6200 /* Limit it for now to 3K (including tag) */
6201 #define RAW_DATA_MAX_SIZE (1024*3)
6202
6203 static ssize_t
6204 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6205                                         size_t cnt, loff_t *fpos)
6206 {
6207         struct trace_array *tr = filp->private_data;
6208         struct ring_buffer_event *event;
6209         struct ring_buffer *buffer;
6210         struct raw_data_entry *entry;
6211         const char faulted[] = "<faulted>";
6212         unsigned long irq_flags;
6213         ssize_t written;
6214         int size;
6215         int len;
6216
6217 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6218
6219         if (tracing_disabled)
6220                 return -EINVAL;
6221
6222         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6223                 return -EINVAL;
6224
6225         /* The marker must at least have a tag id */
6226         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6227                 return -EINVAL;
6228
6229         if (cnt > TRACE_BUF_SIZE)
6230                 cnt = TRACE_BUF_SIZE;
6231
6232         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6233
6234         local_save_flags(irq_flags);
6235         size = sizeof(*entry) + cnt;
6236         if (cnt < FAULT_SIZE_ID)
6237                 size += FAULT_SIZE_ID - cnt;
6238
6239         buffer = tr->trace_buffer.buffer;
6240         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6241                                             irq_flags, preempt_count());
6242         if (!event)
6243                 /* Ring buffer disabled, return as if not open for write */
6244                 return -EBADF;
6245
6246         entry = ring_buffer_event_data(event);
6247
6248         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6249         if (len) {
6250                 entry->id = -1;
6251                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6252                 written = -EFAULT;
6253         } else
6254                 written = cnt;
6255
6256         __buffer_unlock_commit(buffer, event);
6257
6258         if (written > 0)
6259                 *fpos += written;
6260
6261         return written;
6262 }
6263
6264 static int tracing_clock_show(struct seq_file *m, void *v)
6265 {
6266         struct trace_array *tr = m->private;
6267         int i;
6268
6269         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6270                 seq_printf(m,
6271                         "%s%s%s%s", i ? " " : "",
6272                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6273                         i == tr->clock_id ? "]" : "");
6274         seq_putc(m, '\n');
6275
6276         return 0;
6277 }
6278
6279 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6280 {
6281         int i;
6282
6283         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6284                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6285                         break;
6286         }
6287         if (i == ARRAY_SIZE(trace_clocks))
6288                 return -EINVAL;
6289
6290         mutex_lock(&trace_types_lock);
6291
6292         tr->clock_id = i;
6293
6294         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6295
6296         /*
6297          * New clock may not be consistent with the previous clock.
6298          * Reset the buffer so that it doesn't have incomparable timestamps.
6299          */
6300         tracing_reset_online_cpus(&tr->trace_buffer);
6301
6302 #ifdef CONFIG_TRACER_MAX_TRACE
6303         if (tr->max_buffer.buffer)
6304                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6305         tracing_reset_online_cpus(&tr->max_buffer);
6306 #endif
6307
6308         mutex_unlock(&trace_types_lock);
6309
6310         return 0;
6311 }
6312
6313 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6314                                    size_t cnt, loff_t *fpos)
6315 {
6316         struct seq_file *m = filp->private_data;
6317         struct trace_array *tr = m->private;
6318         char buf[64];
6319         const char *clockstr;
6320         int ret;
6321
6322         if (cnt >= sizeof(buf))
6323                 return -EINVAL;
6324
6325         if (copy_from_user(buf, ubuf, cnt))
6326                 return -EFAULT;
6327
6328         buf[cnt] = 0;
6329
6330         clockstr = strstrip(buf);
6331
6332         ret = tracing_set_clock(tr, clockstr);
6333         if (ret)
6334                 return ret;
6335
6336         *fpos += cnt;
6337
6338         return cnt;
6339 }
6340
6341 static int tracing_clock_open(struct inode *inode, struct file *file)
6342 {
6343         struct trace_array *tr = inode->i_private;
6344         int ret;
6345
6346         if (tracing_disabled)
6347                 return -ENODEV;
6348
6349         if (trace_array_get(tr))
6350                 return -ENODEV;
6351
6352         ret = single_open(file, tracing_clock_show, inode->i_private);
6353         if (ret < 0)
6354                 trace_array_put(tr);
6355
6356         return ret;
6357 }
6358
6359 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6360 {
6361         struct trace_array *tr = m->private;
6362
6363         mutex_lock(&trace_types_lock);
6364
6365         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6366                 seq_puts(m, "delta [absolute]\n");
6367         else
6368                 seq_puts(m, "[delta] absolute\n");
6369
6370         mutex_unlock(&trace_types_lock);
6371
6372         return 0;
6373 }
6374
6375 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6376 {
6377         struct trace_array *tr = inode->i_private;
6378         int ret;
6379
6380         if (tracing_disabled)
6381                 return -ENODEV;
6382
6383         if (trace_array_get(tr))
6384                 return -ENODEV;
6385
6386         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6387         if (ret < 0)
6388                 trace_array_put(tr);
6389
6390         return ret;
6391 }
6392
6393 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6394 {
6395         int ret = 0;
6396
6397         mutex_lock(&trace_types_lock);
6398
6399         if (abs && tr->time_stamp_abs_ref++)
6400                 goto out;
6401
6402         if (!abs) {
6403                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6404                         ret = -EINVAL;
6405                         goto out;
6406                 }
6407
6408                 if (--tr->time_stamp_abs_ref)
6409                         goto out;
6410         }
6411
6412         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6413
6414 #ifdef CONFIG_TRACER_MAX_TRACE
6415         if (tr->max_buffer.buffer)
6416                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6417 #endif
6418  out:
6419         mutex_unlock(&trace_types_lock);
6420
6421         return ret;
6422 }
6423
6424 struct ftrace_buffer_info {
6425         struct trace_iterator   iter;
6426         void                    *spare;
6427         unsigned int            spare_cpu;
6428         unsigned int            read;
6429 };
6430
6431 #ifdef CONFIG_TRACER_SNAPSHOT
6432 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6433 {
6434         struct trace_array *tr = inode->i_private;
6435         struct trace_iterator *iter;
6436         struct seq_file *m;
6437         int ret = 0;
6438
6439         if (trace_array_get(tr) < 0)
6440                 return -ENODEV;
6441
6442         if (file->f_mode & FMODE_READ) {
6443                 iter = __tracing_open(inode, file, true);
6444                 if (IS_ERR(iter))
6445                         ret = PTR_ERR(iter);
6446         } else {
6447                 /* Writes still need the seq_file to hold the private data */
6448                 ret = -ENOMEM;
6449                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6450                 if (!m)
6451                         goto out;
6452                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6453                 if (!iter) {
6454                         kfree(m);
6455                         goto out;
6456                 }
6457                 ret = 0;
6458
6459                 iter->tr = tr;
6460                 iter->trace_buffer = &tr->max_buffer;
6461                 iter->cpu_file = tracing_get_cpu(inode);
6462                 m->private = iter;
6463                 file->private_data = m;
6464         }
6465 out:
6466         if (ret < 0)
6467                 trace_array_put(tr);
6468
6469         return ret;
6470 }
6471
6472 static ssize_t
6473 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6474                        loff_t *ppos)
6475 {
6476         struct seq_file *m = filp->private_data;
6477         struct trace_iterator *iter = m->private;
6478         struct trace_array *tr = iter->tr;
6479         unsigned long val;
6480         int ret;
6481
6482         ret = tracing_update_buffers();
6483         if (ret < 0)
6484                 return ret;
6485
6486         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6487         if (ret)
6488                 return ret;
6489
6490         mutex_lock(&trace_types_lock);
6491
6492         if (tr->current_trace->use_max_tr) {
6493                 ret = -EBUSY;
6494                 goto out;
6495         }
6496
6497         switch (val) {
6498         case 0:
6499                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6500                         ret = -EINVAL;
6501                         break;
6502                 }
6503                 if (tr->allocated_snapshot)
6504                         free_snapshot(tr);
6505                 break;
6506         case 1:
6507 /* Only allow per-cpu swap if the ring buffer supports it */
6508 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6509                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6510                         ret = -EINVAL;
6511                         break;
6512                 }
6513 #endif
6514                 if (tr->allocated_snapshot)
6515                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6516                                         &tr->trace_buffer, iter->cpu_file);
6517                 else
6518                         ret = tracing_alloc_snapshot_instance(tr);
6519                 if (ret < 0)
6520                         break;
6521                 local_irq_disable();
6522                 /* Now, we're going to swap */
6523                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6524                         update_max_tr(tr, current, smp_processor_id());
6525                 else
6526                         update_max_tr_single(tr, current, iter->cpu_file);
6527                 local_irq_enable();
6528                 break;
6529         default:
6530                 if (tr->allocated_snapshot) {
6531                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6532                                 tracing_reset_online_cpus(&tr->max_buffer);
6533                         else
6534                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6535                 }
6536                 break;
6537         }
6538
6539         if (ret >= 0) {
6540                 *ppos += cnt;
6541                 ret = cnt;
6542         }
6543 out:
6544         mutex_unlock(&trace_types_lock);
6545         return ret;
6546 }
6547
6548 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6549 {
6550         struct seq_file *m = file->private_data;
6551         int ret;
6552
6553         ret = tracing_release(inode, file);
6554
6555         if (file->f_mode & FMODE_READ)
6556                 return ret;
6557
6558         /* If write only, the seq_file is just a stub */
6559         if (m)
6560                 kfree(m->private);
6561         kfree(m);
6562
6563         return 0;
6564 }
6565
6566 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6567 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6568                                     size_t count, loff_t *ppos);
6569 static int tracing_buffers_release(struct inode *inode, struct file *file);
6570 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6571                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6572
6573 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6574 {
6575         struct ftrace_buffer_info *info;
6576         int ret;
6577
6578         ret = tracing_buffers_open(inode, filp);
6579         if (ret < 0)
6580                 return ret;
6581
6582         info = filp->private_data;
6583
6584         if (info->iter.trace->use_max_tr) {
6585                 tracing_buffers_release(inode, filp);
6586                 return -EBUSY;
6587         }
6588
6589         info->iter.snapshot = true;
6590         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6591
6592         return ret;
6593 }
6594
6595 #endif /* CONFIG_TRACER_SNAPSHOT */
6596
6597
6598 static const struct file_operations tracing_thresh_fops = {
6599         .open           = tracing_open_generic,
6600         .read           = tracing_thresh_read,
6601         .write          = tracing_thresh_write,
6602         .llseek         = generic_file_llseek,
6603 };
6604
6605 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6606 static const struct file_operations tracing_max_lat_fops = {
6607         .open           = tracing_open_generic,
6608         .read           = tracing_max_lat_read,
6609         .write          = tracing_max_lat_write,
6610         .llseek         = generic_file_llseek,
6611 };
6612 #endif
6613
6614 static const struct file_operations set_tracer_fops = {
6615         .open           = tracing_open_generic,
6616         .read           = tracing_set_trace_read,
6617         .write          = tracing_set_trace_write,
6618         .llseek         = generic_file_llseek,
6619 };
6620
6621 static const struct file_operations tracing_pipe_fops = {
6622         .open           = tracing_open_pipe,
6623         .poll           = tracing_poll_pipe,
6624         .read           = tracing_read_pipe,
6625         .splice_read    = tracing_splice_read_pipe,
6626         .release        = tracing_release_pipe,
6627         .llseek         = no_llseek,
6628 };
6629
6630 static const struct file_operations tracing_entries_fops = {
6631         .open           = tracing_open_generic_tr,
6632         .read           = tracing_entries_read,
6633         .write          = tracing_entries_write,
6634         .llseek         = generic_file_llseek,
6635         .release        = tracing_release_generic_tr,
6636 };
6637
6638 static const struct file_operations tracing_total_entries_fops = {
6639         .open           = tracing_open_generic_tr,
6640         .read           = tracing_total_entries_read,
6641         .llseek         = generic_file_llseek,
6642         .release        = tracing_release_generic_tr,
6643 };
6644
6645 static const struct file_operations tracing_free_buffer_fops = {
6646         .open           = tracing_open_generic_tr,
6647         .write          = tracing_free_buffer_write,
6648         .release        = tracing_free_buffer_release,
6649 };
6650
6651 static const struct file_operations tracing_mark_fops = {
6652         .open           = tracing_open_generic_tr,
6653         .write          = tracing_mark_write,
6654         .llseek         = generic_file_llseek,
6655         .release        = tracing_release_generic_tr,
6656 };
6657
6658 static const struct file_operations tracing_mark_raw_fops = {
6659         .open           = tracing_open_generic_tr,
6660         .write          = tracing_mark_raw_write,
6661         .llseek         = generic_file_llseek,
6662         .release        = tracing_release_generic_tr,
6663 };
6664
6665 static const struct file_operations trace_clock_fops = {
6666         .open           = tracing_clock_open,
6667         .read           = seq_read,
6668         .llseek         = seq_lseek,
6669         .release        = tracing_single_release_tr,
6670         .write          = tracing_clock_write,
6671 };
6672
6673 static const struct file_operations trace_time_stamp_mode_fops = {
6674         .open           = tracing_time_stamp_mode_open,
6675         .read           = seq_read,
6676         .llseek         = seq_lseek,
6677         .release        = tracing_single_release_tr,
6678 };
6679
6680 #ifdef CONFIG_TRACER_SNAPSHOT
6681 static const struct file_operations snapshot_fops = {
6682         .open           = tracing_snapshot_open,
6683         .read           = seq_read,
6684         .write          = tracing_snapshot_write,
6685         .llseek         = tracing_lseek,
6686         .release        = tracing_snapshot_release,
6687 };
6688
6689 static const struct file_operations snapshot_raw_fops = {
6690         .open           = snapshot_raw_open,
6691         .read           = tracing_buffers_read,
6692         .release        = tracing_buffers_release,
6693         .splice_read    = tracing_buffers_splice_read,
6694         .llseek         = no_llseek,
6695 };
6696
6697 #endif /* CONFIG_TRACER_SNAPSHOT */
6698
6699 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6700 {
6701         struct trace_array *tr = inode->i_private;
6702         struct ftrace_buffer_info *info;
6703         int ret;
6704
6705         if (tracing_disabled)
6706                 return -ENODEV;
6707
6708         if (trace_array_get(tr) < 0)
6709                 return -ENODEV;
6710
6711         info = kzalloc(sizeof(*info), GFP_KERNEL);
6712         if (!info) {
6713                 trace_array_put(tr);
6714                 return -ENOMEM;
6715         }
6716
6717         mutex_lock(&trace_types_lock);
6718
6719         info->iter.tr           = tr;
6720         info->iter.cpu_file     = tracing_get_cpu(inode);
6721         info->iter.trace        = tr->current_trace;
6722         info->iter.trace_buffer = &tr->trace_buffer;
6723         info->spare             = NULL;
6724         /* Force reading ring buffer for first read */
6725         info->read              = (unsigned int)-1;
6726
6727         filp->private_data = info;
6728
6729         tr->current_trace->ref++;
6730
6731         mutex_unlock(&trace_types_lock);
6732
6733         ret = nonseekable_open(inode, filp);
6734         if (ret < 0)
6735                 trace_array_put(tr);
6736
6737         return ret;
6738 }
6739
6740 static __poll_t
6741 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6742 {
6743         struct ftrace_buffer_info *info = filp->private_data;
6744         struct trace_iterator *iter = &info->iter;
6745
6746         return trace_poll(iter, filp, poll_table);
6747 }
6748
6749 static ssize_t
6750 tracing_buffers_read(struct file *filp, char __user *ubuf,
6751                      size_t count, loff_t *ppos)
6752 {
6753         struct ftrace_buffer_info *info = filp->private_data;
6754         struct trace_iterator *iter = &info->iter;
6755         ssize_t ret = 0;
6756         ssize_t size;
6757
6758         if (!count)
6759                 return 0;
6760
6761 #ifdef CONFIG_TRACER_MAX_TRACE
6762         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6763                 return -EBUSY;
6764 #endif
6765
6766         if (!info->spare) {
6767                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6768                                                           iter->cpu_file);
6769                 if (IS_ERR(info->spare)) {
6770                         ret = PTR_ERR(info->spare);
6771                         info->spare = NULL;
6772                 } else {
6773                         info->spare_cpu = iter->cpu_file;
6774                 }
6775         }
6776         if (!info->spare)
6777                 return ret;
6778
6779         /* Do we have previous read data to read? */
6780         if (info->read < PAGE_SIZE)
6781                 goto read;
6782
6783  again:
6784         trace_access_lock(iter->cpu_file);
6785         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6786                                     &info->spare,
6787                                     count,
6788                                     iter->cpu_file, 0);
6789         trace_access_unlock(iter->cpu_file);
6790
6791         if (ret < 0) {
6792                 if (trace_empty(iter)) {
6793                         if ((filp->f_flags & O_NONBLOCK))
6794                                 return -EAGAIN;
6795
6796                         ret = wait_on_pipe(iter, false);
6797                         if (ret)
6798                                 return ret;
6799
6800                         goto again;
6801                 }
6802                 return 0;
6803         }
6804
6805         info->read = 0;
6806  read:
6807         size = PAGE_SIZE - info->read;
6808         if (size > count)
6809                 size = count;
6810
6811         ret = copy_to_user(ubuf, info->spare + info->read, size);
6812         if (ret == size)
6813                 return -EFAULT;
6814
6815         size -= ret;
6816
6817         *ppos += size;
6818         info->read += size;
6819
6820         return size;
6821 }
6822
6823 static int tracing_buffers_release(struct inode *inode, struct file *file)
6824 {
6825         struct ftrace_buffer_info *info = file->private_data;
6826         struct trace_iterator *iter = &info->iter;
6827
6828         mutex_lock(&trace_types_lock);
6829
6830         iter->tr->current_trace->ref--;
6831
6832         __trace_array_put(iter->tr);
6833
6834         if (info->spare)
6835                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6836                                            info->spare_cpu, info->spare);
6837         kfree(info);
6838
6839         mutex_unlock(&trace_types_lock);
6840
6841         return 0;
6842 }
6843
6844 struct buffer_ref {
6845         struct ring_buffer      *buffer;
6846         void                    *page;
6847         int                     cpu;
6848         refcount_t              refcount;
6849 };
6850
6851 static void buffer_ref_release(struct buffer_ref *ref)
6852 {
6853         if (!refcount_dec_and_test(&ref->refcount))
6854                 return;
6855         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6856         kfree(ref);
6857 }
6858
6859 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6860                                     struct pipe_buffer *buf)
6861 {
6862         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6863
6864         buffer_ref_release(ref);
6865         buf->private = 0;
6866 }
6867
6868 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6869                                 struct pipe_buffer *buf)
6870 {
6871         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6872
6873         if (refcount_read(&ref->refcount) > INT_MAX/2)
6874                 return false;
6875
6876         refcount_inc(&ref->refcount);
6877         return true;
6878 }
6879
6880 /* Pipe buffer operations for a buffer. */
6881 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6882         .can_merge              = 0,
6883         .confirm                = generic_pipe_buf_confirm,
6884         .release                = buffer_pipe_buf_release,
6885         .steal                  = generic_pipe_buf_nosteal,
6886         .get                    = buffer_pipe_buf_get,
6887 };
6888
6889 /*
6890  * Callback from splice_to_pipe(), if we need to release some pages
6891  * at the end of the spd in case we error'ed out in filling the pipe.
6892  */
6893 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6894 {
6895         struct buffer_ref *ref =
6896                 (struct buffer_ref *)spd->partial[i].private;
6897
6898         buffer_ref_release(ref);
6899         spd->partial[i].private = 0;
6900 }
6901
6902 static ssize_t
6903 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6904                             struct pipe_inode_info *pipe, size_t len,
6905                             unsigned int flags)
6906 {
6907         struct ftrace_buffer_info *info = file->private_data;
6908         struct trace_iterator *iter = &info->iter;
6909         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6910         struct page *pages_def[PIPE_DEF_BUFFERS];
6911         struct splice_pipe_desc spd = {
6912                 .pages          = pages_def,
6913                 .partial        = partial_def,
6914                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6915                 .ops            = &buffer_pipe_buf_ops,
6916                 .spd_release    = buffer_spd_release,
6917         };
6918         struct buffer_ref *ref;
6919         int entries, i;
6920         ssize_t ret = 0;
6921
6922 #ifdef CONFIG_TRACER_MAX_TRACE
6923         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6924                 return -EBUSY;
6925 #endif
6926
6927         if (*ppos & (PAGE_SIZE - 1))
6928                 return -EINVAL;
6929
6930         if (len & (PAGE_SIZE - 1)) {
6931                 if (len < PAGE_SIZE)
6932                         return -EINVAL;
6933                 len &= PAGE_MASK;
6934         }
6935
6936         if (splice_grow_spd(pipe, &spd))
6937                 return -ENOMEM;
6938
6939  again:
6940         trace_access_lock(iter->cpu_file);
6941         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6942
6943         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6944                 struct page *page;
6945                 int r;
6946
6947                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6948                 if (!ref) {
6949                         ret = -ENOMEM;
6950                         break;
6951                 }
6952
6953                 refcount_set(&ref->refcount, 1);
6954                 ref->buffer = iter->trace_buffer->buffer;
6955                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6956                 if (IS_ERR(ref->page)) {
6957                         ret = PTR_ERR(ref->page);
6958                         ref->page = NULL;
6959                         kfree(ref);
6960                         break;
6961                 }
6962                 ref->cpu = iter->cpu_file;
6963
6964                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6965                                           len, iter->cpu_file, 1);
6966                 if (r < 0) {
6967                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6968                                                    ref->page);
6969                         kfree(ref);
6970                         break;
6971                 }
6972
6973                 page = virt_to_page(ref->page);
6974
6975                 spd.pages[i] = page;
6976                 spd.partial[i].len = PAGE_SIZE;
6977                 spd.partial[i].offset = 0;
6978                 spd.partial[i].private = (unsigned long)ref;
6979                 spd.nr_pages++;
6980                 *ppos += PAGE_SIZE;
6981
6982                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6983         }
6984
6985         trace_access_unlock(iter->cpu_file);
6986         spd.nr_pages = i;
6987
6988         /* did we read anything? */
6989         if (!spd.nr_pages) {
6990                 if (ret)
6991                         goto out;
6992
6993                 ret = -EAGAIN;
6994                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6995                         goto out;
6996
6997                 ret = wait_on_pipe(iter, true);
6998                 if (ret)
6999                         goto out;
7000
7001                 goto again;
7002         }
7003
7004         ret = splice_to_pipe(pipe, &spd);
7005 out:
7006         splice_shrink_spd(&spd);
7007
7008         return ret;
7009 }
7010
7011 static const struct file_operations tracing_buffers_fops = {
7012         .open           = tracing_buffers_open,
7013         .read           = tracing_buffers_read,
7014         .poll           = tracing_buffers_poll,
7015         .release        = tracing_buffers_release,
7016         .splice_read    = tracing_buffers_splice_read,
7017         .llseek         = no_llseek,
7018 };
7019
7020 static ssize_t
7021 tracing_stats_read(struct file *filp, char __user *ubuf,
7022                    size_t count, loff_t *ppos)
7023 {
7024         struct inode *inode = file_inode(filp);
7025         struct trace_array *tr = inode->i_private;
7026         struct trace_buffer *trace_buf = &tr->trace_buffer;
7027         int cpu = tracing_get_cpu(inode);
7028         struct trace_seq *s;
7029         unsigned long cnt;
7030         unsigned long long t;
7031         unsigned long usec_rem;
7032
7033         s = kmalloc(sizeof(*s), GFP_KERNEL);
7034         if (!s)
7035                 return -ENOMEM;
7036
7037         trace_seq_init(s);
7038
7039         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7040         trace_seq_printf(s, "entries: %ld\n", cnt);
7041
7042         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7043         trace_seq_printf(s, "overrun: %ld\n", cnt);
7044
7045         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7046         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7047
7048         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7049         trace_seq_printf(s, "bytes: %ld\n", cnt);
7050
7051         if (trace_clocks[tr->clock_id].in_ns) {
7052                 /* local or global for trace_clock */
7053                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7054                 usec_rem = do_div(t, USEC_PER_SEC);
7055                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7056                                                                 t, usec_rem);
7057
7058                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7059                 usec_rem = do_div(t, USEC_PER_SEC);
7060                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7061         } else {
7062                 /* counter or tsc mode for trace_clock */
7063                 trace_seq_printf(s, "oldest event ts: %llu\n",
7064                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7065
7066                 trace_seq_printf(s, "now ts: %llu\n",
7067                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7068         }
7069
7070         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7071         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7072
7073         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7074         trace_seq_printf(s, "read events: %ld\n", cnt);
7075
7076         count = simple_read_from_buffer(ubuf, count, ppos,
7077                                         s->buffer, trace_seq_used(s));
7078
7079         kfree(s);
7080
7081         return count;
7082 }
7083
7084 static const struct file_operations tracing_stats_fops = {
7085         .open           = tracing_open_generic_tr,
7086         .read           = tracing_stats_read,
7087         .llseek         = generic_file_llseek,
7088         .release        = tracing_release_generic_tr,
7089 };
7090
7091 #ifdef CONFIG_DYNAMIC_FTRACE
7092
7093 static ssize_t
7094 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7095                   size_t cnt, loff_t *ppos)
7096 {
7097         unsigned long *p = filp->private_data;
7098         char buf[64]; /* Not too big for a shallow stack */
7099         int r;
7100
7101         r = scnprintf(buf, 63, "%ld", *p);
7102         buf[r++] = '\n';
7103
7104         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7105 }
7106
7107 static const struct file_operations tracing_dyn_info_fops = {
7108         .open           = tracing_open_generic,
7109         .read           = tracing_read_dyn_info,
7110         .llseek         = generic_file_llseek,
7111 };
7112 #endif /* CONFIG_DYNAMIC_FTRACE */
7113
7114 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7115 static void
7116 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7117                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7118                 void *data)
7119 {
7120         tracing_snapshot_instance(tr);
7121 }
7122
7123 static void
7124 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7125                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7126                       void *data)
7127 {
7128         struct ftrace_func_mapper *mapper = data;
7129         long *count = NULL;
7130
7131         if (mapper)
7132                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7133
7134         if (count) {
7135
7136                 if (*count <= 0)
7137                         return;
7138
7139                 (*count)--;
7140         }
7141
7142         tracing_snapshot_instance(tr);
7143 }
7144
7145 static int
7146 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7147                       struct ftrace_probe_ops *ops, void *data)
7148 {
7149         struct ftrace_func_mapper *mapper = data;
7150         long *count = NULL;
7151
7152         seq_printf(m, "%ps:", (void *)ip);
7153
7154         seq_puts(m, "snapshot");
7155
7156         if (mapper)
7157                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7158
7159         if (count)
7160                 seq_printf(m, ":count=%ld\n", *count);
7161         else
7162                 seq_puts(m, ":unlimited\n");
7163
7164         return 0;
7165 }
7166
7167 static int
7168 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7169                      unsigned long ip, void *init_data, void **data)
7170 {
7171         struct ftrace_func_mapper *mapper = *data;
7172
7173         if (!mapper) {
7174                 mapper = allocate_ftrace_func_mapper();
7175                 if (!mapper)
7176                         return -ENOMEM;
7177                 *data = mapper;
7178         }
7179
7180         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7181 }
7182
7183 static void
7184 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7185                      unsigned long ip, void *data)
7186 {
7187         struct ftrace_func_mapper *mapper = data;
7188
7189         if (!ip) {
7190                 if (!mapper)
7191                         return;
7192                 free_ftrace_func_mapper(mapper, NULL);
7193                 return;
7194         }
7195
7196         ftrace_func_mapper_remove_ip(mapper, ip);
7197 }
7198
7199 static struct ftrace_probe_ops snapshot_probe_ops = {
7200         .func                   = ftrace_snapshot,
7201         .print                  = ftrace_snapshot_print,
7202 };
7203
7204 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7205         .func                   = ftrace_count_snapshot,
7206         .print                  = ftrace_snapshot_print,
7207         .init                   = ftrace_snapshot_init,
7208         .free                   = ftrace_snapshot_free,
7209 };
7210
7211 static int
7212 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7213                                char *glob, char *cmd, char *param, int enable)
7214 {
7215         struct ftrace_probe_ops *ops;
7216         void *count = (void *)-1;
7217         char *number;
7218         int ret;
7219
7220         if (!tr)
7221                 return -ENODEV;
7222
7223         /* hash funcs only work with set_ftrace_filter */
7224         if (!enable)
7225                 return -EINVAL;
7226
7227         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7228
7229         if (glob[0] == '!')
7230                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7231
7232         if (!param)
7233                 goto out_reg;
7234
7235         number = strsep(&param, ":");
7236
7237         if (!strlen(number))
7238                 goto out_reg;
7239
7240         /*
7241          * We use the callback data field (which is a pointer)
7242          * as our counter.
7243          */
7244         ret = kstrtoul(number, 0, (unsigned long *)&count);
7245         if (ret)
7246                 return ret;
7247
7248  out_reg:
7249         ret = tracing_alloc_snapshot_instance(tr);
7250         if (ret < 0)
7251                 goto out;
7252
7253         ret = register_ftrace_function_probe(glob, tr, ops, count);
7254
7255  out:
7256         return ret < 0 ? ret : 0;
7257 }
7258
7259 static struct ftrace_func_command ftrace_snapshot_cmd = {
7260         .name                   = "snapshot",
7261         .func                   = ftrace_trace_snapshot_callback,
7262 };
7263
7264 static __init int register_snapshot_cmd(void)
7265 {
7266         return register_ftrace_command(&ftrace_snapshot_cmd);
7267 }
7268 #else
7269 static inline __init int register_snapshot_cmd(void) { return 0; }
7270 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7271
7272 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7273 {
7274         if (WARN_ON(!tr->dir))
7275                 return ERR_PTR(-ENODEV);
7276
7277         /* Top directory uses NULL as the parent */
7278         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7279                 return NULL;
7280
7281         /* All sub buffers have a descriptor */
7282         return tr->dir;
7283 }
7284
7285 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7286 {
7287         struct dentry *d_tracer;
7288
7289         if (tr->percpu_dir)
7290                 return tr->percpu_dir;
7291
7292         d_tracer = tracing_get_dentry(tr);
7293         if (IS_ERR(d_tracer))
7294                 return NULL;
7295
7296         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7297
7298         WARN_ONCE(!tr->percpu_dir,
7299                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7300
7301         return tr->percpu_dir;
7302 }
7303
7304 static struct dentry *
7305 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7306                       void *data, long cpu, const struct file_operations *fops)
7307 {
7308         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7309
7310         if (ret) /* See tracing_get_cpu() */
7311                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7312         return ret;
7313 }
7314
7315 static void
7316 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7317 {
7318         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7319         struct dentry *d_cpu;
7320         char cpu_dir[30]; /* 30 characters should be more than enough */
7321
7322         if (!d_percpu)
7323                 return;
7324
7325         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7326         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7327         if (!d_cpu) {
7328                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7329                 return;
7330         }
7331
7332         /* per cpu trace_pipe */
7333         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7334                                 tr, cpu, &tracing_pipe_fops);
7335
7336         /* per cpu trace */
7337         trace_create_cpu_file("trace", 0644, d_cpu,
7338                                 tr, cpu, &tracing_fops);
7339
7340         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7341                                 tr, cpu, &tracing_buffers_fops);
7342
7343         trace_create_cpu_file("stats", 0444, d_cpu,
7344                                 tr, cpu, &tracing_stats_fops);
7345
7346         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7347                                 tr, cpu, &tracing_entries_fops);
7348
7349 #ifdef CONFIG_TRACER_SNAPSHOT
7350         trace_create_cpu_file("snapshot", 0644, d_cpu,
7351                                 tr, cpu, &snapshot_fops);
7352
7353         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7354                                 tr, cpu, &snapshot_raw_fops);
7355 #endif
7356 }
7357
7358 #ifdef CONFIG_FTRACE_SELFTEST
7359 /* Let selftest have access to static functions in this file */
7360 #include "trace_selftest.c"
7361 #endif
7362
7363 static ssize_t
7364 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7365                         loff_t *ppos)
7366 {
7367         struct trace_option_dentry *topt = filp->private_data;
7368         char *buf;
7369
7370         if (topt->flags->val & topt->opt->bit)
7371                 buf = "1\n";
7372         else
7373                 buf = "0\n";
7374
7375         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7376 }
7377
7378 static ssize_t
7379 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7380                          loff_t *ppos)
7381 {
7382         struct trace_option_dentry *topt = filp->private_data;
7383         unsigned long val;
7384         int ret;
7385
7386         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7387         if (ret)
7388                 return ret;
7389
7390         if (val != 0 && val != 1)
7391                 return -EINVAL;
7392
7393         if (!!(topt->flags->val & topt->opt->bit) != val) {
7394                 mutex_lock(&trace_types_lock);
7395                 ret = __set_tracer_option(topt->tr, topt->flags,
7396                                           topt->opt, !val);
7397                 mutex_unlock(&trace_types_lock);
7398                 if (ret)
7399                         return ret;
7400         }
7401
7402         *ppos += cnt;
7403
7404         return cnt;
7405 }
7406
7407
7408 static const struct file_operations trace_options_fops = {
7409         .open = tracing_open_generic,
7410         .read = trace_options_read,
7411         .write = trace_options_write,
7412         .llseek = generic_file_llseek,
7413 };
7414
7415 /*
7416  * In order to pass in both the trace_array descriptor as well as the index
7417  * to the flag that the trace option file represents, the trace_array
7418  * has a character array of trace_flags_index[], which holds the index
7419  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7420  * The address of this character array is passed to the flag option file
7421  * read/write callbacks.
7422  *
7423  * In order to extract both the index and the trace_array descriptor,
7424  * get_tr_index() uses the following algorithm.
7425  *
7426  *   idx = *ptr;
7427  *
7428  * As the pointer itself contains the address of the index (remember
7429  * index[1] == 1).
7430  *
7431  * Then to get the trace_array descriptor, by subtracting that index
7432  * from the ptr, we get to the start of the index itself.
7433  *
7434  *   ptr - idx == &index[0]
7435  *
7436  * Then a simple container_of() from that pointer gets us to the
7437  * trace_array descriptor.
7438  */
7439 static void get_tr_index(void *data, struct trace_array **ptr,
7440                          unsigned int *pindex)
7441 {
7442         *pindex = *(unsigned char *)data;
7443
7444         *ptr = container_of(data - *pindex, struct trace_array,
7445                             trace_flags_index);
7446 }
7447
7448 static ssize_t
7449 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7450                         loff_t *ppos)
7451 {
7452         void *tr_index = filp->private_data;
7453         struct trace_array *tr;
7454         unsigned int index;
7455         char *buf;
7456
7457         get_tr_index(tr_index, &tr, &index);
7458
7459         if (tr->trace_flags & (1 << index))
7460                 buf = "1\n";
7461         else
7462                 buf = "0\n";
7463
7464         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7465 }
7466
7467 static ssize_t
7468 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7469                          loff_t *ppos)
7470 {
7471         void *tr_index = filp->private_data;
7472         struct trace_array *tr;
7473         unsigned int index;
7474         unsigned long val;
7475         int ret;
7476
7477         get_tr_index(tr_index, &tr, &index);
7478
7479         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7480         if (ret)
7481                 return ret;
7482
7483         if (val != 0 && val != 1)
7484                 return -EINVAL;
7485
7486         mutex_lock(&event_mutex);
7487         mutex_lock(&trace_types_lock);
7488         ret = set_tracer_flag(tr, 1 << index, val);
7489         mutex_unlock(&trace_types_lock);
7490         mutex_unlock(&event_mutex);
7491
7492         if (ret < 0)
7493                 return ret;
7494
7495         *ppos += cnt;
7496
7497         return cnt;
7498 }
7499
7500 static const struct file_operations trace_options_core_fops = {
7501         .open = tracing_open_generic,
7502         .read = trace_options_core_read,
7503         .write = trace_options_core_write,
7504         .llseek = generic_file_llseek,
7505 };
7506
7507 struct dentry *trace_create_file(const char *name,
7508                                  umode_t mode,
7509                                  struct dentry *parent,
7510                                  void *data,
7511                                  const struct file_operations *fops)
7512 {
7513         struct dentry *ret;
7514
7515         ret = tracefs_create_file(name, mode, parent, data, fops);
7516         if (!ret)
7517                 pr_warn("Could not create tracefs '%s' entry\n", name);
7518
7519         return ret;
7520 }
7521
7522
7523 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7524 {
7525         struct dentry *d_tracer;
7526
7527         if (tr->options)
7528                 return tr->options;
7529
7530         d_tracer = tracing_get_dentry(tr);
7531         if (IS_ERR(d_tracer))
7532                 return NULL;
7533
7534         tr->options = tracefs_create_dir("options", d_tracer);
7535         if (!tr->options) {
7536                 pr_warn("Could not create tracefs directory 'options'\n");
7537                 return NULL;
7538         }
7539
7540         return tr->options;
7541 }
7542
7543 static void
7544 create_trace_option_file(struct trace_array *tr,
7545                          struct trace_option_dentry *topt,
7546                          struct tracer_flags *flags,
7547                          struct tracer_opt *opt)
7548 {
7549         struct dentry *t_options;
7550
7551         t_options = trace_options_init_dentry(tr);
7552         if (!t_options)
7553                 return;
7554
7555         topt->flags = flags;
7556         topt->opt = opt;
7557         topt->tr = tr;
7558
7559         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7560                                     &trace_options_fops);
7561
7562 }
7563
7564 static void
7565 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7566 {
7567         struct trace_option_dentry *topts;
7568         struct trace_options *tr_topts;
7569         struct tracer_flags *flags;
7570         struct tracer_opt *opts;
7571         int cnt;
7572         int i;
7573
7574         if (!tracer)
7575                 return;
7576
7577         flags = tracer->flags;
7578
7579         if (!flags || !flags->opts)
7580                 return;
7581
7582         /*
7583          * If this is an instance, only create flags for tracers
7584          * the instance may have.
7585          */
7586         if (!trace_ok_for_array(tracer, tr))
7587                 return;
7588
7589         for (i = 0; i < tr->nr_topts; i++) {
7590                 /* Make sure there's no duplicate flags. */
7591                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7592                         return;
7593         }
7594
7595         opts = flags->opts;
7596
7597         for (cnt = 0; opts[cnt].name; cnt++)
7598                 ;
7599
7600         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7601         if (!topts)
7602                 return;
7603
7604         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7605                             GFP_KERNEL);
7606         if (!tr_topts) {
7607                 kfree(topts);
7608                 return;
7609         }
7610
7611         tr->topts = tr_topts;
7612         tr->topts[tr->nr_topts].tracer = tracer;
7613         tr->topts[tr->nr_topts].topts = topts;
7614         tr->nr_topts++;
7615
7616         for (cnt = 0; opts[cnt].name; cnt++) {
7617                 create_trace_option_file(tr, &topts[cnt], flags,
7618                                          &opts[cnt]);
7619                 WARN_ONCE(topts[cnt].entry == NULL,
7620                           "Failed to create trace option: %s",
7621                           opts[cnt].name);
7622         }
7623 }
7624
7625 static struct dentry *
7626 create_trace_option_core_file(struct trace_array *tr,
7627                               const char *option, long index)
7628 {
7629         struct dentry *t_options;
7630
7631         t_options = trace_options_init_dentry(tr);
7632         if (!t_options)
7633                 return NULL;
7634
7635         return trace_create_file(option, 0644, t_options,
7636                                  (void *)&tr->trace_flags_index[index],
7637                                  &trace_options_core_fops);
7638 }
7639
7640 static void create_trace_options_dir(struct trace_array *tr)
7641 {
7642         struct dentry *t_options;
7643         bool top_level = tr == &global_trace;
7644         int i;
7645
7646         t_options = trace_options_init_dentry(tr);
7647         if (!t_options)
7648                 return;
7649
7650         for (i = 0; trace_options[i]; i++) {
7651                 if (top_level ||
7652                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7653                         create_trace_option_core_file(tr, trace_options[i], i);
7654         }
7655 }
7656
7657 static ssize_t
7658 rb_simple_read(struct file *filp, char __user *ubuf,
7659                size_t cnt, loff_t *ppos)
7660 {
7661         struct trace_array *tr = filp->private_data;
7662         char buf[64];
7663         int r;
7664
7665         r = tracer_tracing_is_on(tr);
7666         r = sprintf(buf, "%d\n", r);
7667
7668         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7669 }
7670
7671 static ssize_t
7672 rb_simple_write(struct file *filp, const char __user *ubuf,
7673                 size_t cnt, loff_t *ppos)
7674 {
7675         struct trace_array *tr = filp->private_data;
7676         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7677         unsigned long val;
7678         int ret;
7679
7680         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7681         if (ret)
7682                 return ret;
7683
7684         if (buffer) {
7685                 mutex_lock(&trace_types_lock);
7686                 if (!!val == tracer_tracing_is_on(tr)) {
7687                         val = 0; /* do nothing */
7688                 } else if (val) {
7689                         tracer_tracing_on(tr);
7690                         if (tr->current_trace->start)
7691                                 tr->current_trace->start(tr);
7692                 } else {
7693                         tracer_tracing_off(tr);
7694                         if (tr->current_trace->stop)
7695                                 tr->current_trace->stop(tr);
7696                 }
7697                 mutex_unlock(&trace_types_lock);
7698         }
7699
7700         (*ppos)++;
7701
7702         return cnt;
7703 }
7704
7705 static const struct file_operations rb_simple_fops = {
7706         .open           = tracing_open_generic_tr,
7707         .read           = rb_simple_read,
7708         .write          = rb_simple_write,
7709         .release        = tracing_release_generic_tr,
7710         .llseek         = default_llseek,
7711 };
7712
7713 struct dentry *trace_instance_dir;
7714
7715 static void
7716 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7717
7718 static int
7719 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7720 {
7721         enum ring_buffer_flags rb_flags;
7722
7723         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7724
7725         buf->tr = tr;
7726
7727         buf->buffer = ring_buffer_alloc(size, rb_flags);
7728         if (!buf->buffer)
7729                 return -ENOMEM;
7730
7731         buf->data = alloc_percpu(struct trace_array_cpu);
7732         if (!buf->data) {
7733                 ring_buffer_free(buf->buffer);
7734                 buf->buffer = NULL;
7735                 return -ENOMEM;
7736         }
7737
7738         /* Allocate the first page for all buffers */
7739         set_buffer_entries(&tr->trace_buffer,
7740                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7741
7742         return 0;
7743 }
7744
7745 static int allocate_trace_buffers(struct trace_array *tr, int size)
7746 {
7747         int ret;
7748
7749         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7750         if (ret)
7751                 return ret;
7752
7753 #ifdef CONFIG_TRACER_MAX_TRACE
7754         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7755                                     allocate_snapshot ? size : 1);
7756         if (WARN_ON(ret)) {
7757                 ring_buffer_free(tr->trace_buffer.buffer);
7758                 tr->trace_buffer.buffer = NULL;
7759                 free_percpu(tr->trace_buffer.data);
7760                 tr->trace_buffer.data = NULL;
7761                 return -ENOMEM;
7762         }
7763         tr->allocated_snapshot = allocate_snapshot;
7764
7765         /*
7766          * Only the top level trace array gets its snapshot allocated
7767          * from the kernel command line.
7768          */
7769         allocate_snapshot = false;
7770 #endif
7771
7772         /*
7773          * Because of some magic with the way alloc_percpu() works on
7774          * x86_64, we need to synchronize the pgd of all the tables,
7775          * otherwise the trace events that happen in x86_64 page fault
7776          * handlers can't cope with accessing the chance that a
7777          * alloc_percpu()'d memory might be touched in the page fault trace
7778          * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
7779          * calls in tracing, because something might get triggered within a
7780          * page fault trace event!
7781          */
7782         vmalloc_sync_mappings();
7783
7784         return 0;
7785 }
7786
7787 static void free_trace_buffer(struct trace_buffer *buf)
7788 {
7789         if (buf->buffer) {
7790                 ring_buffer_free(buf->buffer);
7791                 buf->buffer = NULL;
7792                 free_percpu(buf->data);
7793                 buf->data = NULL;
7794         }
7795 }
7796
7797 static void free_trace_buffers(struct trace_array *tr)
7798 {
7799         if (!tr)
7800                 return;
7801
7802         free_trace_buffer(&tr->trace_buffer);
7803
7804 #ifdef CONFIG_TRACER_MAX_TRACE
7805         free_trace_buffer(&tr->max_buffer);
7806 #endif
7807 }
7808
7809 static void init_trace_flags_index(struct trace_array *tr)
7810 {
7811         int i;
7812
7813         /* Used by the trace options files */
7814         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7815                 tr->trace_flags_index[i] = i;
7816 }
7817
7818 static void __update_tracer_options(struct trace_array *tr)
7819 {
7820         struct tracer *t;
7821
7822         for (t = trace_types; t; t = t->next)
7823                 add_tracer_options(tr, t);
7824 }
7825
7826 static void update_tracer_options(struct trace_array *tr)
7827 {
7828         mutex_lock(&trace_types_lock);
7829         tracer_options_updated = true;
7830         __update_tracer_options(tr);
7831         mutex_unlock(&trace_types_lock);
7832 }
7833
7834 static int instance_mkdir(const char *name)
7835 {
7836         struct trace_array *tr;
7837         int ret;
7838
7839         mutex_lock(&event_mutex);
7840         mutex_lock(&trace_types_lock);
7841
7842         ret = -EEXIST;
7843         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7844                 if (tr->name && strcmp(tr->name, name) == 0)
7845                         goto out_unlock;
7846         }
7847
7848         ret = -ENOMEM;
7849         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7850         if (!tr)
7851                 goto out_unlock;
7852
7853         tr->name = kstrdup(name, GFP_KERNEL);
7854         if (!tr->name)
7855                 goto out_free_tr;
7856
7857         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7858                 goto out_free_tr;
7859
7860         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7861
7862         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7863
7864         raw_spin_lock_init(&tr->start_lock);
7865
7866         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7867
7868         tr->current_trace = &nop_trace;
7869
7870         INIT_LIST_HEAD(&tr->systems);
7871         INIT_LIST_HEAD(&tr->events);
7872         INIT_LIST_HEAD(&tr->hist_vars);
7873
7874         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7875                 goto out_free_tr;
7876
7877         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7878         if (!tr->dir)
7879                 goto out_free_tr;
7880
7881         ret = event_trace_add_tracer(tr->dir, tr);
7882         if (ret) {
7883                 tracefs_remove_recursive(tr->dir);
7884                 goto out_free_tr;
7885         }
7886
7887         ftrace_init_trace_array(tr);
7888
7889         init_tracer_tracefs(tr, tr->dir);
7890         init_trace_flags_index(tr);
7891         __update_tracer_options(tr);
7892
7893         list_add(&tr->list, &ftrace_trace_arrays);
7894
7895         mutex_unlock(&trace_types_lock);
7896         mutex_unlock(&event_mutex);
7897
7898         return 0;
7899
7900  out_free_tr:
7901         free_trace_buffers(tr);
7902         free_cpumask_var(tr->tracing_cpumask);
7903         kfree(tr->name);
7904         kfree(tr);
7905
7906  out_unlock:
7907         mutex_unlock(&trace_types_lock);
7908         mutex_unlock(&event_mutex);
7909
7910         return ret;
7911
7912 }
7913
7914 static int instance_rmdir(const char *name)
7915 {
7916         struct trace_array *tr;
7917         int found = 0;
7918         int ret;
7919         int i;
7920
7921         mutex_lock(&event_mutex);
7922         mutex_lock(&trace_types_lock);
7923
7924         ret = -ENODEV;
7925         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7926                 if (tr->name && strcmp(tr->name, name) == 0) {
7927                         found = 1;
7928                         break;
7929                 }
7930         }
7931         if (!found)
7932                 goto out_unlock;
7933
7934         ret = -EBUSY;
7935         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7936                 goto out_unlock;
7937
7938         list_del(&tr->list);
7939
7940         /* Disable all the flags that were enabled coming in */
7941         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7942                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7943                         set_tracer_flag(tr, 1 << i, 0);
7944         }
7945
7946         tracing_set_nop(tr);
7947         clear_ftrace_function_probes(tr);
7948         event_trace_del_tracer(tr);
7949         ftrace_clear_pids(tr);
7950         ftrace_destroy_function_files(tr);
7951         tracefs_remove_recursive(tr->dir);
7952         free_trace_buffers(tr);
7953
7954         for (i = 0; i < tr->nr_topts; i++) {
7955                 kfree(tr->topts[i].topts);
7956         }
7957         kfree(tr->topts);
7958
7959         free_cpumask_var(tr->tracing_cpumask);
7960         kfree(tr->name);
7961         kfree(tr);
7962
7963         ret = 0;
7964
7965  out_unlock:
7966         mutex_unlock(&trace_types_lock);
7967         mutex_unlock(&event_mutex);
7968
7969         return ret;
7970 }
7971
7972 static __init void create_trace_instances(struct dentry *d_tracer)
7973 {
7974         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7975                                                          instance_mkdir,
7976                                                          instance_rmdir);
7977         if (WARN_ON(!trace_instance_dir))
7978                 return;
7979 }
7980
7981 static void
7982 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7983 {
7984         struct trace_event_file *file;
7985         int cpu;
7986
7987         trace_create_file("available_tracers", 0444, d_tracer,
7988                         tr, &show_traces_fops);
7989
7990         trace_create_file("current_tracer", 0644, d_tracer,
7991                         tr, &set_tracer_fops);
7992
7993         trace_create_file("tracing_cpumask", 0644, d_tracer,
7994                           tr, &tracing_cpumask_fops);
7995
7996         trace_create_file("trace_options", 0644, d_tracer,
7997                           tr, &tracing_iter_fops);
7998
7999         trace_create_file("trace", 0644, d_tracer,
8000                           tr, &tracing_fops);
8001
8002         trace_create_file("trace_pipe", 0444, d_tracer,
8003                           tr, &tracing_pipe_fops);
8004
8005         trace_create_file("buffer_size_kb", 0644, d_tracer,
8006                           tr, &tracing_entries_fops);
8007
8008         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8009                           tr, &tracing_total_entries_fops);
8010
8011         trace_create_file("free_buffer", 0200, d_tracer,
8012                           tr, &tracing_free_buffer_fops);
8013
8014         trace_create_file("trace_marker", 0220, d_tracer,
8015                           tr, &tracing_mark_fops);
8016
8017         file = __find_event_file(tr, "ftrace", "print");
8018         if (file && file->dir)
8019                 trace_create_file("trigger", 0644, file->dir, file,
8020                                   &event_trigger_fops);
8021         tr->trace_marker_file = file;
8022
8023         trace_create_file("trace_marker_raw", 0220, d_tracer,
8024                           tr, &tracing_mark_raw_fops);
8025
8026         trace_create_file("trace_clock", 0644, d_tracer, tr,
8027                           &trace_clock_fops);
8028
8029         trace_create_file("tracing_on", 0644, d_tracer,
8030                           tr, &rb_simple_fops);
8031
8032         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8033                           &trace_time_stamp_mode_fops);
8034
8035         create_trace_options_dir(tr);
8036
8037 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8038         trace_create_file("tracing_max_latency", 0644, d_tracer,
8039                         &tr->max_latency, &tracing_max_lat_fops);
8040 #endif
8041
8042         if (ftrace_create_function_files(tr, d_tracer))
8043                 WARN(1, "Could not allocate function filter files");
8044
8045 #ifdef CONFIG_TRACER_SNAPSHOT
8046         trace_create_file("snapshot", 0644, d_tracer,
8047                           tr, &snapshot_fops);
8048 #endif
8049
8050         for_each_tracing_cpu(cpu)
8051                 tracing_init_tracefs_percpu(tr, cpu);
8052
8053         ftrace_init_tracefs(tr, d_tracer);
8054 }
8055
8056 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8057 {
8058         struct vfsmount *mnt;
8059         struct file_system_type *type;
8060
8061         /*
8062          * To maintain backward compatibility for tools that mount
8063          * debugfs to get to the tracing facility, tracefs is automatically
8064          * mounted to the debugfs/tracing directory.
8065          */
8066         type = get_fs_type("tracefs");
8067         if (!type)
8068                 return NULL;
8069         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8070         put_filesystem(type);
8071         if (IS_ERR(mnt))
8072                 return NULL;
8073         mntget(mnt);
8074
8075         return mnt;
8076 }
8077
8078 /**
8079  * tracing_init_dentry - initialize top level trace array
8080  *
8081  * This is called when creating files or directories in the tracing
8082  * directory. It is called via fs_initcall() by any of the boot up code
8083  * and expects to return the dentry of the top level tracing directory.
8084  */
8085 struct dentry *tracing_init_dentry(void)
8086 {
8087         struct trace_array *tr = &global_trace;
8088
8089         /* The top level trace array uses  NULL as parent */
8090         if (tr->dir)
8091                 return NULL;
8092
8093         if (WARN_ON(!tracefs_initialized()) ||
8094                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8095                  WARN_ON(!debugfs_initialized())))
8096                 return ERR_PTR(-ENODEV);
8097
8098         /*
8099          * As there may still be users that expect the tracing
8100          * files to exist in debugfs/tracing, we must automount
8101          * the tracefs file system there, so older tools still
8102          * work with the newer kerenl.
8103          */
8104         tr->dir = debugfs_create_automount("tracing", NULL,
8105                                            trace_automount, NULL);
8106         if (!tr->dir) {
8107                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8108                 return ERR_PTR(-ENOMEM);
8109         }
8110
8111         return NULL;
8112 }
8113
8114 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8115 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8116
8117 static void __init trace_eval_init(void)
8118 {
8119         int len;
8120
8121         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8122         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8123 }
8124
8125 #ifdef CONFIG_MODULES
8126 static void trace_module_add_evals(struct module *mod)
8127 {
8128         if (!mod->num_trace_evals)
8129                 return;
8130
8131         /*
8132          * Modules with bad taint do not have events created, do
8133          * not bother with enums either.
8134          */
8135         if (trace_module_has_bad_taint(mod))
8136                 return;
8137
8138         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8139 }
8140
8141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8142 static void trace_module_remove_evals(struct module *mod)
8143 {
8144         union trace_eval_map_item *map;
8145         union trace_eval_map_item **last = &trace_eval_maps;
8146
8147         if (!mod->num_trace_evals)
8148                 return;
8149
8150         mutex_lock(&trace_eval_mutex);
8151
8152         map = trace_eval_maps;
8153
8154         while (map) {
8155                 if (map->head.mod == mod)
8156                         break;
8157                 map = trace_eval_jmp_to_tail(map);
8158                 last = &map->tail.next;
8159                 map = map->tail.next;
8160         }
8161         if (!map)
8162                 goto out;
8163
8164         *last = trace_eval_jmp_to_tail(map)->tail.next;
8165         kfree(map);
8166  out:
8167         mutex_unlock(&trace_eval_mutex);
8168 }
8169 #else
8170 static inline void trace_module_remove_evals(struct module *mod) { }
8171 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8172
8173 static int trace_module_notify(struct notifier_block *self,
8174                                unsigned long val, void *data)
8175 {
8176         struct module *mod = data;
8177
8178         switch (val) {
8179         case MODULE_STATE_COMING:
8180                 trace_module_add_evals(mod);
8181                 break;
8182         case MODULE_STATE_GOING:
8183                 trace_module_remove_evals(mod);
8184                 break;
8185         }
8186
8187         return 0;
8188 }
8189
8190 static struct notifier_block trace_module_nb = {
8191         .notifier_call = trace_module_notify,
8192         .priority = 0,
8193 };
8194 #endif /* CONFIG_MODULES */
8195
8196 static __init int tracer_init_tracefs(void)
8197 {
8198         struct dentry *d_tracer;
8199
8200         trace_access_lock_init();
8201
8202         d_tracer = tracing_init_dentry();
8203         if (IS_ERR(d_tracer))
8204                 return 0;
8205
8206         event_trace_init();
8207
8208         init_tracer_tracefs(&global_trace, d_tracer);
8209         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8210
8211         trace_create_file("tracing_thresh", 0644, d_tracer,
8212                         &global_trace, &tracing_thresh_fops);
8213
8214         trace_create_file("README", 0444, d_tracer,
8215                         NULL, &tracing_readme_fops);
8216
8217         trace_create_file("saved_cmdlines", 0444, d_tracer,
8218                         NULL, &tracing_saved_cmdlines_fops);
8219
8220         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8221                           NULL, &tracing_saved_cmdlines_size_fops);
8222
8223         trace_create_file("saved_tgids", 0444, d_tracer,
8224                         NULL, &tracing_saved_tgids_fops);
8225
8226         trace_eval_init();
8227
8228         trace_create_eval_file(d_tracer);
8229
8230 #ifdef CONFIG_MODULES
8231         register_module_notifier(&trace_module_nb);
8232 #endif
8233
8234 #ifdef CONFIG_DYNAMIC_FTRACE
8235         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8236                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8237 #endif
8238
8239         create_trace_instances(d_tracer);
8240
8241         update_tracer_options(&global_trace);
8242
8243         return 0;
8244 }
8245
8246 static int trace_panic_handler(struct notifier_block *this,
8247                                unsigned long event, void *unused)
8248 {
8249         if (ftrace_dump_on_oops)
8250                 ftrace_dump(ftrace_dump_on_oops);
8251         return NOTIFY_OK;
8252 }
8253
8254 static struct notifier_block trace_panic_notifier = {
8255         .notifier_call  = trace_panic_handler,
8256         .next           = NULL,
8257         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8258 };
8259
8260 static int trace_die_handler(struct notifier_block *self,
8261                              unsigned long val,
8262                              void *data)
8263 {
8264         switch (val) {
8265         case DIE_OOPS:
8266                 if (ftrace_dump_on_oops)
8267                         ftrace_dump(ftrace_dump_on_oops);
8268                 break;
8269         default:
8270                 break;
8271         }
8272         return NOTIFY_OK;
8273 }
8274
8275 static struct notifier_block trace_die_notifier = {
8276         .notifier_call = trace_die_handler,
8277         .priority = 200
8278 };
8279
8280 /*
8281  * printk is set to max of 1024, we really don't need it that big.
8282  * Nothing should be printing 1000 characters anyway.
8283  */
8284 #define TRACE_MAX_PRINT         1000
8285
8286 /*
8287  * Define here KERN_TRACE so that we have one place to modify
8288  * it if we decide to change what log level the ftrace dump
8289  * should be at.
8290  */
8291 #define KERN_TRACE              KERN_EMERG
8292
8293 void
8294 trace_printk_seq(struct trace_seq *s)
8295 {
8296         /* Probably should print a warning here. */
8297         if (s->seq.len >= TRACE_MAX_PRINT)
8298                 s->seq.len = TRACE_MAX_PRINT;
8299
8300         /*
8301          * More paranoid code. Although the buffer size is set to
8302          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8303          * an extra layer of protection.
8304          */
8305         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8306                 s->seq.len = s->seq.size - 1;
8307
8308         /* should be zero ended, but we are paranoid. */
8309         s->buffer[s->seq.len] = 0;
8310
8311         printk(KERN_TRACE "%s", s->buffer);
8312
8313         trace_seq_init(s);
8314 }
8315
8316 void trace_init_global_iter(struct trace_iterator *iter)
8317 {
8318         iter->tr = &global_trace;
8319         iter->trace = iter->tr->current_trace;
8320         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8321         iter->trace_buffer = &global_trace.trace_buffer;
8322
8323         if (iter->trace && iter->trace->open)
8324                 iter->trace->open(iter);
8325
8326         /* Annotate start of buffers if we had overruns */
8327         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8328                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8329
8330         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8331         if (trace_clocks[iter->tr->clock_id].in_ns)
8332                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8333 }
8334
8335 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8336 {
8337         /* use static because iter can be a bit big for the stack */
8338         static struct trace_iterator iter;
8339         static atomic_t dump_running;
8340         struct trace_array *tr = &global_trace;
8341         unsigned int old_userobj;
8342         unsigned long flags;
8343         int cnt = 0, cpu;
8344
8345         /* Only allow one dump user at a time. */
8346         if (atomic_inc_return(&dump_running) != 1) {
8347                 atomic_dec(&dump_running);
8348                 return;
8349         }
8350
8351         /*
8352          * Always turn off tracing when we dump.
8353          * We don't need to show trace output of what happens
8354          * between multiple crashes.
8355          *
8356          * If the user does a sysrq-z, then they can re-enable
8357          * tracing with echo 1 > tracing_on.
8358          */
8359         tracing_off();
8360
8361         local_irq_save(flags);
8362         printk_nmi_direct_enter();
8363
8364         /* Simulate the iterator */
8365         trace_init_global_iter(&iter);
8366
8367         for_each_tracing_cpu(cpu) {
8368                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8369         }
8370
8371         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8372
8373         /* don't look at user memory in panic mode */
8374         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8375
8376         switch (oops_dump_mode) {
8377         case DUMP_ALL:
8378                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8379                 break;
8380         case DUMP_ORIG:
8381                 iter.cpu_file = raw_smp_processor_id();
8382                 break;
8383         case DUMP_NONE:
8384                 goto out_enable;
8385         default:
8386                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8387                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8388         }
8389
8390         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8391
8392         /* Did function tracer already get disabled? */
8393         if (ftrace_is_dead()) {
8394                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8395                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8396         }
8397
8398         /*
8399          * We need to stop all tracing on all CPUS to read the
8400          * the next buffer. This is a bit expensive, but is
8401          * not done often. We fill all what we can read,
8402          * and then release the locks again.
8403          */
8404
8405         while (!trace_empty(&iter)) {
8406
8407                 if (!cnt)
8408                         printk(KERN_TRACE "---------------------------------\n");
8409
8410                 cnt++;
8411
8412                 trace_iterator_reset(&iter);
8413                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8414
8415                 if (trace_find_next_entry_inc(&iter) != NULL) {
8416                         int ret;
8417
8418                         ret = print_trace_line(&iter);
8419                         if (ret != TRACE_TYPE_NO_CONSUME)
8420                                 trace_consume(&iter);
8421                 }
8422                 touch_nmi_watchdog();
8423
8424                 trace_printk_seq(&iter.seq);
8425         }
8426
8427         if (!cnt)
8428                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8429         else
8430                 printk(KERN_TRACE "---------------------------------\n");
8431
8432  out_enable:
8433         tr->trace_flags |= old_userobj;
8434
8435         for_each_tracing_cpu(cpu) {
8436                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8437         }
8438         atomic_dec(&dump_running);
8439         printk_nmi_direct_exit();
8440         local_irq_restore(flags);
8441 }
8442 EXPORT_SYMBOL_GPL(ftrace_dump);
8443
8444 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8445 {
8446         char **argv;
8447         int argc, ret;
8448
8449         argc = 0;
8450         ret = 0;
8451         argv = argv_split(GFP_KERNEL, buf, &argc);
8452         if (!argv)
8453                 return -ENOMEM;
8454
8455         if (argc)
8456                 ret = createfn(argc, argv);
8457
8458         argv_free(argv);
8459
8460         return ret;
8461 }
8462
8463 #define WRITE_BUFSIZE  4096
8464
8465 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8466                                 size_t count, loff_t *ppos,
8467                                 int (*createfn)(int, char **))
8468 {
8469         char *kbuf, *buf, *tmp;
8470         int ret = 0;
8471         size_t done = 0;
8472         size_t size;
8473
8474         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8475         if (!kbuf)
8476                 return -ENOMEM;
8477
8478         while (done < count) {
8479                 size = count - done;
8480
8481                 if (size >= WRITE_BUFSIZE)
8482                         size = WRITE_BUFSIZE - 1;
8483
8484                 if (copy_from_user(kbuf, buffer + done, size)) {
8485                         ret = -EFAULT;
8486                         goto out;
8487                 }
8488                 kbuf[size] = '\0';
8489                 buf = kbuf;
8490                 do {
8491                         tmp = strchr(buf, '\n');
8492                         if (tmp) {
8493                                 *tmp = '\0';
8494                                 size = tmp - buf + 1;
8495                         } else {
8496                                 size = strlen(buf);
8497                                 if (done + size < count) {
8498                                         if (buf != kbuf)
8499                                                 break;
8500                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8501                                         pr_warn("Line length is too long: Should be less than %d\n",
8502                                                 WRITE_BUFSIZE - 2);
8503                                         ret = -EINVAL;
8504                                         goto out;
8505                                 }
8506                         }
8507                         done += size;
8508
8509                         /* Remove comments */
8510                         tmp = strchr(buf, '#');
8511
8512                         if (tmp)
8513                                 *tmp = '\0';
8514
8515                         ret = trace_run_command(buf, createfn);
8516                         if (ret)
8517                                 goto out;
8518                         buf += size;
8519
8520                 } while (done < count);
8521         }
8522         ret = done;
8523
8524 out:
8525         kfree(kbuf);
8526
8527         return ret;
8528 }
8529
8530 __init static int tracer_alloc_buffers(void)
8531 {
8532         int ring_buf_size;
8533         int ret = -ENOMEM;
8534
8535         /*
8536          * Make sure we don't accidently add more trace options
8537          * than we have bits for.
8538          */
8539         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8540
8541         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8542                 goto out;
8543
8544         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8545                 goto out_free_buffer_mask;
8546
8547         /* Only allocate trace_printk buffers if a trace_printk exists */
8548         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
8549                 /* Must be called before global_trace.buffer is allocated */
8550                 trace_printk_init_buffers();
8551
8552         /* To save memory, keep the ring buffer size to its minimum */
8553         if (ring_buffer_expanded)
8554                 ring_buf_size = trace_buf_size;
8555         else
8556                 ring_buf_size = 1;
8557
8558         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8559         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8560
8561         raw_spin_lock_init(&global_trace.start_lock);
8562
8563         /*
8564          * The prepare callbacks allocates some memory for the ring buffer. We
8565          * don't free the buffer if the if the CPU goes down. If we were to free
8566          * the buffer, then the user would lose any trace that was in the
8567          * buffer. The memory will be removed once the "instance" is removed.
8568          */
8569         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8570                                       "trace/RB:preapre", trace_rb_cpu_prepare,
8571                                       NULL);
8572         if (ret < 0)
8573                 goto out_free_cpumask;
8574         /* Used for event triggers */
8575         ret = -ENOMEM;
8576         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8577         if (!temp_buffer)
8578                 goto out_rm_hp_state;
8579
8580         if (trace_create_savedcmd() < 0)
8581                 goto out_free_temp_buffer;
8582
8583         /* TODO: make the number of buffers hot pluggable with CPUS */
8584         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8585                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8586                 WARN_ON(1);
8587                 goto out_free_savedcmd;
8588         }
8589
8590         if (global_trace.buffer_disabled)
8591                 tracing_off();
8592
8593         if (trace_boot_clock) {
8594                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8595                 if (ret < 0)
8596                         pr_warn("Trace clock %s not defined, going back to default\n",
8597                                 trace_boot_clock);
8598         }
8599
8600         /*
8601          * register_tracer() might reference current_trace, so it
8602          * needs to be set before we register anything. This is
8603          * just a bootstrap of current_trace anyway.
8604          */
8605         global_trace.current_trace = &nop_trace;
8606
8607         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8608
8609         ftrace_init_global_array_ops(&global_trace);
8610
8611         init_trace_flags_index(&global_trace);
8612
8613         register_tracer(&nop_trace);
8614
8615         /* Function tracing may start here (via kernel command line) */
8616         init_function_trace();
8617
8618         /* All seems OK, enable tracing */
8619         tracing_disabled = 0;
8620
8621         atomic_notifier_chain_register(&panic_notifier_list,
8622                                        &trace_panic_notifier);
8623
8624         register_die_notifier(&trace_die_notifier);
8625
8626         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8627
8628         INIT_LIST_HEAD(&global_trace.systems);
8629         INIT_LIST_HEAD(&global_trace.events);
8630         INIT_LIST_HEAD(&global_trace.hist_vars);
8631         list_add(&global_trace.list, &ftrace_trace_arrays);
8632
8633         apply_trace_boot_options();
8634
8635         register_snapshot_cmd();
8636
8637         return 0;
8638
8639 out_free_savedcmd:
8640         free_saved_cmdlines_buffer(savedcmd);
8641 out_free_temp_buffer:
8642         ring_buffer_free(temp_buffer);
8643 out_rm_hp_state:
8644         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8645 out_free_cpumask:
8646         free_cpumask_var(global_trace.tracing_cpumask);
8647 out_free_buffer_mask:
8648         free_cpumask_var(tracing_buffer_mask);
8649 out:
8650         return ret;
8651 }
8652
8653 void __init early_trace_init(void)
8654 {
8655         if (tracepoint_printk) {
8656                 tracepoint_print_iter =
8657                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8658                 if (WARN_ON(!tracepoint_print_iter))
8659                         tracepoint_printk = 0;
8660                 else
8661                         static_key_enable(&tracepoint_printk_key.key);
8662         }
8663         tracer_alloc_buffers();
8664 }
8665
8666 void __init trace_init(void)
8667 {
8668         trace_event_init();
8669 }
8670
8671 __init static int clear_boot_tracer(void)
8672 {
8673         /*
8674          * The default tracer at boot buffer is an init section.
8675          * This function is called in lateinit. If we did not
8676          * find the boot tracer, then clear it out, to prevent
8677          * later registration from accessing the buffer that is
8678          * about to be freed.
8679          */
8680         if (!default_bootup_tracer)
8681                 return 0;
8682
8683         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8684                default_bootup_tracer);
8685         default_bootup_tracer = NULL;
8686
8687         return 0;
8688 }
8689
8690 fs_initcall(tracer_init_tracefs);
8691 late_initcall_sync(clear_boot_tracer);
8692
8693 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8694 __init static int tracing_set_default_clock(void)
8695 {
8696         /* sched_clock_stable() is determined in late_initcall */
8697         if (!trace_boot_clock && !sched_clock_stable()) {
8698                 printk(KERN_WARNING
8699                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8700                        "If you want to keep using the local clock, then add:\n"
8701                        "  \"trace_clock=local\"\n"
8702                        "on the kernel command line\n");
8703                 tracing_set_clock(&global_trace, "global");
8704         }
8705
8706         return 0;
8707 }
8708 late_initcall_sync(tracing_set_default_clock);
8709 #endif