GNU Linux-libre 4.19.286-gnu1
arch/x86/kernel/kprobes/opt.c
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
        struct optimized_kprobe *op;
        struct kprobe *kp;
        long offs;
        int i;

        for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
                kp = get_kprobe((void *)addr - i);
                /* This function only handles jump-optimized kprobes */
                if (kp && kprobe_optimized(kp)) {
                        op = container_of(kp, struct optimized_kprobe, kp);
                        /* If op is optimized or queued for unoptimization */
                        if (list_empty(&op->list) || optprobe_queued_unopt(op))
                                goto found;
                }
        }

        return addr;
found:
        /*
         * If the kprobe is optimized, the original bytes at addr may have
         * been overwritten by the jump destination address. In this case,
         * the original bytes must be recovered from the
         * op->optinsn.copied_insn buffer.
         */
        if (probe_kernel_read(buf, (void *)addr,
                MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
                return 0UL;

        if (addr == (unsigned long)kp->addr) {
                buf[0] = kp->opcode;
                memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        } else {
                offs = addr - (unsigned long)kp->addr - 1;
                memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
        }

        return (unsigned long)buf;
}

/* Insert a move instruction which loads a pointer into %eax/%rdi (the 1st argument). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
        *addr++ = 0x48;         /* REX.W prefix */
        *addr++ = 0xbf;         /* mov(abs) imm64 into %rdi */
#else
        *addr++ = 0xb8;         /* mov imm32 into %eax */
#endif
        *(unsigned long *)addr = val;
}
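
/*
 * On x86-64 the bytes emitted above assemble to a single "movabs" that loads
 * the pointer into the first argument register, e.g. (value purely
 * illustrative) for val == 0xffffffff81234560:
 *
 *   48 bf 60 45 23 81 ff ff ff ff	movabs $0xffffffff81234560, %rdi
 *
 * On 32-bit, the 0xb8 opcode plus a 4-byte immediate gives "mov $val, %eax".
 */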

asm (
                        "optprobe_template_func:\n"
                        ".global optprobe_template_entry\n"
                        "optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
                        /* We don't bother saving the ss register */
                        "       pushq %rsp\n"
                        "       pushfq\n"
                        SAVE_REGS_STRING
                        "       movq %rsp, %rsi\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Move flags to rsp */
                        "       movq 144(%rsp), %rdx\n"
                        "       movq %rdx, 152(%rsp)\n"
                        RESTORE_REGS_STRING
                        /* Skip flags entry */
                        "       addq $8, %rsp\n"
                        "       popfq\n"
#else /* CONFIG_X86_32 */
                        "       pushf\n"
                        SAVE_REGS_STRING
                        "       movl %esp, %edx\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        RESTORE_REGS_STRING
                        "       addl $4, %esp\n"        /* skip cs */
                        "       popf\n"
#endif
                        ".global optprobe_template_end\n"
                        "optprobe_template_end:\n"
                        ".type optprobe_template_func, @function\n"
                        ".size optprobe_template_func, .-optprobe_template_func\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
NOKPROBE_SYMBOL(optprobe_template_func);
NOKPROBE_SYMBOL(optprobe_template_entry);
NOKPROBE_SYMBOL(optprobe_template_val);
NOKPROBE_SYMBOL(optprobe_template_call);
NOKPROBE_SYMBOL(optprobe_template_end);

#define TMPL_MOVE_IDX \
        ((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
        ((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
        ((long)optprobe_template_end - (long)optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

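/*
 * The detour buffer built by arch_prepare_optimized_kprobe() starts with a
 * copy of the template above, followed by the copied original instructions
 * and a jump back into the probed function:
 *
 *   offset 0            : register-saving template code
 *   TMPL_MOVE_IDX       : NOPs, patched with "mov &op" into the 1st argument
 *   TMPL_CALL_IDX       : NOPs, patched with "call optimized_callback"
 *   TMPL_END_IDX        : copied original instructions
 *   TMPL_END_IDX + size : relative jump back to the original code
 */
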
/* Optimized kprobe callback function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
        /* This is possible if op is under delayed unoptimizing */
        if (kprobe_disabled(&op->kp))
                return;

        preempt_disable();
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
        } else {
                struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
                /* Save skipped registers */
#ifdef CONFIG_X86_64
                regs->cs = __KERNEL_CS;
#else
                regs->cs = __KERNEL_CS | get_kernel_rpl();
                regs->gs = 0;
#endif
                regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
                regs->orig_ax = ~0UL;

                __this_cpu_write(current_kprobe, &op->kp);
                kcb->kprobe_status = KPROBE_HIT_ACTIVE;
                opt_pre_handler(&op->kp, regs);
                __this_cpu_write(current_kprobe, NULL);
        }
        preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
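
/*
 * Because the template enters optimized_callback() via a call rather than an
 * int3 trap, and does not save %cs (nor %gs on 32-bit), the callback above
 * fills those fields in by hand, points regs->ip just past where the
 * breakpoint byte would sit (kp.addr + INT3_SIZE) and sets orig_ax to ~0UL,
 * so that opt_pre_handler() sees a pt_regs that looks like the int3 path.
 */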

static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
        struct insn insn;
        int len = 0, ret;

        while (len < RELATIVEJUMP_SIZE) {
                ret = __copy_instruction(dest + len, src + len, real + len, &insn);
                if (!ret || !can_boost(&insn, src + len))
                        return -EINVAL;
                len += ret;
        }
        /* Check whether the address range is reserved */
        if (ftrace_text_reserved(src, src + len - 1) ||
            alternatives_text_reserved(src, src + len - 1) ||
            jump_label_text_reserved(src, src + len - 1))
                return -EBUSY;

        return len;
}

/* Check whether insn is an indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
                insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}
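
/*
 * For opcode 0xff, the ModRM.reg field selects the operation: /4 is the near
 * indirect "jmp r/m" and /5 the far indirect form, so the "(reg & 6) == 4"
 * test above matches exactly those two encodings.
 */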

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
        unsigned long target = 0;

        switch (insn->opcode.bytes[0]) {
        case 0xe0:      /* loopne */
        case 0xe1:      /* loope */
        case 0xe2:      /* loop */
        case 0xe3:      /* jcxz */
        case 0xe9:      /* near relative jump */
        case 0xeb:      /* short relative jump */
                break;
        case 0x0f:
                if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
                        break;
                return 0;
        default:
                if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
                        break;
                return 0;
        }
        target = (unsigned long)insn->next_byte + insn->immediate.value;

        return (start <= target && target <= start + len);
}
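
/*
 * Relative branch displacements are relative to the end of the branch
 * instruction, hence target = insn->next_byte + immediate above. Note that
 * can_optimize() resets insn->kaddr and insn->next_byte to the original
 * kernel address before calling this, so the target is computed against the
 * probed code rather than the temporary recovery buffer.
 */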

static int insn_is_indirect_jump(struct insn *insn)
{
        int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
        /*
         * A jump to an __x86_indirect_thunk_* function is treated as an
         * indirect jump. Note that even with CONFIG_RETPOLINE=y, a kernel
         * built with an older gcc may still emit plain indirect jumps, so
         * this check is added on top of the indirect-jump check above
         * rather than replacing it.
         */
        if (!ret)
                ret = insn_jump_into_range(insn,
                                (unsigned long)__indirect_thunk_start,
                                (unsigned long)__indirect_thunk_end -
                                (unsigned long)__indirect_thunk_start);
#endif
        return ret;
}

/* Decode the whole function to ensure no instruction jumps into the probed range */
static int can_optimize(unsigned long paddr)
{
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];

        /* Lookup symbol including addr */
        if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
                return 0;

        /*
         * Do not optimize in the entry code due to the unstable
         * stack handling and register setup.
         */
        if (((paddr >= (unsigned long)__entry_text_start) &&
             (paddr <  (unsigned long)__entry_text_end)) ||
            ((paddr >= (unsigned long)__irqentry_text_start) &&
             (paddr <  (unsigned long)__irqentry_text_end)))
                return 0;

        /* Check that there is enough space for a relative jump. */
        if (size - offset < RELATIVEJUMP_SIZE)
                return 0;

        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr - offset + size) { /* Decode until function end */
                unsigned long recovered_insn;
                if (search_exception_tables(addr))
                        /*
                         * Since some fixup code may jump into this function,
                         * we can't optimize a kprobe in this function.
                         */
                        return 0;
                recovered_insn = recover_probed_instruction(buf, addr);
                if (!recovered_insn)
                        return 0;
                kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
                insn_get_length(&insn);
                /* Another subsystem has put a breakpoint there */
                if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                        return 0;
                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
                /* Reject instructions that jump (or may jump) into the replaced range */
                if (insn_is_indirect_jump(&insn) ||
                    insn_jump_into_range(&insn, paddr + INT3_SIZE,
                                         RELATIVE_ADDR_SIZE))
                        return 0;
                addr += insn.length;
        }

        return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
        int i;
        struct kprobe *p;

        for (i = 1; i < op->optinsn.size; i++) {
                p = get_kprobe(op->kp.addr + i);
                if (p && !kprobe_disarmed(p))
                        return -EEXIST;
        }

        return 0;
}

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
                                 unsigned long addr)
{
        return ((unsigned long)op->kp.addr <= addr &&
                (unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
        if (op->optinsn.insn) {
                free_optinsn_slot(op->optinsn.insn, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
        __arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that will be replaced by the jump.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
                                  struct kprobe *__unused)
{
        u8 *buf = NULL, *slot;
        int ret, len;
        long rel;

        if (!can_optimize((unsigned long)op->kp.addr))
                return -EILSEQ;

        buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        op->optinsn.insn = slot = get_optinsn_slot();
        if (!slot) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * Verify that the address gap is within the +/-2GB range, because
         * the detour uses a relative jump.
         */
        rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
        if (abs(rel) > 0x7fffffff) {
                ret = -ERANGE;
                goto err;
        }

        /* Copy the arch-dependent instance from the template */
        memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

        /* Copy instructions into the out-of-line buffer */
        ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
                                          slot + TMPL_END_IDX);
        if (ret < 0)
                goto err;
        op->optinsn.size = ret;
        len = TMPL_END_IDX + op->optinsn.size;

        /* Set probe information */
        synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

        /* Set probe function call */
        synthesize_relcall(buf + TMPL_CALL_IDX,
                           slot + TMPL_CALL_IDX, optimized_callback);

        /* Set the returning jmp instruction at the tail of the out-of-line buffer */
        synthesize_reljump(buf + len, slot + len,
                           (u8 *)op->kp.addr + op->optinsn.size);
        len += RELATIVEJUMP_SIZE;

        /* We have to use text_poke() for the instruction buffer because it is RO */
        text_poke(slot, buf, len);
        ret = 0;
out:
        kfree(buf);
        return ret;

err:
        __arch_remove_optimized_kprobe(op, 0);
        goto out;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * The caller must hold kprobe_mutex and text_mutex.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
        struct optimized_kprobe *op, *tmp;
        u8 insn_buf[RELATIVEJUMP_SIZE];

        list_for_each_entry_safe(op, tmp, oplist, list) {
                s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));

                WARN_ON(kprobe_disabled(&op->kp));

                /* Back up the instructions that will be replaced by the jump address */
                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
                       RELATIVE_ADDR_SIZE);

                insn_buf[0] = RELATIVEJUMP_OPCODE;
                *(s32 *)(&insn_buf[1]) = rel;

                text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
                             op->optinsn.insn);

                list_del_init(&op->list);
        }
}
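
/*
 * The 5 bytes poked in above form a near relative jump into the detour
 * buffer. For example (addresses purely illustrative), with
 * op->kp.addr == 0xffffffff81000000 and op->optinsn.insn == 0xffffffff81002000:
 *
 *   e9 fb 1f 00 00	jmp 0xffffffff81002000	; rel32 = 0x2000 - 5 = 0x1ffb
 *
 * since the displacement of RELATIVEJUMP_OPCODE (0xe9) is taken relative to
 * the end of the jump instruction itself.
 */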

/* Replace a relative jump with a breakpoint (int3).  */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
        u8 insn_buf[RELATIVEJUMP_SIZE];

        /* Set int3 as the first byte for kprobes */
        insn_buf[0] = BREAKPOINT_INSTRUCTION;
        memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
                     op->optinsn.insn);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * The caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
                                    struct list_head *done_list)
{
        struct optimized_kprobe *op, *tmp;

        list_for_each_entry_safe(op, tmp, oplist, list) {
                arch_unoptimize_kprobe(op);
                list_move(&op->list, done_list);
        }
}

int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
        struct optimized_kprobe *op;

        if (p->flags & KPROBE_FLAG_OPTIMIZED) {
                /* This kprobe is really able to run the optimized path. */
                op = container_of(p, struct optimized_kprobe, kp);
                /* Detour through the copied instructions */
                regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
                if (!reenter)
                        reset_current_kprobe();
                return 1;
        }
        return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);