/*
 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
 * Copyright (c) 2014-2015 Andrew Lutomirski
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * This is a series of tests that exercises the sigreturn(2) syscall and
 * the IRET / SYSRET paths in the kernel.
 *
 * For now, this focuses on the effects of unusual CS and SS values,
 * and it has a bunch of tests to make sure that ESP/RSP is restored
 * properly.
 *
 * The basic idea behind these tests is to raise(SIGUSR1) to create a
 * sigcontext frame, plug in the values to be tested, and then return,
 * which implicitly invokes sigreturn(2) and programs the user context
 * as desired.
 *
 * For tests for which we expect sigreturn and the subsequent return to
 * user mode to succeed, we return to a short trampoline that generates
 * SIGTRAP so that the meat of the tests can be ordinary C code in a
 * SIGTRAP handler.
 *
 * The inner workings of each test are documented below.
 *
 * Do not run on outdated, unpatched kernels at risk of nasty crashes.
 */
41 #include <sys/syscall.h>
47 #include <sys/signal.h>
48 #include <sys/ucontext.h>
54 #include <sys/ptrace.h>
/*
 * In principle, this test can run on Linux emulation layers (e.g.
 * Illumos "LX branded zones"). Solaris-based kernels reserve LDT
 * entries 0-5 for their own internal purposes, so start our LDT
 * allocations above that reservation. (The tests don't pass on LX
 * branded zones, but at least this lets them run.)
 */
/*
 * A page-aligned 64 KiB buffer that serves as a stack reachable through
 * some of our segments (the data segment descriptors use it as their base).
 */
static unsigned char stack16[65536] __attribute__((aligned(4096)));
70 * An aligned int3 instruction used as a trampoline. Some of the tests
71 * want to fish out their ss values, so this trampoline copies ss to eax
74 asm (".pushsection .text\n\t"
75 ".type int3, @function\n\t"
80 ".size int3, . - int3\n\t"
81 ".align 4096, 0xcc\n\t"
83 extern char int3[4096];
/*
 * At startup, we prepare:
 *
 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 *   descriptor or out of bounds).
 * - code16_sel: A 16-bit LDT code segment pointing to int3.
 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
 *   stack16.
 *
 * For no particularly good reason, xyz_sel is a selector value with the
 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
 * descriptor table. These variables will be zero if their respective
 * segments could not be allocated.
 */
/*
 * LDT selectors, stored as complete selector values (index plus the
 * table-indicator and RPL bits).  They start out as zero.
 */
static unsigned short ldt_nonexistent_sel;
static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;

/*
 * GDT slots, stored as bare descriptor-table indices rather than
 * selectors; convert with GDT3() before loading.  Zero means "no slot".
 */
static unsigned short gdt_data16_idx, gdt_npdata32_idx;
/*
 * Build a selector for GDT entry @idx: the index goes in bits 15:3, the
 * table-indicator bit (bit 2) is left clear to select the GDT, and the
 * low two bits request privilege level 3.
 */
static unsigned short GDT3(int idx)
{
	return (idx << 3) | 3;
}
/*
 * Build a selector for LDT entry @idx: the index goes in bits 15:3, the
 * table-indicator bit (bit 2) is set to select the LDT, and the low two
 * bits request privilege level 3.
 */
static unsigned short LDT3(int idx)
{
	return (idx << 3) | 7;
}
118 /* Our sigaltstack scratch space. */
119 static char altstack_data[SIGSTKSZ];
/*
 * Install @handler as the three-argument (SA_SIGINFO) handler for @sig.
 *
 * @sig:     signal number to hook.
 * @handler: sa_sigaction-style callback.
 * @flags:   extra sa_flags bits OR-ed with SA_SIGINFO (e.g. SA_ONSTACK).
 *
 * The handler runs with an empty additional signal mask.  A sigaction()
 * failure is fatal: the program exits through err().
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | flags;
	sigemptyset(&sa.sa_mask);
	if (sigaction(sig, &sa, 0))
		err(1, "sigaction");
}
/*
 * Restore the default disposition (SIG_DFL) for @sig.
 *
 * A sigaction() failure is fatal: the program exits through err().
 */
static void clearhandler(int sig)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = SIG_DFL;
	sigemptyset(&sa.sa_mask);
	if (sigaction(sig, &sa, 0))
		err(1, "sigaction");
}
/*
 * Try to install LDT descriptor @desc via modify_ldt(2).
 *
 * @desc: descriptor to write; desc->entry_number picks the LDT slot.
 * @var:  receives the ring-3 LDT selector for that slot on success, or 0
 *        if the kernel refused the descriptor.
 * @name: human-readable segment name, used only in the failure note.
 *
 * Failure is not fatal: a note is printed and *var is zeroed, so callers
 * can treat a zero selector as "segment unavailable".
 */
static void add_ldt(const struct user_desc *desc, unsigned short *var,
		    const char *name)
{
	if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
		*var = LDT3(desc->entry_number);
	} else {
		printf("[NOTE]\tFailed to create %s segment\n", name);
		*var = 0;
	}
}
154 static void setup_ldt(void)
156 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
157 errx(1, "stack16 is too high\n");
158 if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
159 errx(1, "int3 is too high\n");
161 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
163 const struct user_desc code16_desc = {
164 .entry_number = LDT_OFFSET + 0,
165 .base_addr = (unsigned long)int3,
168 .contents = 2, /* Code, not conforming */
171 .seg_not_present = 0,
174 add_ldt(&code16_desc, &code16_sel, "code16");
176 const struct user_desc data16_desc = {
177 .entry_number = LDT_OFFSET + 1,
178 .base_addr = (unsigned long)stack16,
181 .contents = 0, /* Data, grow-up */
184 .seg_not_present = 0,
187 add_ldt(&data16_desc, &data16_sel, "data16");
189 const struct user_desc npcode32_desc = {
190 .entry_number = LDT_OFFSET + 3,
191 .base_addr = (unsigned long)int3,
194 .contents = 2, /* Code, not conforming */
197 .seg_not_present = 1,
200 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
202 const struct user_desc npdata32_desc = {
203 .entry_number = LDT_OFFSET + 4,
204 .base_addr = (unsigned long)stack16,
207 .contents = 0, /* Data, grow-up */
210 .seg_not_present = 1,
213 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
215 struct user_desc gdt_data16_desc = {
217 .base_addr = (unsigned long)stack16,
220 .contents = 0, /* Data, grow-up */
223 .seg_not_present = 0,
227 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
229 * This probably indicates vulnerability to CVE-2014-8133.
230 * Merely getting here isn't definitive, though, and we'll
231 * diagnose the problem for real later on.
233 printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
234 gdt_data16_desc.entry_number);
235 gdt_data16_idx = gdt_data16_desc.entry_number;
237 printf("[OK]\tset_thread_area refused 16-bit data\n");
240 struct user_desc gdt_npdata32_desc = {
242 .base_addr = (unsigned long)stack16,
245 .contents = 0, /* Data, grow-up */
248 .seg_not_present = 1,
252 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
254 * As a hardening measure, newer kernels don't allow this.
256 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
257 gdt_npdata32_desc.entry_number);
258 gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
260 printf("[OK]\tset_thread_area refused 16-bit data\n");
/*
 * Register snapshots shared between the signal handlers and the tests:
 * initial_regs is the frame saved on entry to the SIGUSR1 handler,
 * requested_regs is the frame that handler asks the kernel to restore,
 * and resulting_regs is the frame captured afterwards by sigtrap().
 */
static gregset_t initial_regs, requested_regs, resulting_regs;

/* CS and SS selectors that the SIGUSR1 handler plugs into the frame. */
static volatile unsigned short sig_cs, sig_ss;

/*
 * Signal bookkeeping; sig_err and sig_trapno are filled in by sigtrap()
 * from the REG_ERR and REG_TRAPNO slots of the delivered frame.
 */
static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
271 /* Abstractions for some 32-bit vs 64-bit differences. */
273 # define REG_IP REG_RIP
274 # define REG_SP REG_RSP
275 # define REG_AX REG_RAX
278 unsigned short cs, gs, fs, ss;
281 static unsigned short *ssptr(ucontext_t *ctx)
283 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
287 static unsigned short *csptr(ucontext_t *ctx)
289 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
293 # define REG_IP REG_EIP
294 # define REG_SP REG_ESP
295 # define REG_AX REG_EAX
297 static greg_t *ssptr(ucontext_t *ctx)
299 return &ctx->uc_mcontext.gregs[REG_SS];
302 static greg_t *csptr(ucontext_t *ctx)
304 return &ctx->uc_mcontext.gregs[REG_CS];
/*
 * Number of errors in the current test case.  Declared volatile
 * sig_atomic_t so that it can be updated safely from signal handlers.
 */
static volatile sig_atomic_t nerrs;
312 * SIGUSR1 handler. Sets CS and SS as requested and points IP to the
313 * int3 trampoline. Sets SP to a large known value so that we can see
314 * whether the value round-trips back to user mode correctly.
316 static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
318 ucontext_t *ctx = (ucontext_t*)ctx_void;
320 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
322 *csptr(ctx) = sig_cs;
323 *ssptr(ctx) = sig_ss;
325 ctx->uc_mcontext.gregs[REG_IP] =
326 sig_cs == code16_sel ? 0 : (unsigned long)&int3;
327 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
328 ctx->uc_mcontext.gregs[REG_AX] = 0;
330 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
331 requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */
337 * Called after a successful sigreturn. Restores our state so that
338 * the original raise(SIGUSR1) returns.
340 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
342 ucontext_t *ctx = (ucontext_t*)ctx_void;
344 sig_err = ctx->uc_mcontext.gregs[REG_ERR];
345 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
348 asm ("mov %%ss,%0" : "=r" (ss));
350 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
351 if (asm_ss != sig_ss && sig == SIGTRAP) {
352 /* Sanity check failure. */
353 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
354 ss, *ssptr(ctx), (unsigned long long)asm_ss);
358 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
359 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
365 * Checks a given selector for its code bitness or returns -1 if it's not
366 * a usable code segment selector.
368 int cs_bitness(unsigned short cs)
370 uint32_t valid = 0, ar;
371 asm ("lar %[cs], %[ar]\n\t"
373 "mov $1, %[valid]\n\t"
375 : [ar] "=r" (ar), [valid] "+rm" (valid)
381 bool db = (ar & (1 << 22));
382 bool l = (ar & (1 << 21));
385 return -1; /* Not code. */
394 return -1; /* Unknown bitness. */
397 /* Finds a usable code segment of the requested bitness. */
398 int find_cs(int bitness)
400 unsigned short my_cs;
402 asm ("mov %%cs,%0" : "=r" (my_cs));
404 if (cs_bitness(my_cs) == bitness)
406 if (cs_bitness(my_cs + (2 << 3)) == bitness)
407 return my_cs + (2 << 3);
408 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
409 return my_cs - (2 << 3);
410 if (cs_bitness(code16_sel) == bitness)
413 printf("[WARN]\tCould not find %d-bit CS\n", bitness);
417 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
419 int cs = find_cs(cs_bits);
421 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
422 cs_bits, use_16bit_ss ? 16 : 32);
426 if (force_ss != -1) {
431 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
437 asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
443 printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
444 cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
445 (sig_ss & 4) ? "" : ", GDT");
452 * Check that each register had an acceptable value when the
453 * int3 trampoline was invoked.
455 for (int i = 0; i < NGREG; i++) {
456 greg_t req = requested_regs[i], res = resulting_regs[i];
457 if (i == REG_TRAPNO || i == REG_IP)
458 continue; /* don't care */
462 * If we were using a 16-bit stack segment, then
463 * the kernel is a bit stuck: IRET only restores
464 * the low 16 bits of ESP/RSP if SS is 16-bit.
465 * The kernel uses a hack to restore bits 31:16,
466 * but that hack doesn't help with bits 63:32.
467 * On Intel CPUs, bits 63:32 end up zeroed, and, on
468 * AMD CPUs, they leak the high bits of the kernel
469 * espfix64 stack pointer. There's very little that
470 * the kernel can do about it.
472 * Similarly, if we are returning to a 32-bit context,
473 * the CPU will often lose the high 32 bits of RSP.
479 if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
480 printf("[NOTE]\tSP: %llx -> %llx\n",
481 (unsigned long long)req,
482 (unsigned long long)res);
486 printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
487 (unsigned long long)requested_regs[i],
488 (unsigned long long)resulting_regs[i]);
493 bool ignore_reg = false;
498 if (i == REG_CSGSFS) {
499 struct selectors *req_sels =
500 (void *)&requested_regs[REG_CSGSFS];
501 struct selectors *res_sels =
502 (void *)&resulting_regs[REG_CSGSFS];
503 if (req_sels->cs != res_sels->cs) {
504 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
505 req_sels->cs, res_sels->cs);
509 if (req_sels->ss != res_sels->ss) {
510 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
511 req_sels->ss, res_sels->ss);
519 /* Sanity check on the kernel */
520 if (i == REG_AX && requested_regs[i] != resulting_regs[i]) {
521 printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
522 (unsigned long long)requested_regs[i],
523 (unsigned long long)resulting_regs[i]);
528 if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
529 printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
530 i, (unsigned long long)requested_regs[i],
531 (unsigned long long)resulting_regs[i]);
537 printf("[OK]\tall registers okay\n");
542 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
544 int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
551 printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
552 cs_bits, sig_cs, sig_ss);
557 char errdesc[32] = "";
559 const char *src = (sig_err & 1) ? " EXT" : "";
561 if ((sig_err & 0x6) == 0x0)
563 else if ((sig_err & 0x6) == 0x4)
565 else if ((sig_err & 0x6) == 0x2)
570 sprintf(errdesc, "%s%s index %d, ",
571 table, src, sig_err >> 3);
575 if (sig_trapno == 13)
576 strcpy(trapname, "GP");
577 else if (sig_trapno == 11)
578 strcpy(trapname, "NP");
579 else if (sig_trapno == 12)
580 strcpy(trapname, "SS");
581 else if (sig_trapno == 32)
582 strcpy(trapname, "IRET"); /* X86_TRAP_IRET */
584 sprintf(trapname, "%d", sig_trapno);
586 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
587 trapname, (unsigned long)sig_err,
588 errdesc, strsignal(sig_trapped));
591 printf("[FAIL]\tDid not get SIGSEGV\n");
599 unsigned short my_cs, my_ss;
601 asm volatile ("mov %%cs,%0" : "=r" (my_cs));
602 asm volatile ("mov %%ss,%0" : "=r" (my_ss));
606 .ss_sp = altstack_data,
609 if (sigaltstack(&stack, NULL) != 0)
610 err(1, "sigaltstack");
612 sethandler(SIGUSR1, sigusr1, 0);
613 sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
615 /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
616 total_nerrs += test_valid_sigreturn(64, false, -1);
617 total_nerrs += test_valid_sigreturn(32, false, -1);
618 total_nerrs += test_valid_sigreturn(16, false, -1);
621 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
622 * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant.
624 * This catches the original missing-espfix-on-64-bit-kernels issue
625 * as well as CVE-2014-8134.
627 total_nerrs += test_valid_sigreturn(64, true, -1);
628 total_nerrs += test_valid_sigreturn(32, true, -1);
629 total_nerrs += test_valid_sigreturn(16, true, -1);
631 if (gdt_data16_idx) {
633 * For performance reasons, Linux skips espfix if SS points
634 * to the GDT. If we were able to allocate a 16-bit SS in
635 * the GDT, see if it leaks parts of the kernel stack pointer.
637 * This tests for CVE-2014-8133.
639 total_nerrs += test_valid_sigreturn(64, true,
640 GDT3(gdt_data16_idx));
641 total_nerrs += test_valid_sigreturn(32, true,
642 GDT3(gdt_data16_idx));
643 total_nerrs += test_valid_sigreturn(16, true,
644 GDT3(gdt_data16_idx));
648 * We're done testing valid sigreturn cases. Now we test states
649 * for which sigreturn itself will succeed but the subsequent
650 * entry to user mode will fail.
652 * Depending on the failure mode and the kernel bitness, these
653 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
655 clearhandler(SIGTRAP);
656 sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
657 sethandler(SIGBUS, sigtrap, SA_ONSTACK);
658 sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */
660 /* Easy failures: invalid SS, resulting in #GP(0) */
661 test_bad_iret(64, ldt_nonexistent_sel, -1);
662 test_bad_iret(32, ldt_nonexistent_sel, -1);
663 test_bad_iret(16, ldt_nonexistent_sel, -1);
665 /* These fail because SS isn't a data segment, resulting in #GP(SS) */
666 test_bad_iret(64, my_cs, -1);
667 test_bad_iret(32, my_cs, -1);
668 test_bad_iret(16, my_cs, -1);
670 /* Try to return to a not-present code segment, triggering #NP(SS). */
671 test_bad_iret(32, my_ss, npcode32_sel);
674 * Try to return to a not-present but otherwise valid data segment.
675 * This will cause IRET to fail with #SS on the espfix stack. This
676 * exercises CVE-2014-9322.
678 * Note that, if espfix is enabled, 64-bit Linux will lose track
679 * of the actual cause of failure and report #GP(0) instead.
680 * This would be very difficult for Linux to avoid, because
681 * espfix64 causes IRET failures to be promoted to #DF, so the
682 * original exception frame is never pushed onto the stack.
684 test_bad_iret(32, npdata32_sel, -1);
687 * Try to return to a not-present but otherwise valid data
688 * segment without invoking espfix. Newer kernels don't allow
689 * this to happen in the first place. On older kernels, though,
690 * this can trigger CVE-2014-9322.
692 if (gdt_npdata32_idx)
693 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
695 return total_nerrs ? 1 : 0;