GNU Linux-libre 4.14.290-gnu1
[releases.git] / tools / testing / selftests / seccomp / seccomp_bpf.c
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7
8 #include <sys/types.h>
9
10 /*
11  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
12  * we need to use the kernel's siginfo.h file and trick glibc
13  * into accepting it.
14  */
15 #if !__GLIBC_PREREQ(2, 26)
16 # include <asm/siginfo.h>
17 # define __have_siginfo_t 1
18 # define __have_sigval_t 1
19 # define __have_sigevent_t 1
20 #endif
21
22 #include <errno.h>
23 #include <linux/filter.h>
24 #include <sys/prctl.h>
25 #include <sys/ptrace.h>
26 #include <sys/user.h>
27 #include <linux/prctl.h>
28 #include <linux/ptrace.h>
29 #include <linux/seccomp.h>
30 #include <pthread.h>
31 #include <semaphore.h>
32 #include <signal.h>
33 #include <stddef.h>
34 #include <stdbool.h>
35 #include <string.h>
36 #include <time.h>
37 #include <linux/elf.h>
38 #include <sys/uio.h>
39 #include <sys/utsname.h>
40 #include <sys/fcntl.h>
41 #include <sys/mman.h>
42 #include <sys/times.h>
43
44 #define _GNU_SOURCE
45 #include <unistd.h>
46 #include <sys/syscall.h>
47
48 #include "../kselftest_harness.h"
49
/*
 * Fallback values for prctl() options and seccomp modes, used only when
 * the installed userspace headers do not already provide them.
 */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

/* The NO_NEW_PRIVS getter/setter pair is keyed on the setter alone. */
#ifndef PR_SET_NO_NEW_PRIVS
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
# define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
# define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
# define SECCOMP_EXT_ACT_TSYNC 1
#endif

/* Values passed as the mode argument of prctl(PR_SET_SECCOMP, ...). */
#ifndef SECCOMP_MODE_STRICT
# define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER 2
#endif
78
/*
 * Fallback layout of the record a seccomp filter program loads from,
 * supplied only when the headers lack seccomp filter support (detected
 * through SECCOMP_RET_ALLOW being absent).
 */
#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int nr;				/* compared against __NR_* by the filters */
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];			/* indexed through syscall_arg() */
};
#endif

/* Filter return actions, again only for headers that lack them. */
#ifndef SECCOMP_RET_KILL_PROCESS
# define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
# define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
#endif
#ifndef SECCOMP_RET_KILL
# define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
# define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
# define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
#endif
102
/*
 * Per-architecture number of the seccomp syscall for headers that do not
 * define it. Unknown architectures get a warning and a placeholder value.
 */
#ifndef __NR_seccomp
# if defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif
123
/* Operation codes for the seccomp() wrapper, if the headers lack them. */
#ifndef SECCOMP_SET_MODE_STRICT
# define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
# define SECCOMP_SET_MODE_FILTER 1
#endif

#ifndef SECCOMP_GET_ACTION_AVAIL
# define SECCOMP_GET_ACTION_AVAIL 2
#endif

/* Flag bits for the seccomp() flags argument, if the headers lack them. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC
# define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_LOG
# define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
# define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
147
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * @op:    operation code (SECCOMP_SET_MODE_*, SECCOMP_GET_ACTION_AVAIL)
 * @flags: SECCOMP_FILTER_FLAG_* bits
 * @args:  operation-specific argument, passed through untouched
 *
 * errno is cleared before the call. Returns whatever syscall() returns,
 * narrowed to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);

	return rc;
}
#endif
155
/*
 * syscall_arg(_n) - byte offset, inside struct seccomp_data, of the 32-bit
 * word the filters load for argument _n. On big-endian the offset is
 * advanced by one __u32 within the 64-bit slot; on little-endian it is the
 * start of the slot.
 */
#if __BYTE_ORDER == __BIG_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#elif __BYTE_ORDER == __LITTLE_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#else
#error "wut? Unknown __BYTE_ORDER?!"
#endif
163
/* Distinct marker values that helper threads hand back as their result. */
#define SIBLING_EXIT_UNKILLED	0xbadbeef	/* thread ran to completion */
#define SIBLING_EXIT_FAILURE	0xbadface	/* thread outlived a syscall meant to kill it */
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
167
168 TEST(mode_strict_support)
169 {
170         long ret;
171
172         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
173         ASSERT_EQ(0, ret) {
174                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
175         }
176         syscall(__NR_exit, 0);
177 }
178
179 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
180 {
181         long ret;
182
183         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
184         ASSERT_EQ(0, ret) {
185                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
186         }
187         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
188                 NULL, NULL, NULL);
189         EXPECT_FALSE(true) {
190                 TH_LOG("Unreachable!");
191         }
192 }
193
194 /* Note! This doesn't test no new privs behavior */
195 TEST(no_new_privs_support)
196 {
197         long ret;
198
199         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
200         EXPECT_EQ(0, ret) {
201                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
202         }
203 }
204
205 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
206 TEST(mode_filter_support)
207 {
208         long ret;
209
210         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
211         ASSERT_EQ(0, ret) {
212                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
213         }
214         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
215         EXPECT_EQ(-1, ret);
216         EXPECT_EQ(EFAULT, errno) {
217                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
218         }
219 }
220
221 TEST(mode_filter_without_nnp)
222 {
223         struct sock_filter filter[] = {
224                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
225         };
226         struct sock_fprog prog = {
227                 .len = (unsigned short)ARRAY_SIZE(filter),
228                 .filter = filter,
229         };
230         long ret;
231
232         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
233         ASSERT_LE(0, ret) {
234                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
235         }
236         errno = 0;
237         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
238         /* Succeeds with CAP_SYS_ADMIN, fails without */
239         /* TODO(wad) check caps not euid */
240         if (geteuid()) {
241                 EXPECT_EQ(-1, ret);
242                 EXPECT_EQ(EACCES, errno);
243         } else {
244                 EXPECT_EQ(0, ret);
245         }
246 }
247
248 #define MAX_INSNS_PER_PATH 32768
249
250 TEST(filter_size_limits)
251 {
252         int i;
253         int count = BPF_MAXINSNS + 1;
254         struct sock_filter allow[] = {
255                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
256         };
257         struct sock_filter *filter;
258         struct sock_fprog prog = { };
259         long ret;
260
261         filter = calloc(count, sizeof(*filter));
262         ASSERT_NE(NULL, filter);
263
264         for (i = 0; i < count; i++)
265                 filter[i] = allow[0];
266
267         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
268         ASSERT_EQ(0, ret);
269
270         prog.filter = filter;
271         prog.len = count;
272
273         /* Too many filter instructions in a single filter. */
274         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
275         ASSERT_NE(0, ret) {
276                 TH_LOG("Installing %d insn filter was allowed", prog.len);
277         }
278
279         /* One less is okay, though. */
280         prog.len -= 1;
281         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
282         ASSERT_EQ(0, ret) {
283                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
284         }
285 }
286
287 TEST(filter_chain_limits)
288 {
289         int i;
290         int count = BPF_MAXINSNS;
291         struct sock_filter allow[] = {
292                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
293         };
294         struct sock_filter *filter;
295         struct sock_fprog prog = { };
296         long ret;
297
298         filter = calloc(count, sizeof(*filter));
299         ASSERT_NE(NULL, filter);
300
301         for (i = 0; i < count; i++)
302                 filter[i] = allow[0];
303
304         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
305         ASSERT_EQ(0, ret);
306
307         prog.filter = filter;
308         prog.len = 1;
309
310         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
311         ASSERT_EQ(0, ret);
312
313         prog.len = count;
314
315         /* Too many total filter instructions. */
316         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
317                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
318                 if (ret != 0)
319                         break;
320         }
321         ASSERT_NE(0, ret) {
322                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
323                        i, count, i * (count + 4));
324         }
325 }
326
327 TEST(mode_filter_cannot_move_to_strict)
328 {
329         struct sock_filter filter[] = {
330                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
331         };
332         struct sock_fprog prog = {
333                 .len = (unsigned short)ARRAY_SIZE(filter),
334                 .filter = filter,
335         };
336         long ret;
337
338         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
339         ASSERT_EQ(0, ret);
340
341         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
342         ASSERT_EQ(0, ret);
343
344         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
345         EXPECT_EQ(-1, ret);
346         EXPECT_EQ(EINVAL, errno);
347 }
348
349
350 TEST(mode_filter_get_seccomp)
351 {
352         struct sock_filter filter[] = {
353                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
354         };
355         struct sock_fprog prog = {
356                 .len = (unsigned short)ARRAY_SIZE(filter),
357                 .filter = filter,
358         };
359         long ret;
360
361         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
362         ASSERT_EQ(0, ret);
363
364         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
365         EXPECT_EQ(0, ret);
366
367         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
368         ASSERT_EQ(0, ret);
369
370         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
371         EXPECT_EQ(2, ret);
372 }
373
374
375 TEST(ALLOW_all)
376 {
377         struct sock_filter filter[] = {
378                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
379         };
380         struct sock_fprog prog = {
381                 .len = (unsigned short)ARRAY_SIZE(filter),
382                 .filter = filter,
383         };
384         long ret;
385
386         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
387         ASSERT_EQ(0, ret);
388
389         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
390         ASSERT_EQ(0, ret);
391 }
392
393 TEST(empty_prog)
394 {
395         struct sock_filter filter[] = {
396         };
397         struct sock_fprog prog = {
398                 .len = (unsigned short)ARRAY_SIZE(filter),
399                 .filter = filter,
400         };
401         long ret;
402
403         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
404         ASSERT_EQ(0, ret);
405
406         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
407         EXPECT_EQ(-1, ret);
408         EXPECT_EQ(EINVAL, errno);
409 }
410
411 TEST(log_all)
412 {
413         struct sock_filter filter[] = {
414                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
415         };
416         struct sock_fprog prog = {
417                 .len = (unsigned short)ARRAY_SIZE(filter),
418                 .filter = filter,
419         };
420         long ret;
421         pid_t parent = getppid();
422
423         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
424         ASSERT_EQ(0, ret);
425
426         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
427         ASSERT_EQ(0, ret);
428
429         /* getppid() should succeed and be logged (no check for logging) */
430         EXPECT_EQ(parent, syscall(__NR_getppid));
431 }
432
433 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
434 {
435         struct sock_filter filter[] = {
436                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
437         };
438         struct sock_fprog prog = {
439                 .len = (unsigned short)ARRAY_SIZE(filter),
440                 .filter = filter,
441         };
442         long ret;
443
444         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
445         ASSERT_EQ(0, ret);
446
447         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
448         ASSERT_EQ(0, ret);
449         EXPECT_EQ(0, syscall(__NR_getpid)) {
450                 TH_LOG("getpid() shouldn't ever return");
451         }
452 }
453
454 /* return code >= 0x80000000 is unused. */
455 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
456 {
457         struct sock_filter filter[] = {
458                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
459         };
460         struct sock_fprog prog = {
461                 .len = (unsigned short)ARRAY_SIZE(filter),
462                 .filter = filter,
463         };
464         long ret;
465
466         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
467         ASSERT_EQ(0, ret);
468
469         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
470         ASSERT_EQ(0, ret);
471         EXPECT_EQ(0, syscall(__NR_getpid)) {
472                 TH_LOG("getpid() shouldn't ever return");
473         }
474 }
475
476 TEST_SIGNAL(KILL_all, SIGSYS)
477 {
478         struct sock_filter filter[] = {
479                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
480         };
481         struct sock_fprog prog = {
482                 .len = (unsigned short)ARRAY_SIZE(filter),
483                 .filter = filter,
484         };
485         long ret;
486
487         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
488         ASSERT_EQ(0, ret);
489
490         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
491         ASSERT_EQ(0, ret);
492 }
493
494 TEST_SIGNAL(KILL_one, SIGSYS)
495 {
496         struct sock_filter filter[] = {
497                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
498                         offsetof(struct seccomp_data, nr)),
499                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
500                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
501                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
502         };
503         struct sock_fprog prog = {
504                 .len = (unsigned short)ARRAY_SIZE(filter),
505                 .filter = filter,
506         };
507         long ret;
508         pid_t parent = getppid();
509
510         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
511         ASSERT_EQ(0, ret);
512
513         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
514         ASSERT_EQ(0, ret);
515
516         EXPECT_EQ(parent, syscall(__NR_getppid));
517         /* getpid() should never return. */
518         EXPECT_EQ(0, syscall(__NR_getpid));
519 }
520
521 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
522 {
523         void *fatal_address;
524         struct sock_filter filter[] = {
525                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
526                         offsetof(struct seccomp_data, nr)),
527                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
528                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
529                 /* Only both with lower 32-bit for now. */
530                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
531                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
532                         (unsigned long)&fatal_address, 0, 1),
533                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
534                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
535         };
536         struct sock_fprog prog = {
537                 .len = (unsigned short)ARRAY_SIZE(filter),
538                 .filter = filter,
539         };
540         long ret;
541         pid_t parent = getppid();
542         struct tms timebuf;
543         clock_t clock = times(&timebuf);
544
545         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
546         ASSERT_EQ(0, ret);
547
548         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
549         ASSERT_EQ(0, ret);
550
551         EXPECT_EQ(parent, syscall(__NR_getppid));
552         EXPECT_LE(clock, syscall(__NR_times, &timebuf));
553         /* times() should never return. */
554         EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
555 }
556
557 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
558 {
559 #ifndef __NR_mmap2
560         int sysno = __NR_mmap;
561 #else
562         int sysno = __NR_mmap2;
563 #endif
564         struct sock_filter filter[] = {
565                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
566                         offsetof(struct seccomp_data, nr)),
567                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
568                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
569                 /* Only both with lower 32-bit for now. */
570                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
571                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
572                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
573                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
574         };
575         struct sock_fprog prog = {
576                 .len = (unsigned short)ARRAY_SIZE(filter),
577                 .filter = filter,
578         };
579         long ret;
580         pid_t parent = getppid();
581         int fd;
582         void *map1, *map2;
583         int page_size = sysconf(_SC_PAGESIZE);
584
585         ASSERT_LT(0, page_size);
586
587         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
588         ASSERT_EQ(0, ret);
589
590         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
591         ASSERT_EQ(0, ret);
592
593         fd = open("/dev/zero", O_RDONLY);
594         ASSERT_NE(-1, fd);
595
596         EXPECT_EQ(parent, syscall(__NR_getppid));
597         map1 = (void *)syscall(sysno,
598                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
599         EXPECT_NE(MAP_FAILED, map1);
600         /* mmap2() should never return. */
601         map2 = (void *)syscall(sysno,
602                  NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
603         EXPECT_EQ(MAP_FAILED, map2);
604
605         /* The test failed, so clean up the resources. */
606         munmap(map1, page_size);
607         munmap(map2, page_size);
608         close(fd);
609 }
610
611 /* This is a thread task to die via seccomp filter violation. */
612 void *kill_thread(void *data)
613 {
614         bool die = (bool)data;
615
616         if (die) {
617                 prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
618                 return (void *)SIBLING_EXIT_FAILURE;
619         }
620
621         return (void *)SIBLING_EXIT_UNKILLED;
622 }
623
624 /* Prepare a thread that will kill itself or both of us. */
625 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
626 {
627         pthread_t thread;
628         void *status;
629         /* Kill only when calling __NR_prctl. */
630         struct sock_filter filter_thread[] = {
631                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
632                         offsetof(struct seccomp_data, nr)),
633                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
634                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
635                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
636         };
637         struct sock_fprog prog_thread = {
638                 .len = (unsigned short)ARRAY_SIZE(filter_thread),
639                 .filter = filter_thread,
640         };
641         struct sock_filter filter_process[] = {
642                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
643                         offsetof(struct seccomp_data, nr)),
644                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
645                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
646                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
647         };
648         struct sock_fprog prog_process = {
649                 .len = (unsigned short)ARRAY_SIZE(filter_process),
650                 .filter = filter_process,
651         };
652
653         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
654                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
655         }
656
657         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
658                              kill_process ? &prog_process : &prog_thread));
659
660         /*
661          * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
662          * flag cannot be downgraded by a new filter.
663          */
664         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
665
666         /* Start a thread that will exit immediately. */
667         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
668         ASSERT_EQ(0, pthread_join(thread, &status));
669         ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
670
671         /* Start a thread that will die immediately. */
672         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
673         ASSERT_EQ(0, pthread_join(thread, &status));
674         ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
675
676         /*
677          * If we get here, only the spawned thread died. Let the parent know
678          * the whole process didn't die (i.e. this thread, the spawner,
679          * stayed running).
680          */
681         exit(42);
682 }
683
684 TEST(KILL_thread)
685 {
686         int status;
687         pid_t child_pid;
688
689         child_pid = fork();
690         ASSERT_LE(0, child_pid);
691         if (child_pid == 0) {
692                 kill_thread_or_group(_metadata, false);
693                 _exit(38);
694         }
695
696         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
697
698         /* If only the thread was killed, we'll see exit 42. */
699         ASSERT_TRUE(WIFEXITED(status));
700         ASSERT_EQ(42, WEXITSTATUS(status));
701 }
702
703 TEST(KILL_process)
704 {
705         int status;
706         pid_t child_pid;
707
708         child_pid = fork();
709         ASSERT_LE(0, child_pid);
710         if (child_pid == 0) {
711                 kill_thread_or_group(_metadata, true);
712                 _exit(38);
713         }
714
715         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
716
717         /* If the entire process was killed, we'll see SIGSYS. */
718         ASSERT_TRUE(WIFSIGNALED(status));
719         ASSERT_EQ(SIGSYS, WTERMSIG(status));
720 }
721
722 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
723 TEST(arg_out_of_range)
724 {
725         struct sock_filter filter[] = {
726                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
727                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
728         };
729         struct sock_fprog prog = {
730                 .len = (unsigned short)ARRAY_SIZE(filter),
731                 .filter = filter,
732         };
733         long ret;
734
735         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
736         ASSERT_EQ(0, ret);
737
738         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
739         EXPECT_EQ(-1, ret);
740         EXPECT_EQ(EINVAL, errno);
741 }
742
/*
 * ERRNO_FILTER(name, err) - declare a filter that fails read() with @err.
 *
 * Declares two objects in the enclosing scope:
 *   _read_filter_<name>  instructions returning SECCOMP_RET_ERRNO | err
 *                        for __NR_read and SECCOMP_RET_ALLOW otherwise
 *   prog_<name>          the struct sock_fprog wrapping those instructions
 *
 * The second parameter is deliberately not called "errno", which is a
 * macro name from <errno.h>. It is parenthesized so that an expression
 * argument combines with SECCOMP_RET_ERRNO as a single operand.
 */
#define ERRNO_FILTER(name, err)						\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | (err)),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
755
756 /* Make sure basic errno values are correctly passed through a filter. */
757 TEST(ERRNO_valid)
758 {
759         ERRNO_FILTER(valid, E2BIG);
760         long ret;
761         pid_t parent = getppid();
762
763         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
764         ASSERT_EQ(0, ret);
765
766         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
767         ASSERT_EQ(0, ret);
768
769         EXPECT_EQ(parent, syscall(__NR_getppid));
770         EXPECT_EQ(-1, read(0, NULL, 0));
771         EXPECT_EQ(E2BIG, errno);
772 }
773
774 /* Make sure an errno of zero is correctly handled by the arch code. */
775 TEST(ERRNO_zero)
776 {
777         ERRNO_FILTER(zero, 0);
778         long ret;
779         pid_t parent = getppid();
780
781         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
782         ASSERT_EQ(0, ret);
783
784         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
785         ASSERT_EQ(0, ret);
786
787         EXPECT_EQ(parent, syscall(__NR_getppid));
788         /* "errno" of 0 is ok. */
789         EXPECT_EQ(0, read(0, NULL, 0));
790 }
791
792 /*
793  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
794  * This tests that the errno value gets capped correctly, fixed by
795  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
796  */
797 TEST(ERRNO_capped)
798 {
799         ERRNO_FILTER(capped, 4096);
800         long ret;
801         pid_t parent = getppid();
802
803         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
804         ASSERT_EQ(0, ret);
805
806         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
807         ASSERT_EQ(0, ret);
808
809         EXPECT_EQ(parent, syscall(__NR_getppid));
810         EXPECT_EQ(-1, read(0, NULL, 0));
811         EXPECT_EQ(4095, errno);
812 }
813
814 /*
815  * Filters are processed in reverse order: last applied is executed first.
816  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
817  * SECCOMP_RET_DATA mask results will follow the most recently applied
818  * matching filter return (and not the lowest or highest value).
819  */
820 TEST(ERRNO_order)
821 {
822         ERRNO_FILTER(first,  11);
823         ERRNO_FILTER(second, 13);
824         ERRNO_FILTER(third,  12);
825         long ret;
826         pid_t parent = getppid();
827
828         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
829         ASSERT_EQ(0, ret);
830
831         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
832         ASSERT_EQ(0, ret);
833
834         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
835         ASSERT_EQ(0, ret);
836
837         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
838         ASSERT_EQ(0, ret);
839
840         EXPECT_EQ(parent, syscall(__NR_getppid));
841         EXPECT_EQ(-1, read(0, NULL, 0));
842         EXPECT_EQ(12, errno);
843 }
844
845 FIXTURE_DATA(TRAP) {
846         struct sock_fprog prog;
847 };
848
849 FIXTURE_SETUP(TRAP)
850 {
851         struct sock_filter filter[] = {
852                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
853                         offsetof(struct seccomp_data, nr)),
854                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
855                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
856                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
857         };
858
859         memset(&self->prog, 0, sizeof(self->prog));
860         self->prog.filter = malloc(sizeof(filter));
861         ASSERT_NE(NULL, self->prog.filter);
862         memcpy(self->prog.filter, filter, sizeof(filter));
863         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
864 }
865
866 FIXTURE_TEARDOWN(TRAP)
867 {
868         if (self->prog.filter)
869                 free(self->prog.filter);
870 }
871
872 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
873 {
874         long ret;
875
876         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
877         ASSERT_EQ(0, ret);
878
879         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
880         ASSERT_EQ(0, ret);
881         syscall(__NR_getpid);
882 }
883
884 /* Ensure that SIGSYS overrides SIG_IGN */
885 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
886 {
887         long ret;
888
889         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
890         ASSERT_EQ(0, ret);
891
892         signal(SIGSYS, SIG_IGN);
893
894         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
895         ASSERT_EQ(0, ret);
896         syscall(__NR_getpid);
897 }
898
899 static siginfo_t TRAP_info;
900 static volatile int TRAP_nr;
901 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
902 {
903         memcpy(&TRAP_info, info, sizeof(TRAP_info));
904         TRAP_nr = nr;
905 }
906
907 TEST_F(TRAP, handler)
908 {
909         int ret, test;
910         struct sigaction act;
911         sigset_t mask;
912
913         memset(&act, 0, sizeof(act));
914         sigemptyset(&mask);
915         sigaddset(&mask, SIGSYS);
916
917         act.sa_sigaction = &TRAP_action;
918         act.sa_flags = SA_SIGINFO;
919         ret = sigaction(SIGSYS, &act, NULL);
920         ASSERT_EQ(0, ret) {
921                 TH_LOG("sigaction failed");
922         }
923         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
924         ASSERT_EQ(0, ret) {
925                 TH_LOG("sigprocmask failed");
926         }
927
928         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
929         ASSERT_EQ(0, ret);
930         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
931         ASSERT_EQ(0, ret);
932         TRAP_nr = 0;
933         memset(&TRAP_info, 0, sizeof(TRAP_info));
934         /* Expect the registers to be rolled back. (nr = error) may vary
935          * based on arch. */
936         ret = syscall(__NR_getpid);
937         /* Silence gcc warning about volatile. */
938         test = TRAP_nr;
939         EXPECT_EQ(SIGSYS, test);
940         struct local_sigsys {
941                 void *_call_addr;       /* calling user insn */
942                 int _syscall;           /* triggering system call number */
943                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
944         } *sigsys = (struct local_sigsys *)
945 #ifdef si_syscall
946                 &(TRAP_info.si_call_addr);
947 #else
948                 &TRAP_info.si_pid;
949 #endif
950         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
951         /* Make sure arch is non-zero. */
952         EXPECT_NE(0, sigsys->_arch);
953         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
954 }
955
956 FIXTURE_DATA(precedence) {
957         struct sock_fprog allow;
958         struct sock_fprog log;
959         struct sock_fprog trace;
960         struct sock_fprog error;
961         struct sock_fprog trap;
962         struct sock_fprog kill;
963 };
964
965 FIXTURE_SETUP(precedence)
966 {
967         struct sock_filter allow_insns[] = {
968                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
969         };
970         struct sock_filter log_insns[] = {
971                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
972                         offsetof(struct seccomp_data, nr)),
973                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
974                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
975                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
976         };
977         struct sock_filter trace_insns[] = {
978                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
979                         offsetof(struct seccomp_data, nr)),
980                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
981                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
982                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
983         };
984         struct sock_filter error_insns[] = {
985                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
986                         offsetof(struct seccomp_data, nr)),
987                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
988                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
989                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
990         };
991         struct sock_filter trap_insns[] = {
992                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
993                         offsetof(struct seccomp_data, nr)),
994                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
995                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
996                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
997         };
998         struct sock_filter kill_insns[] = {
999                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1000                         offsetof(struct seccomp_data, nr)),
1001                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1002                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1003                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1004         };
1005
1006         memset(self, 0, sizeof(*self));
1007 #define FILTER_ALLOC(_x) \
1008         self->_x.filter = malloc(sizeof(_x##_insns)); \
1009         ASSERT_NE(NULL, self->_x.filter); \
1010         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1011         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1012         FILTER_ALLOC(allow);
1013         FILTER_ALLOC(log);
1014         FILTER_ALLOC(trace);
1015         FILTER_ALLOC(error);
1016         FILTER_ALLOC(trap);
1017         FILTER_ALLOC(kill);
1018 }
1019
1020 FIXTURE_TEARDOWN(precedence)
1021 {
1022 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1023         FILTER_FREE(allow);
1024         FILTER_FREE(log);
1025         FILTER_FREE(trace);
1026         FILTER_FREE(error);
1027         FILTER_FREE(trap);
1028         FILTER_FREE(kill);
1029 }
1030
1031 TEST_F(precedence, allow_ok)
1032 {
1033         pid_t parent, res = 0;
1034         long ret;
1035
1036         parent = getppid();
1037         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1038         ASSERT_EQ(0, ret);
1039
1040         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1041         ASSERT_EQ(0, ret);
1042         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1043         ASSERT_EQ(0, ret);
1044         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1045         ASSERT_EQ(0, ret);
1046         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1047         ASSERT_EQ(0, ret);
1048         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1049         ASSERT_EQ(0, ret);
1050         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1051         ASSERT_EQ(0, ret);
1052         /* Should work just fine. */
1053         res = syscall(__NR_getppid);
1054         EXPECT_EQ(parent, res);
1055 }
1056
1057 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1058 {
1059         pid_t parent, res = 0;
1060         long ret;
1061
1062         parent = getppid();
1063         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1064         ASSERT_EQ(0, ret);
1065
1066         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1067         ASSERT_EQ(0, ret);
1068         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1069         ASSERT_EQ(0, ret);
1070         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1071         ASSERT_EQ(0, ret);
1072         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1073         ASSERT_EQ(0, ret);
1074         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1075         ASSERT_EQ(0, ret);
1076         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1077         ASSERT_EQ(0, ret);
1078         /* Should work just fine. */
1079         res = syscall(__NR_getppid);
1080         EXPECT_EQ(parent, res);
1081         /* getpid() should never return. */
1082         res = syscall(__NR_getpid);
1083         EXPECT_EQ(0, res);
1084 }
1085
1086 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1087 {
1088         pid_t parent;
1089         long ret;
1090
1091         parent = getppid();
1092         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1093         ASSERT_EQ(0, ret);
1094
1095         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1096         ASSERT_EQ(0, ret);
1097         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1098         ASSERT_EQ(0, ret);
1099         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1100         ASSERT_EQ(0, ret);
1101         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1102         ASSERT_EQ(0, ret);
1103         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1104         ASSERT_EQ(0, ret);
1105         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1106         ASSERT_EQ(0, ret);
1107         /* Should work just fine. */
1108         EXPECT_EQ(parent, syscall(__NR_getppid));
1109         /* getpid() should never return. */
1110         EXPECT_EQ(0, syscall(__NR_getpid));
1111 }
1112
1113 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1114 {
1115         pid_t parent;
1116         long ret;
1117
1118         parent = getppid();
1119         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1120         ASSERT_EQ(0, ret);
1121
1122         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1123         ASSERT_EQ(0, ret);
1124         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1125         ASSERT_EQ(0, ret);
1126         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1127         ASSERT_EQ(0, ret);
1128         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1129         ASSERT_EQ(0, ret);
1130         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1131         ASSERT_EQ(0, ret);
1132         /* Should work just fine. */
1133         EXPECT_EQ(parent, syscall(__NR_getppid));
1134         /* getpid() should never return. */
1135         EXPECT_EQ(0, syscall(__NR_getpid));
1136 }
1137
1138 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1139 {
1140         pid_t parent;
1141         long ret;
1142
1143         parent = getppid();
1144         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1145         ASSERT_EQ(0, ret);
1146
1147         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1148         ASSERT_EQ(0, ret);
1149         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1150         ASSERT_EQ(0, ret);
1151         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1152         ASSERT_EQ(0, ret);
1153         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1154         ASSERT_EQ(0, ret);
1155         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1156         ASSERT_EQ(0, ret);
1157         /* Should work just fine. */
1158         EXPECT_EQ(parent, syscall(__NR_getppid));
1159         /* getpid() should never return. */
1160         EXPECT_EQ(0, syscall(__NR_getpid));
1161 }
1162
1163 TEST_F(precedence, errno_is_third)
1164 {
1165         pid_t parent;
1166         long ret;
1167
1168         parent = getppid();
1169         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1170         ASSERT_EQ(0, ret);
1171
1172         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1173         ASSERT_EQ(0, ret);
1174         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1175         ASSERT_EQ(0, ret);
1176         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1177         ASSERT_EQ(0, ret);
1178         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1179         ASSERT_EQ(0, ret);
1180         /* Should work just fine. */
1181         EXPECT_EQ(parent, syscall(__NR_getppid));
1182         EXPECT_EQ(0, syscall(__NR_getpid));
1183 }
1184
1185 TEST_F(precedence, errno_is_third_in_any_order)
1186 {
1187         pid_t parent;
1188         long ret;
1189
1190         parent = getppid();
1191         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1192         ASSERT_EQ(0, ret);
1193
1194         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1195         ASSERT_EQ(0, ret);
1196         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1197         ASSERT_EQ(0, ret);
1198         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1199         ASSERT_EQ(0, ret);
1200         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1201         ASSERT_EQ(0, ret);
1202         /* Should work just fine. */
1203         EXPECT_EQ(parent, syscall(__NR_getppid));
1204         EXPECT_EQ(0, syscall(__NR_getpid));
1205 }
1206
1207 TEST_F(precedence, trace_is_fourth)
1208 {
1209         pid_t parent;
1210         long ret;
1211
1212         parent = getppid();
1213         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1214         ASSERT_EQ(0, ret);
1215
1216         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1217         ASSERT_EQ(0, ret);
1218         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1219         ASSERT_EQ(0, ret);
1220         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1221         ASSERT_EQ(0, ret);
1222         /* Should work just fine. */
1223         EXPECT_EQ(parent, syscall(__NR_getppid));
1224         /* No ptracer */
1225         EXPECT_EQ(-1, syscall(__NR_getpid));
1226 }
1227
1228 TEST_F(precedence, trace_is_fourth_in_any_order)
1229 {
1230         pid_t parent;
1231         long ret;
1232
1233         parent = getppid();
1234         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1235         ASSERT_EQ(0, ret);
1236
1237         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1238         ASSERT_EQ(0, ret);
1239         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1240         ASSERT_EQ(0, ret);
1241         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1242         ASSERT_EQ(0, ret);
1243         /* Should work just fine. */
1244         EXPECT_EQ(parent, syscall(__NR_getppid));
1245         /* No ptracer */
1246         EXPECT_EQ(-1, syscall(__NR_getpid));
1247 }
1248
1249 TEST_F(precedence, log_is_fifth)
1250 {
1251         pid_t mypid, parent;
1252         long ret;
1253
1254         mypid = getpid();
1255         parent = getppid();
1256         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1257         ASSERT_EQ(0, ret);
1258
1259         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1260         ASSERT_EQ(0, ret);
1261         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1262         ASSERT_EQ(0, ret);
1263         /* Should work just fine. */
1264         EXPECT_EQ(parent, syscall(__NR_getppid));
1265         /* Should also work just fine */
1266         EXPECT_EQ(mypid, syscall(__NR_getpid));
1267 }
1268
1269 TEST_F(precedence, log_is_fifth_in_any_order)
1270 {
1271         pid_t mypid, parent;
1272         long ret;
1273
1274         mypid = getpid();
1275         parent = getppid();
1276         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1277         ASSERT_EQ(0, ret);
1278
1279         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1280         ASSERT_EQ(0, ret);
1281         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1282         ASSERT_EQ(0, ret);
1283         /* Should work just fine. */
1284         EXPECT_EQ(parent, syscall(__NR_getppid));
1285         /* Should also work just fine */
1286         EXPECT_EQ(mypid, syscall(__NR_getpid));
1287 }
1288
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits above bit 15 of a wait() status equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that an
 * expression such as "a | b" is shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)

/*
 * Run flag for the tracer loop. It is cleared from a signal handler, so
 * it is declared volatile sig_atomic_t.
 */
volatile sig_atomic_t tracer_running;

/* SIGUSR1 handler: ask the tracer loop to stop. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = 0;
}
1308
/*
 * Callback run by start_tracer() for each stop of the tracee. It
 * receives the test metadata, the tracee pid, the wait() status of the
 * stop and the opaque argument handed to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
			   int status, void *args);
1311
1312 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1313             tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1314 {
1315         int ret = -1;
1316         struct sigaction action = {
1317                 .sa_handler = tracer_stop,
1318         };
1319
1320         /* Allow external shutdown. */
1321         tracer_running = true;
1322         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1323
1324         errno = 0;
1325         while (ret == -1 && errno != EINVAL)
1326                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1327         ASSERT_EQ(0, ret) {
1328                 kill(tracee, SIGKILL);
1329         }
1330         /* Wait for attach stop */
1331         wait(NULL);
1332
1333         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1334                                                       PTRACE_O_TRACESYSGOOD :
1335                                                       PTRACE_O_TRACESECCOMP);
1336         ASSERT_EQ(0, ret) {
1337                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1338                 kill(tracee, SIGKILL);
1339         }
1340         ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1341                      tracee, NULL, 0);
1342         ASSERT_EQ(0, ret);
1343
1344         /* Unblock the tracee */
1345         ASSERT_EQ(1, write(fd, "A", 1));
1346         ASSERT_EQ(0, close(fd));
1347
1348         /* Run until we're shut down. Must assert to stop execution. */
1349         while (tracer_running) {
1350                 int status;
1351
1352                 if (wait(&status) != tracee)
1353                         continue;
1354                 if (WIFSIGNALED(status) || WIFEXITED(status))
1355                         /* Child is dead. Time to go. */
1356                         return;
1357
1358                 /* Check if this is a seccomp event. */
1359                 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1360
1361                 tracer_func(_metadata, tracee, status, args);
1362
1363                 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1364                              tracee, NULL, 0);
1365                 ASSERT_EQ(0, ret);
1366         }
1367         /* Directly report the status of our test harness results. */
1368         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1369 }
1370
1371 /* Common tracer setup/teardown functions. */
1372 void cont_handler(int num)
1373 { }
1374 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1375                           tracer_func_t func, void *args, bool ptrace_syscall)
1376 {
1377         char sync;
1378         int pipefd[2];
1379         pid_t tracer_pid;
1380         pid_t tracee = getpid();
1381
1382         /* Setup a pipe for clean synchronization. */
1383         ASSERT_EQ(0, pipe(pipefd));
1384
1385         /* Fork a child which we'll promote to tracer */
1386         tracer_pid = fork();
1387         ASSERT_LE(0, tracer_pid);
1388         signal(SIGALRM, cont_handler);
1389         if (tracer_pid == 0) {
1390                 close(pipefd[0]);
1391                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1392                              ptrace_syscall);
1393                 syscall(__NR_exit, 0);
1394         }
1395         close(pipefd[1]);
1396         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1397         read(pipefd[0], &sync, 1);
1398         close(pipefd[0]);
1399
1400         return tracer_pid;
1401 }
1402 void teardown_trace_fixture(struct __test_metadata *_metadata,
1403                             pid_t tracer)
1404 {
1405         if (tracer) {
1406                 int status;
1407                 /*
1408                  * Extract the exit code from the other process and
1409                  * adopt it for ourselves in case its asserts failed.
1410                  */
1411                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1412                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1413                 if (WEXITSTATUS(status))
1414                         _metadata->passed = 0;
1415         }
1416 }
1417
/*
 * "poke" tracer arguments and function.
 *
 * poke_addr is the address in the tracee that tracer_poke() writes to.
 */
struct tracer_args_poke_t {
	unsigned long poke_addr;
};
1422
1423 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1424                  void *args)
1425 {
1426         int ret;
1427         unsigned long msg;
1428         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1429
1430         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1431         EXPECT_EQ(0, ret);
1432         /* If this fails, don't try to recover. */
1433         ASSERT_EQ(0x1001, msg) {
1434                 kill(tracee, SIGKILL);
1435         }
1436         /*
1437          * Poke in the message.
1438          * Registers are not touched to try to keep this relatively arch
1439          * agnostic.
1440          */
1441         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1442         EXPECT_EQ(0, ret);
1443 }
1444
1445 FIXTURE_DATA(TRACE_poke) {
1446         struct sock_fprog prog;
1447         pid_t tracer;
1448         long poked;
1449         struct tracer_args_poke_t tracer_args;
1450 };
1451
1452 FIXTURE_SETUP(TRACE_poke)
1453 {
1454         struct sock_filter filter[] = {
1455                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1456                         offsetof(struct seccomp_data, nr)),
1457                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1458                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1459                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1460         };
1461
1462         self->poked = 0;
1463         memset(&self->prog, 0, sizeof(self->prog));
1464         self->prog.filter = malloc(sizeof(filter));
1465         ASSERT_NE(NULL, self->prog.filter);
1466         memcpy(self->prog.filter, filter, sizeof(filter));
1467         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1468
1469         /* Set up tracer args. */
1470         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1471
1472         /* Launch tracer. */
1473         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1474                                            &self->tracer_args, false);
1475 }
1476
1477 FIXTURE_TEARDOWN(TRACE_poke)
1478 {
1479         teardown_trace_fixture(_metadata, self->tracer);
1480         if (self->prog.filter)
1481                 free(self->prog.filter);
1482 }
1483
1484 TEST_F(TRACE_poke, read_has_side_effects)
1485 {
1486         ssize_t ret;
1487
1488         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1489         ASSERT_EQ(0, ret);
1490
1491         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1492         ASSERT_EQ(0, ret);
1493
1494         EXPECT_EQ(0, self->poked);
1495         ret = read(-1, NULL, 0);
1496         EXPECT_EQ(-1, ret);
1497         EXPECT_EQ(0x1001, self->poked);
1498 }
1499
1500 TEST_F(TRACE_poke, getpid_runs_normally)
1501 {
1502         long ret;
1503
1504         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1505         ASSERT_EQ(0, ret);
1506
1507         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1508         ASSERT_EQ(0, ret);
1509
1510         EXPECT_EQ(0, self->poked);
1511         EXPECT_NE(0, syscall(__NR_getpid));
1512         EXPECT_EQ(0, self->poked);
1513 }
1514
/*
 * Per-architecture register description used by get_syscall() and
 * change_syscall():
 *   ARCH_REGS    type of the register set read from the tracee
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 * SYSCALL_NUM_RET_SHARE_REG is defined when SYSCALL_NUM and SYSCALL_RET
 * name the same register, so the return value cannot be set without
 * clobbering the syscall number.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
/* gprs[2] carries both values, exactly like regs[2] on mips below. */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1552
/*
 * EXPECT_SYSCALL_RETURN(val, action): run @action and check its result.
 * A negative @val means "returns -1 with errno == -val"; any other @val
 * is compared with the return value directly.
 *
 * When the syscall return can't be changed, stub out the tests for it:
 * only a return of -1 is expected.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		/* Parenthesized so any expression is compared as a whole. */ \
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1568
/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful
 * for architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 * Other architectures go through PTRACE_GETREGSET/PTRACE_SETREGSET.
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
# define HAVE_GETREGS
#endif
1575
1576 /* Architecture-specific syscall fetching routine. */
1577 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1578 {
1579         ARCH_REGS regs;
1580 #ifdef HAVE_GETREGS
1581         EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1582                 TH_LOG("PTRACE_GETREGS failed");
1583                 return -1;
1584         }
1585 #else
1586         struct iovec iov;
1587
1588         iov.iov_base = &regs;
1589         iov.iov_len = sizeof(regs);
1590         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1591                 TH_LOG("PTRACE_GETREGSET failed");
1592                 return -1;
1593         }
1594 #endif
1595
1596 #if defined(__mips__)
1597         if (regs.SYSCALL_NUM == __NR_O32_Linux)
1598                 return regs.SYSCALL_SYSCALL_NUM;
1599 #endif
1600         return regs.SYSCALL_NUM;
1601 }
1602
1603 /* Architecture-specific syscall changing routine. */
1604 void change_syscall(struct __test_metadata *_metadata,
1605                     pid_t tracee, int syscall, int result)
1606 {
1607         int ret;
1608         ARCH_REGS regs;
1609 #ifdef HAVE_GETREGS
1610         ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1611 #else
1612         struct iovec iov;
1613         iov.iov_base = &regs;
1614         iov.iov_len = sizeof(regs);
1615         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1616 #endif
1617         EXPECT_EQ(0, ret) {}
1618
1619 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1620     defined(__s390__) || defined(__hppa__)
1621         {
1622                 regs.SYSCALL_NUM = syscall;
1623         }
1624 #elif defined(__mips__)
1625         {
1626                 if (regs.SYSCALL_NUM == __NR_O32_Linux)
1627                         regs.SYSCALL_SYSCALL_NUM = syscall;
1628                 else
1629                         regs.SYSCALL_NUM = syscall;
1630         }
1631
1632 #elif defined(__arm__)
1633 # ifndef PTRACE_SET_SYSCALL
1634 #  define PTRACE_SET_SYSCALL   23
1635 # endif
1636         {
1637                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1638                 EXPECT_EQ(0, ret);
1639         }
1640
1641 #elif defined(__aarch64__)
1642 # ifndef NT_ARM_SYSTEM_CALL
1643 #  define NT_ARM_SYSTEM_CALL 0x404
1644 # endif
1645         {
1646                 iov.iov_base = &syscall;
1647                 iov.iov_len = sizeof(syscall);
1648                 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1649                              &iov);
1650                 EXPECT_EQ(0, ret);
1651         }
1652
1653 #else
1654         ASSERT_EQ(1, 0) {
1655                 TH_LOG("How is the syscall changed on this architecture?");
1656         }
1657 #endif
1658
1659         /* If syscall is skipped, change return value. */
1660         if (syscall == -1)
1661 #ifdef SYSCALL_NUM_RET_SHARE_REG
1662                 TH_LOG("Can't modify syscall return on this architecture");
1663 #else
1664                 regs.SYSCALL_RET = result;
1665 #endif
1666
1667 #ifdef HAVE_GETREGS
1668         ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1669 #else
1670         iov.iov_base = &regs;
1671         iov.iov_len = sizeof(regs);
1672         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1673 #endif
1674         EXPECT_EQ(0, ret);
1675 }
1676
1677 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1678                     int status, void *args)
1679 {
1680         int ret;
1681         unsigned long msg;
1682
1683         /* Make sure we got the right message. */
1684         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1685         EXPECT_EQ(0, ret);
1686
1687         /* Validate and take action on expected syscalls. */
1688         switch (msg) {
1689         case 0x1002:
1690                 /* change getpid to getppid. */
1691                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1692                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1693                 break;
1694         case 0x1003:
1695                 /* skip gettid with valid return code. */
1696                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1697                 change_syscall(_metadata, tracee, -1, 45000);
1698                 break;
1699         case 0x1004:
1700                 /* skip openat with error. */
1701                 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1702                 change_syscall(_metadata, tracee, -1, -ESRCH);
1703                 break;
1704         case 0x1005:
1705                 /* do nothing (allow getppid) */
1706                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1707                 break;
1708         default:
1709                 EXPECT_EQ(0, msg) {
1710                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1711                         kill(tracee, SIGKILL);
1712                 }
1713         }
1714
1715 }
1716
1717 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1718                    int status, void *args)
1719 {
1720         int ret, nr;
1721         unsigned long msg;
1722         static bool entry;
1723
1724         /* Make sure we got an empty message. */
1725         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1726         EXPECT_EQ(0, ret);
1727         EXPECT_EQ(0, msg);
1728
1729         /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
1730         entry = !entry;
1731         if (!entry)
1732                 return;
1733
1734         nr = get_syscall(_metadata, tracee);
1735
1736         if (nr == __NR_getpid)
1737                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1738         if (nr == __NR_gettid)
1739                 change_syscall(_metadata, tracee, -1, 45000);
1740         if (nr == __NR_openat)
1741                 change_syscall(_metadata, tracee, -1, -ESRCH);
1742 }
1743
/* State for the TRACE_syscall tests. */
FIXTURE_DATA(TRACE_syscall) {
	/* Filter program built by the fixture setup. */
	struct sock_fprog prog;
	/* tracer: pid from setup_trace_fixture(); the rest are ids
	 * recorded by the fixture setup before any tracing starts. */
	pid_t tracer, mytid, mypid, parent;
};
1748
1749 FIXTURE_SETUP(TRACE_syscall)
1750 {
1751         struct sock_filter filter[] = {
1752                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1753                         offsetof(struct seccomp_data, nr)),
1754                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1755                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1756                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1757                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1758                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1759                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1760                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1761                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1762                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1763         };
1764
1765         memset(&self->prog, 0, sizeof(self->prog));
1766         self->prog.filter = malloc(sizeof(filter));
1767         ASSERT_NE(NULL, self->prog.filter);
1768         memcpy(self->prog.filter, filter, sizeof(filter));
1769         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1770
1771         /* Prepare some testable syscall results. */
1772         self->mytid = syscall(__NR_gettid);
1773         ASSERT_GT(self->mytid, 0);
1774         ASSERT_NE(self->mytid, 1) {
1775                 TH_LOG("Running this test as init is not supported. :)");
1776         }
1777
1778         self->mypid = getpid();
1779         ASSERT_GT(self->mypid, 0);
1780         ASSERT_EQ(self->mytid, self->mypid);
1781
1782         self->parent = getppid();
1783         ASSERT_GT(self->parent, 0);
1784         ASSERT_NE(self->parent, self->mypid);
1785
1786         /* Launch tracer. */
1787         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1788                                            false);
1789 }
1790
1791 FIXTURE_TEARDOWN(TRACE_syscall)
1792 {
1793         teardown_trace_fixture(_metadata, self->tracer);
1794         if (self->prog.filter)
1795                 free(self->prog.filter);
1796 }
1797
1798 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1799 {
1800         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1801         teardown_trace_fixture(_metadata, self->tracer);
1802         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1803                                            true);
1804
1805         /* Tracer will redirect getpid to getppid. */
1806         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1807 }
1808
1809 TEST_F(TRACE_syscall, ptrace_syscall_errno)
1810 {
1811         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1812         teardown_trace_fixture(_metadata, self->tracer);
1813         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1814                                            true);
1815
1816         /* Tracer should skip the open syscall, resulting in ESRCH. */
1817         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1818 }
1819
1820 TEST_F(TRACE_syscall, ptrace_syscall_faked)
1821 {
1822         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1823         teardown_trace_fixture(_metadata, self->tracer);
1824         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1825                                            true);
1826
1827         /* Tracer should skip the gettid syscall, resulting fake pid. */
1828         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1829 }
1830
1831 TEST_F(TRACE_syscall, syscall_allowed)
1832 {
1833         long ret;
1834
1835         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1836         ASSERT_EQ(0, ret);
1837
1838         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1839         ASSERT_EQ(0, ret);
1840
1841         /* getppid works as expected (no changes). */
1842         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1843         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1844 }
1845
1846 TEST_F(TRACE_syscall, syscall_redirected)
1847 {
1848         long ret;
1849
1850         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1851         ASSERT_EQ(0, ret);
1852
1853         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1854         ASSERT_EQ(0, ret);
1855
1856         /* getpid has been redirected to getppid as expected. */
1857         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1858         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1859 }
1860
1861 TEST_F(TRACE_syscall, syscall_errno)
1862 {
1863         long ret;
1864
1865         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1866         ASSERT_EQ(0, ret);
1867
1868         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1869         ASSERT_EQ(0, ret);
1870
1871         /* openat has been skipped and an errno return. */
1872         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1873 }
1874
1875 TEST_F(TRACE_syscall, syscall_faked)
1876 {
1877         long ret;
1878
1879         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1880         ASSERT_EQ(0, ret);
1881
1882         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1883         ASSERT_EQ(0, ret);
1884
1885         /* gettid has been skipped and an altered return value stored. */
1886         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1887 }
1888
1889 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1890 {
1891         struct sock_filter filter[] = {
1892                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1893                         offsetof(struct seccomp_data, nr)),
1894                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1895                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1896                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1897         };
1898         struct sock_fprog prog = {
1899                 .len = (unsigned short)ARRAY_SIZE(filter),
1900                 .filter = filter,
1901         };
1902         long ret;
1903
1904         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1905         ASSERT_EQ(0, ret);
1906
1907         /* Install fixture filter. */
1908         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1909         ASSERT_EQ(0, ret);
1910
1911         /* Install "errno on getppid" filter. */
1912         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1913         ASSERT_EQ(0, ret);
1914
1915         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1916         errno = 0;
1917         EXPECT_EQ(-1, syscall(__NR_getpid));
1918         EXPECT_EQ(EPERM, errno);
1919 }
1920
1921 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1922 {
1923         struct sock_filter filter[] = {
1924                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1925                         offsetof(struct seccomp_data, nr)),
1926                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1927                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1928                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1929         };
1930         struct sock_fprog prog = {
1931                 .len = (unsigned short)ARRAY_SIZE(filter),
1932                 .filter = filter,
1933         };
1934         long ret;
1935
1936         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1937         ASSERT_EQ(0, ret);
1938
1939         /* Install fixture filter. */
1940         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1941         ASSERT_EQ(0, ret);
1942
1943         /* Install "death on getppid" filter. */
1944         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1945         ASSERT_EQ(0, ret);
1946
1947         /* Tracer will redirect getpid to getppid, and we should die. */
1948         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1949 }
1950
1951 TEST_F(TRACE_syscall, skip_after_ptrace)
1952 {
1953         struct sock_filter filter[] = {
1954                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1955                         offsetof(struct seccomp_data, nr)),
1956                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1957                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1958                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1959         };
1960         struct sock_fprog prog = {
1961                 .len = (unsigned short)ARRAY_SIZE(filter),
1962                 .filter = filter,
1963         };
1964         long ret;
1965
1966         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1967         teardown_trace_fixture(_metadata, self->tracer);
1968         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1969                                            true);
1970
1971         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1972         ASSERT_EQ(0, ret);
1973
1974         /* Install "errno on getppid" filter. */
1975         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1976         ASSERT_EQ(0, ret);
1977
1978         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1979         EXPECT_EQ(-1, syscall(__NR_getpid));
1980         EXPECT_EQ(EPERM, errno);
1981 }
1982
1983 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
1984 {
1985         struct sock_filter filter[] = {
1986                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1987                         offsetof(struct seccomp_data, nr)),
1988                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1989                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1990                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1991         };
1992         struct sock_fprog prog = {
1993                 .len = (unsigned short)ARRAY_SIZE(filter),
1994                 .filter = filter,
1995         };
1996         long ret;
1997
1998         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1999         teardown_trace_fixture(_metadata, self->tracer);
2000         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2001                                            true);
2002
2003         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2004         ASSERT_EQ(0, ret);
2005
2006         /* Install "death on getppid" filter. */
2007         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2008         ASSERT_EQ(0, ret);
2009
2010         /* Tracer will redirect getpid to getppid, and we should die. */
2011         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2012 }
2013
2014 TEST(seccomp_syscall)
2015 {
2016         struct sock_filter filter[] = {
2017                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2018         };
2019         struct sock_fprog prog = {
2020                 .len = (unsigned short)ARRAY_SIZE(filter),
2021                 .filter = filter,
2022         };
2023         long ret;
2024
2025         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2026         ASSERT_EQ(0, ret) {
2027                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2028         }
2029
2030         /* Reject insane operation. */
2031         ret = seccomp(-1, 0, &prog);
2032         ASSERT_NE(ENOSYS, errno) {
2033                 TH_LOG("Kernel does not support seccomp syscall!");
2034         }
2035         EXPECT_EQ(EINVAL, errno) {
2036                 TH_LOG("Did not reject crazy op value!");
2037         }
2038
2039         /* Reject strict with flags or pointer. */
2040         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2041         EXPECT_EQ(EINVAL, errno) {
2042                 TH_LOG("Did not reject mode strict with flags!");
2043         }
2044         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2045         EXPECT_EQ(EINVAL, errno) {
2046                 TH_LOG("Did not reject mode strict with uargs!");
2047         }
2048
2049         /* Reject insane args for filter. */
2050         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2051         EXPECT_EQ(EINVAL, errno) {
2052                 TH_LOG("Did not reject crazy filter flags!");
2053         }
2054         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2055         EXPECT_EQ(EFAULT, errno) {
2056                 TH_LOG("Did not reject NULL filter!");
2057         }
2058
2059         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2060         EXPECT_EQ(0, errno) {
2061                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2062                         strerror(errno));
2063         }
2064 }
2065
2066 TEST(seccomp_syscall_mode_lock)
2067 {
2068         struct sock_filter filter[] = {
2069                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2070         };
2071         struct sock_fprog prog = {
2072                 .len = (unsigned short)ARRAY_SIZE(filter),
2073                 .filter = filter,
2074         };
2075         long ret;
2076
2077         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2078         ASSERT_EQ(0, ret) {
2079                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2080         }
2081
2082         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2083         ASSERT_NE(ENOSYS, errno) {
2084                 TH_LOG("Kernel does not support seccomp syscall!");
2085         }
2086         EXPECT_EQ(0, ret) {
2087                 TH_LOG("Could not install filter!");
2088         }
2089
2090         /* Make sure neither entry point will switch to strict. */
2091         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2092         EXPECT_EQ(EINVAL, errno) {
2093                 TH_LOG("Switched to mode strict!");
2094         }
2095
2096         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2097         EXPECT_EQ(EINVAL, errno) {
2098                 TH_LOG("Switched to mode strict!");
2099         }
2100 }
2101
2102 /*
2103  * Test detection of known and unknown filter flags. Userspace needs to be able
2104  * to check if a filter flag is supported by the current kernel and a good way
2105  * of doing that is by attempting to enter filter mode, with the flag bit in
2106  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2107  * that the flag is valid and EINVAL indicates that the flag is invalid.
2108  */
2109 TEST(detect_seccomp_filter_flags)
2110 {
2111         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2112                                  SECCOMP_FILTER_FLAG_LOG,
2113                                  SECCOMP_FILTER_FLAG_SPEC_ALLOW };
2114         unsigned int flag, all_flags;
2115         int i;
2116         long ret;
2117
2118         /* Test detection of known-good filter flags */
2119         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2120                 int bits = 0;
2121
2122                 flag = flags[i];
2123                 /* Make sure the flag is a single bit! */
2124                 while (flag) {
2125                         if (flag & 0x1)
2126                                 bits ++;
2127                         flag >>= 1;
2128                 }
2129                 ASSERT_EQ(1, bits);
2130                 flag = flags[i];
2131
2132                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2133                 ASSERT_NE(ENOSYS, errno) {
2134                         TH_LOG("Kernel does not support seccomp syscall!");
2135                 }
2136                 EXPECT_EQ(-1, ret);
2137                 EXPECT_EQ(EFAULT, errno) {
2138                         TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2139                                flag);
2140                 }
2141
2142                 all_flags |= flag;
2143         }
2144
2145         /* Test detection of all known-good filter flags */
2146         ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
2147         EXPECT_EQ(-1, ret);
2148         EXPECT_EQ(EFAULT, errno) {
2149                 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2150                        all_flags);
2151         }
2152
2153         /* Test detection of an unknown filter flag */
2154         flag = -1;
2155         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2156         EXPECT_EQ(-1, ret);
2157         EXPECT_EQ(EINVAL, errno) {
2158                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2159                        flag);
2160         }
2161
2162         /*
2163          * Test detection of an unknown filter flag that may simply need to be
2164          * added to this test
2165          */
2166         flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2167         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2168         EXPECT_EQ(-1, ret);
2169         EXPECT_EQ(EINVAL, errno) {
2170                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2171                        flag);
2172         }
2173 }
2174
2175 TEST(TSYNC_first)
2176 {
2177         struct sock_filter filter[] = {
2178                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2179         };
2180         struct sock_fprog prog = {
2181                 .len = (unsigned short)ARRAY_SIZE(filter),
2182                 .filter = filter,
2183         };
2184         long ret;
2185
2186         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2187         ASSERT_EQ(0, ret) {
2188                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2189         }
2190
2191         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2192                       &prog);
2193         ASSERT_NE(ENOSYS, errno) {
2194                 TH_LOG("Kernel does not support seccomp syscall!");
2195         }
2196         EXPECT_EQ(0, ret) {
2197                 TH_LOG("Could not install initial filter with TSYNC!");
2198         }
2199 }
2200
/* Number of sibling threads the TSYNC fixture keeps state for. */
#define TSYNC_SIBLINGS 2
/* Per-thread state handed to tsync_sibling() as its thread argument. */
struct tsync_sibling {
        pthread_t tid;                  /* handle filled in by pthread_create() */
        pid_t system_tid;               /* gettid() result, stored by the thread itself */
        sem_t *started;                 /* posted by the thread once it has started */
        pthread_cond_t *cond;           /* condition the thread waits on */
        pthread_mutex_t *mutex;         /* mutex paired with cond */
        int diverge;                    /* non-zero: thread installs prog on itself first */
        int num_waits;                  /* number of cond wake-ups before the thread proceeds */
        struct sock_fprog *prog;        /* filter installed when diverge is set */
        struct __test_metadata *metadata; /* test metadata pointer set by the fixture */
};
2213
/*
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 *
 * @tid must be a modifiable lvalue (it is zeroed on success) and is
 * evaluated more than once; @status is passed straight to pthread_join().
 * Both arguments are parenthesized so that arbitrary expressions expand
 * safely.
 */
#define PTHREAD_JOIN(tid, status)                                       \
        do {                                                            \
                int _rc = pthread_join((tid), (status));                \
                if (_rc) {                                              \
                        TH_LOG("pthread_join of tid %u failed: %d\n",   \
                                (unsigned int)(tid), _rc);              \
                } else {                                                \
                        (tid) = 0;                                      \
                }                                                       \
        } while (0)
2230
/* State shared by all TSYNC tests; initialised in FIXTURE_SETUP(TSYNC). */
FIXTURE_DATA(TSYNC) {
        /* root_prog allows everything; apply_prog kills on read(). */
        struct sock_fprog root_prog, apply_prog;
        /* One slot per sibling thread. */
        struct tsync_sibling sibling[TSYNC_SIBLINGS];
        /* Posted by each sibling after it has started. */
        sem_t started;
        /* Condition/mutex pair the siblings block on until broadcast. */
        pthread_cond_t cond;
        pthread_mutex_t mutex;
        /* Number of siblings seen on 'started'; bounds the teardown loop. */
        int sibling_count;
};
2239
2240 FIXTURE_SETUP(TSYNC)
2241 {
2242         struct sock_filter root_filter[] = {
2243                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2244         };
2245         struct sock_filter apply_filter[] = {
2246                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2247                         offsetof(struct seccomp_data, nr)),
2248                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2249                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2250                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2251         };
2252
2253         memset(&self->root_prog, 0, sizeof(self->root_prog));
2254         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2255         memset(&self->sibling, 0, sizeof(self->sibling));
2256         self->root_prog.filter = malloc(sizeof(root_filter));
2257         ASSERT_NE(NULL, self->root_prog.filter);
2258         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2259         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2260
2261         self->apply_prog.filter = malloc(sizeof(apply_filter));
2262         ASSERT_NE(NULL, self->apply_prog.filter);
2263         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2264         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2265
2266         self->sibling_count = 0;
2267         pthread_mutex_init(&self->mutex, NULL);
2268         pthread_cond_init(&self->cond, NULL);
2269         sem_init(&self->started, 0, 0);
2270         self->sibling[0].tid = 0;
2271         self->sibling[0].cond = &self->cond;
2272         self->sibling[0].started = &self->started;
2273         self->sibling[0].mutex = &self->mutex;
2274         self->sibling[0].diverge = 0;
2275         self->sibling[0].num_waits = 1;
2276         self->sibling[0].prog = &self->root_prog;
2277         self->sibling[0].metadata = _metadata;
2278         self->sibling[1].tid = 0;
2279         self->sibling[1].cond = &self->cond;
2280         self->sibling[1].started = &self->started;
2281         self->sibling[1].mutex = &self->mutex;
2282         self->sibling[1].diverge = 0;
2283         self->sibling[1].prog = &self->root_prog;
2284         self->sibling[1].num_waits = 1;
2285         self->sibling[1].metadata = _metadata;
2286 }
2287
2288 FIXTURE_TEARDOWN(TSYNC)
2289 {
2290         int sib = 0;
2291
2292         if (self->root_prog.filter)
2293                 free(self->root_prog.filter);
2294         if (self->apply_prog.filter)
2295                 free(self->apply_prog.filter);
2296
2297         for ( ; sib < self->sibling_count; ++sib) {
2298                 struct tsync_sibling *s = &self->sibling[sib];
2299
2300                 if (!s->tid)
2301                         continue;
2302                 /*
2303                  * If a thread is still running, it may be stuck, so hit
2304                  * it over the head really hard.
2305                  */
2306                 pthread_kill(s->tid, 9);
2307         }
2308         pthread_mutex_destroy(&self->mutex);
2309         pthread_cond_destroy(&self->cond);
2310         sem_destroy(&self->started);
2311 }
2312
/*
 * Thread body for a TSYNC sibling.
 *
 * @data: pointer to this thread's struct tsync_sibling.
 *
 * Records its own tid, optionally installs me->prog on itself when
 * me->diverge is set, announces itself on me->started, then blocks on
 * me->cond until it has been woken me->num_waits times. Afterwards it
 * reports what it observed through its return value:
 *   SIBLING_EXIT_FAILURE  - the diverge prctl() returned non-zero
 *   SIBLING_EXIT_NEWPRIVS - PR_GET_NO_NEW_PRIVS returned 0
 *   SIBLING_EXIT_UNKILLED - the read() probe returned
 */
void *tsync_sibling(void *data)
{
        long ret = 0;
        struct tsync_sibling *me = data;

        me->system_tid = syscall(__NR_gettid);

        /* The mutex is held from here until pthread_cond_wait() drops it. */
        pthread_mutex_lock(me->mutex);
        if (me->diverge) {
                /* Just re-apply the root prog to fork the tree */
                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
                                me->prog, 0, 0);
        }
        sem_post(me->started);
        /* Return outside of started so parent notices failures. */
        if (ret) {
                pthread_mutex_unlock(me->mutex);
                return (void *)SIBLING_EXIT_FAILURE;
        }
        /* Always waits at least once, then until num_waits reaches zero. */
        do {
                pthread_cond_wait(me->cond, me->mutex);
                me->num_waits = me->num_waits - 1;
        } while (me->num_waits);
        pthread_mutex_unlock(me->mutex);

        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
        if (!ret)
                return (void *)SIBLING_EXIT_NEWPRIVS;
        /* Probe syscall: the fixture's apply filter kills on read(). */
        read(0, NULL, 0);
        return (void *)SIBLING_EXIT_UNKILLED;
}
2344
2345 void tsync_start_sibling(struct tsync_sibling *sibling)
2346 {
2347         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2348 }
2349
2350 TEST_F(TSYNC, siblings_fail_prctl)
2351 {
2352         long ret;
2353         void *status;
2354         struct sock_filter filter[] = {
2355                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2356                         offsetof(struct seccomp_data, nr)),
2357                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2358                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2359                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2360         };
2361         struct sock_fprog prog = {
2362                 .len = (unsigned short)ARRAY_SIZE(filter),
2363                 .filter = filter,
2364         };
2365
2366         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2367                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2368         }
2369
2370         /* Check prctl failure detection by requesting sib 0 diverge. */
2371         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2372         ASSERT_NE(ENOSYS, errno) {
2373                 TH_LOG("Kernel does not support seccomp syscall!");
2374         }
2375         ASSERT_EQ(0, ret) {
2376                 TH_LOG("setting filter failed");
2377         }
2378
2379         self->sibling[0].diverge = 1;
2380         tsync_start_sibling(&self->sibling[0]);
2381         tsync_start_sibling(&self->sibling[1]);
2382
2383         while (self->sibling_count < TSYNC_SIBLINGS) {
2384                 sem_wait(&self->started);
2385                 self->sibling_count++;
2386         }
2387
2388         /* Signal the threads to clean up*/
2389         pthread_mutex_lock(&self->mutex);
2390         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2391                 TH_LOG("cond broadcast non-zero");
2392         }
2393         pthread_mutex_unlock(&self->mutex);
2394
2395         /* Ensure diverging sibling failed to call prctl. */
2396         PTHREAD_JOIN(self->sibling[0].tid, &status);
2397         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2398         PTHREAD_JOIN(self->sibling[1].tid, &status);
2399         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2400 }
2401
2402 TEST_F(TSYNC, two_siblings_with_ancestor)
2403 {
2404         long ret;
2405         void *status;
2406
2407         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2408                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2409         }
2410
2411         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2412         ASSERT_NE(ENOSYS, errno) {
2413                 TH_LOG("Kernel does not support seccomp syscall!");
2414         }
2415         ASSERT_EQ(0, ret) {
2416                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2417         }
2418         tsync_start_sibling(&self->sibling[0]);
2419         tsync_start_sibling(&self->sibling[1]);
2420
2421         while (self->sibling_count < TSYNC_SIBLINGS) {
2422                 sem_wait(&self->started);
2423                 self->sibling_count++;
2424         }
2425
2426         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2427                       &self->apply_prog);
2428         ASSERT_EQ(0, ret) {
2429                 TH_LOG("Could install filter on all threads!");
2430         }
2431         /* Tell the siblings to test the policy */
2432         pthread_mutex_lock(&self->mutex);
2433         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2434                 TH_LOG("cond broadcast non-zero");
2435         }
2436         pthread_mutex_unlock(&self->mutex);
2437         /* Ensure they are both killed and don't exit cleanly. */
2438         PTHREAD_JOIN(self->sibling[0].tid, &status);
2439         EXPECT_EQ(0x0, (long)status);
2440         PTHREAD_JOIN(self->sibling[1].tid, &status);
2441         EXPECT_EQ(0x0, (long)status);
2442 }
2443
2444 TEST_F(TSYNC, two_sibling_want_nnp)
2445 {
2446         void *status;
2447
2448         /* start siblings before any prctl() operations */
2449         tsync_start_sibling(&self->sibling[0]);
2450         tsync_start_sibling(&self->sibling[1]);
2451         while (self->sibling_count < TSYNC_SIBLINGS) {
2452                 sem_wait(&self->started);
2453                 self->sibling_count++;
2454         }
2455
2456         /* Tell the siblings to test no policy */
2457         pthread_mutex_lock(&self->mutex);
2458         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2459                 TH_LOG("cond broadcast non-zero");
2460         }
2461         pthread_mutex_unlock(&self->mutex);
2462
2463         /* Ensure they are both upset about lacking nnp. */
2464         PTHREAD_JOIN(self->sibling[0].tid, &status);
2465         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2466         PTHREAD_JOIN(self->sibling[1].tid, &status);
2467         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2468 }
2469
2470 TEST_F(TSYNC, two_siblings_with_no_filter)
2471 {
2472         long ret;
2473         void *status;
2474
2475         /* start siblings before any prctl() operations */
2476         tsync_start_sibling(&self->sibling[0]);
2477         tsync_start_sibling(&self->sibling[1]);
2478         while (self->sibling_count < TSYNC_SIBLINGS) {
2479                 sem_wait(&self->started);
2480                 self->sibling_count++;
2481         }
2482
2483         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2484                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2485         }
2486
2487         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2488                       &self->apply_prog);
2489         ASSERT_NE(ENOSYS, errno) {
2490                 TH_LOG("Kernel does not support seccomp syscall!");
2491         }
2492         ASSERT_EQ(0, ret) {
2493                 TH_LOG("Could install filter on all threads!");
2494         }
2495
2496         /* Tell the siblings to test the policy */
2497         pthread_mutex_lock(&self->mutex);
2498         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2499                 TH_LOG("cond broadcast non-zero");
2500         }
2501         pthread_mutex_unlock(&self->mutex);
2502
2503         /* Ensure they are both killed and don't exit cleanly. */
2504         PTHREAD_JOIN(self->sibling[0].tid, &status);
2505         EXPECT_EQ(0x0, (long)status);
2506         PTHREAD_JOIN(self->sibling[1].tid, &status);
2507         EXPECT_EQ(0x0, (long)status);
2508 }
2509
2510 TEST_F(TSYNC, two_siblings_with_one_divergence)
2511 {
2512         long ret;
2513         void *status;
2514
2515         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2516                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2517         }
2518
2519         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2520         ASSERT_NE(ENOSYS, errno) {
2521                 TH_LOG("Kernel does not support seccomp syscall!");
2522         }
2523         ASSERT_EQ(0, ret) {
2524                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2525         }
2526         self->sibling[0].diverge = 1;
2527         tsync_start_sibling(&self->sibling[0]);
2528         tsync_start_sibling(&self->sibling[1]);
2529
2530         while (self->sibling_count < TSYNC_SIBLINGS) {
2531                 sem_wait(&self->started);
2532                 self->sibling_count++;
2533         }
2534
2535         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2536                       &self->apply_prog);
2537         ASSERT_EQ(self->sibling[0].system_tid, ret) {
2538                 TH_LOG("Did not fail on diverged sibling.");
2539         }
2540
2541         /* Wake the threads */
2542         pthread_mutex_lock(&self->mutex);
2543         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2544                 TH_LOG("cond broadcast non-zero");
2545         }
2546         pthread_mutex_unlock(&self->mutex);
2547
2548         /* Ensure they are both unkilled. */
2549         PTHREAD_JOIN(self->sibling[0].tid, &status);
2550         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2551         PTHREAD_JOIN(self->sibling[1].tid, &status);
2552         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2553 }
2554
2555 TEST_F(TSYNC, two_siblings_not_under_filter)
2556 {
2557         long ret, sib;
2558         void *status;
2559
2560         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2561                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2562         }
2563
2564         /*
2565          * Sibling 0 will have its own seccomp policy
2566          * and Sibling 1 will not be under seccomp at
2567          * all. Sibling 1 will enter seccomp and 0
2568          * will cause failure.
2569          */
2570         self->sibling[0].diverge = 1;
2571         tsync_start_sibling(&self->sibling[0]);
2572         tsync_start_sibling(&self->sibling[1]);
2573
2574         while (self->sibling_count < TSYNC_SIBLINGS) {
2575                 sem_wait(&self->started);
2576                 self->sibling_count++;
2577         }
2578
2579         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2580         ASSERT_NE(ENOSYS, errno) {
2581                 TH_LOG("Kernel does not support seccomp syscall!");
2582         }
2583         ASSERT_EQ(0, ret) {
2584                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2585         }
2586
2587         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2588                       &self->apply_prog);
2589         ASSERT_EQ(ret, self->sibling[0].system_tid) {
2590                 TH_LOG("Did not fail on diverged sibling.");
2591         }
2592         sib = 1;
2593         if (ret == self->sibling[0].system_tid)
2594                 sib = 0;
2595
2596         pthread_mutex_lock(&self->mutex);
2597
2598         /* Increment the other siblings num_waits so we can clean up
2599          * the one we just saw.
2600          */
2601         self->sibling[!sib].num_waits += 1;
2602
2603         /* Signal the thread to clean up*/
2604         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2605                 TH_LOG("cond broadcast non-zero");
2606         }
2607         pthread_mutex_unlock(&self->mutex);
2608         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2609         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2610         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2611         while (!kill(self->sibling[sib].system_tid, 0))
2612                 sleep(0.1);
2613         /* Switch to the remaining sibling */
2614         sib = !sib;
2615
2616         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2617                       &self->apply_prog);
2618         ASSERT_EQ(0, ret) {
2619                 TH_LOG("Expected the remaining sibling to sync");
2620         };
2621
2622         pthread_mutex_lock(&self->mutex);
2623
2624         /* If remaining sibling didn't have a chance to wake up during
2625          * the first broadcast, manually reduce the num_waits now.
2626          */
2627         if (self->sibling[sib].num_waits > 1)
2628                 self->sibling[sib].num_waits = 1;
2629         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2630                 TH_LOG("cond broadcast non-zero");
2631         }
2632         pthread_mutex_unlock(&self->mutex);
2633         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2634         EXPECT_EQ(0, (long)status);
2635         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2636         while (!kill(self->sibling[sib].system_tid, 0))
2637                 sleep(0.1);
2638
2639         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2640                       &self->apply_prog);
2641         ASSERT_EQ(0, ret);  /* just us chickens */
2642 }
2643
2644 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
     /*
      * Outline of the code below: a forked child makes itself traceable
      * (PTRACE_TRACEME), stops with SIGSTOP, installs a filter that tags
      * nanosleep with SECCOMP_RET_TRACE|0x100 and restart_syscall with
      * SECCOMP_RET_TRACE|0x200, and then sleeps for one second.  The parent
      * catches the 0x100 event, interrupts the sleep with SIGSTOP followed by
      * SIGCONT, and asserts that the next seccomp event carries 0x200.
      */
2645 TEST(syscall_restart)
2646 {
2647         long ret;
             /* Filled by PTRACE_GETEVENTMSG; compared to the 0x100/0x200 tags. */
2648         unsigned long msg;
2649         pid_t child_pid;
             /* Sync pipe: the parent writes pipefd[1], the child reads pipefd[0]. */
2650         int pipefd[2];
2651         int status;
2652         siginfo_t info = { };
2653         struct sock_filter filter[] = {
2654                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2655                          offsetof(struct seccomp_data, nr)),
2656
                     /*
                      * The third BPF_JUMP argument is the forward skip taken
                      * on a match.  The skips for sigreturn, read, exit and
                      * rt_sigreturn all land on the SECCOMP_RET_ALLOW return;
                      * nanosleep and restart_syscall land on their own
                      * SECCOMP_RET_TRACE returns at the end of the program.
                      */
2657 #ifdef __NR_sigreturn
2658                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2659 #endif
2660                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2661                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2662                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2663                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2664                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2665
2666                 /* Allow __NR_write for easy logging. */
                     /* Any other syscall skips one instruction to SECCOMP_RET_KILL. */
2667                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2668                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2669                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2670                 /* The nanosleep jump target. */
2671                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2672                 /* The restart_syscall jump target. */
2673                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2674         };
2675         struct sock_fprog prog = {
2676                 .len = (unsigned short)ARRAY_SIZE(filter),
2677                 .filter = filter,
2678         };
2679 #if defined(__arm__)
2680         struct utsname utsbuf;
2681 #endif
2682
2683         ASSERT_EQ(0, pipe(pipefd));
2684
2685         child_pid = fork();
2686         ASSERT_LE(0, child_pid);
2687         if (child_pid == 0) {
2688                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
2689                 char buf = ' ';
2690                 struct timespec timeout = { };
2691
2692                 /* Attach parent as tracer and stop. */
2693                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2694                 EXPECT_EQ(0, raise(SIGSTOP));
2695
                     /* The child only reads from the pipe; drop the write end. */
2696                 EXPECT_EQ(0, close(pipefd[1]));
2697
2698                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2699                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2700                 }
2701
2702                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2703                 EXPECT_EQ(0, ret) {
2704                         TH_LOG("Failed to install filter!");
2705                 }
2706
                     /* Block until the parent writes the '.' sync byte. */
2707                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2708                         TH_LOG("Failed to read() sync from parent");
2709                 }
2710                 EXPECT_EQ('.', buf) {
2711                         TH_LOG("Failed to get sync data from read()");
2712                 }
2713
2714                 /* Start nanosleep to be interrupted. */
2715                 timeout.tv_sec = 1;
2716                 errno = 0;
2717                 EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2718                         TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2719                 }
2720
2721                 /* Read final sync from parent. */
2722                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2723                         TH_LOG("Failed final read() from parent");
2724                 }
2725                 EXPECT_EQ('!', buf) {
2726                         TH_LOG("Failed to get final data from read()");
2727                 }
2728
2729                 /* Directly report the status of our test harness results. */
                     /* The raw __NR_exit syscall is one of the numbers the filter allows. */
2730                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2731                                                      : EXIT_FAILURE);
2732         }
             /* The parent only writes to the pipe; drop the read end. */
2733         EXPECT_EQ(0, close(pipefd[0]));
2734
2735         /* Attach to child, setup options, and release. */
             /*
              * The stop awaited here is presumably the child's raise(SIGSTOP).
              * Writing '.' afterwards releases the child's first read().
              */
2736         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2737         ASSERT_EQ(true, WIFSTOPPED(status));
2738         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2739                             PTRACE_O_TRACESECCOMP));
2740         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2741         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2742
2743         /* Wait for nanosleep() to start. */
             /*
              * For this stop the ptrace event number is checked in the bits of
              * status above bit 15, and msg must be the 0x100 tag the filter
              * attached to nanosleep.
              */
2744         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2745         ASSERT_EQ(true, WIFSTOPPED(status));
2746         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2747         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2748         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2749         ASSERT_EQ(0x100, msg);
2750         EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2751
2752         /* Might as well check siginfo for sanity while we're here. */
2753         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2754         ASSERT_EQ(SIGTRAP, info.si_signo);
2755         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2756         EXPECT_EQ(0, info.si_errno);
2757         EXPECT_EQ(getuid(), info.si_uid);
2758         /* Verify signal delivery came from child (seccomp-triggered). */
2759         EXPECT_EQ(child_pid, info.si_pid);
2760
2761         /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2762         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2763         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2764         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2765         ASSERT_EQ(true, WIFSTOPPED(status));
2766         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2767         /* Verify signal delivery came from parent now. */
2768         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2769         EXPECT_EQ(getpid(), info.si_pid);
2770
2771         /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2772         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2773         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2774         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2775         ASSERT_EQ(true, WIFSTOPPED(status));
2776         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2777         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2778
2779         /* Wait for restart_syscall() to start. */
2780         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2781         ASSERT_EQ(true, WIFSTOPPED(status));
2782         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2783         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2784         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2785
             /* 0x200 is the tag the filter attached to restart_syscall. */
2786         ASSERT_EQ(0x200, msg);
2787         ret = get_syscall(_metadata, child_pid);
2788 #if defined(__arm__)
2789         /*
2790          * FIXME:
2791          * - native ARM registers do NOT expose true syscall.
2792          * - compat ARM registers on ARM64 DO expose true syscall.
2793          */
             /*
              * A uname() machine string starting with "arm" selects the
              * __NR_nanosleep expectation.  The dangling `else` binds to the
              * brace block after the #endif, which is also what non-ARM builds
              * execute unconditionally.
              */
2794         ASSERT_EQ(0, uname(&utsbuf));
2795         if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2796                 EXPECT_EQ(__NR_nanosleep, ret);
2797         } else
2798 #endif
2799         {
2800                 EXPECT_EQ(__NR_restart_syscall, ret);
2801         }
2802
2803         /* Write again to end test. */
             /* '!' is the byte the child's final read() expects. */
2804         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2805         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2806         EXPECT_EQ(0, close(pipefd[1]));
2807
             /*
              * The child exits with EXIT_SUCCESS or EXIT_FAILURE according to
              * its own EXPECT results, so a signal death or a non-zero exit
              * status fails this test as well.
              */
2808         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2809         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2810                 _metadata->passed = 0;
2811 }
2812
     /*
      * Exercise SECCOMP_FILTER_FLAG_LOG: the flag must be rejected with EINVAL
      * for SECCOMP_SET_MODE_STRICT and accepted for SECCOMP_SET_MODE_FILTER.
      * The last filter installed returns SECCOMP_RET_KILL for getpid, and the
      * test is declared with TEST_SIGNAL(..., SIGSYS), so it is presumably
      * expected to die from SIGSYS at the final getpid call.
      *
      * NOTE(review): the errno checks below run whether or not the preceding
      * call failed, so they appear to rely on seccomp() (defined elsewhere in
      * this file) clearing errno before the syscall - confirm.
      */
2813 TEST_SIGNAL(filter_flag_log, SIGSYS)
2814 {
             /* One instruction: allow every syscall. */
2815         struct sock_filter allow_filter[] = {
2816                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2817         };
             /*
              * On a getpid match the jump skips nothing and reaches
              * SECCOMP_RET_KILL; any other syscall skips one instruction to
              * SECCOMP_RET_ALLOW.
              */
2818         struct sock_filter kill_filter[] = {
2819                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2820                         offsetof(struct seccomp_data, nr)),
2821                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2822                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2823                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2824         };
2825         struct sock_fprog allow_prog = {
2826                 .len = (unsigned short)ARRAY_SIZE(allow_filter),
2827                 .filter = allow_filter,
2828         };
2829         struct sock_fprog kill_prog = {
2830                 .len = (unsigned short)ARRAY_SIZE(kill_filter),
2831                 .filter = kill_filter,
2832         };
2833         long ret;
             /* Captured before any filter is installed; re-checked via getppid below. */
2834         pid_t parent = getppid();
2835
2836         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2837         ASSERT_EQ(0, ret);
2838
2839         /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2840         ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2841                       &allow_prog);
2842         ASSERT_NE(ENOSYS, errno) {
2843                 TH_LOG("Kernel does not support seccomp syscall!");
2844         }
2845         EXPECT_NE(0, ret) {
2846                 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2847         }
2848         EXPECT_EQ(EINVAL, errno) {
2849                 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2850         }
2851
2852         /* Verify that a simple, permissive filter can be added with no flags */
2853         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2854         EXPECT_EQ(0, ret);
2855
2856         /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2857         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2858                       &allow_prog);
2859         ASSERT_NE(EINVAL, errno) {
2860                 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2861         }
2862         EXPECT_EQ(0, ret);
2863
2864         /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2865         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2866                       &kill_prog);
2867         EXPECT_EQ(0, ret);
2868
             /* getppid is not matched by kill_filter, so it must still work. */
2869         EXPECT_EQ(parent, syscall(__NR_getppid));
2870         /* getpid() should never return. */
2871         EXPECT_EQ(0, syscall(__NR_getpid));
2872 }
2873
     /*
      * Query SECCOMP_GET_ACTION_AVAIL for every action listed in actions[] and
      * expect each query to return 0, then query a value that is not in the
      * list and expect -1 with errno set to EOPNOTSUPP.
      */
2874 TEST(get_action_avail)
2875 {
2876         __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2877                             SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2878                             SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
             /* Not one of the actions listed above; used for the negative check. */
2879         __u32 unknown_action = 0x10000000U;
2880         int i;
2881         long ret;
2882
             /*
              * Probe once with the first action so that a missing syscall
              * (ENOSYS) or a missing operation (EINVAL) aborts the test here,
              * before the per-action loop.  The loop queries actions[0] again.
              */
2883         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2884         ASSERT_NE(ENOSYS, errno) {
2885                 TH_LOG("Kernel does not support seccomp syscall!");
2886         }
2887         ASSERT_NE(EINVAL, errno) {
2888                 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2889         }
2890         EXPECT_EQ(ret, 0);
2891
2892         for (i = 0; i < ARRAY_SIZE(actions); i++) {
2893                 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2894                 EXPECT_EQ(ret, 0) {
2895                         TH_LOG("Expected action (0x%X) not available!",
2896                                actions[i]);
2897                 }
2898         }
2899
2900         /* Check that an unknown action is handled properly (EOPNOTSUPP) */
2901         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
2902         EXPECT_EQ(ret, -1);
2903         EXPECT_EQ(errno, EOPNOTSUPP);
2904 }
2905
2906 /*
2907  * TODO:
2908  * - add microbenchmarks
2909  * - expand NNP testing
2910  * - better arch-specific TRACE and TRAP handlers.
2911  * - endianness checking when appropriate
2912  * - 64-bit arg prodding
2913  * - arch value testing (x86 modes especially)
2914  * - verify that FILTER_FLAG_LOG filters generate log messages
2915  * - verify that RET_LOG generates log messages
2916  * - ...
2917  */
2918
     /*
      * Macro from ../kselftest_harness.h; presumably it expands to the program
      * entry point that runs every TEST()/TEST_SIGNAL() defined in this file -
      * confirm against the harness header.
      */
2919 TEST_HARNESS_MAIN