GNU Linux-libre 4.19.264-gnu1
[releases.git] / drivers / gpu / drm / i915 / selftests / i915_timeline.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6
7 #include "../i915_selftest.h"
8 #include "i915_random.h"
9
10 #include "mock_gem_device.h"
11 #include "mock_timeline.h"
12
13 struct __igt_sync {
14         const char *name;
15         u32 seqno;
16         bool expected;
17         bool set;
18 };
19
20 static int __igt_sync(struct i915_timeline *tl,
21                       u64 ctx,
22                       const struct __igt_sync *p,
23                       const char *name)
24 {
25         int ret;
26
27         if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
28                 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
29                        name, p->name, ctx, p->seqno, yesno(p->expected));
30                 return -EINVAL;
31         }
32
33         if (p->set) {
34                 ret = __i915_timeline_sync_set(tl, ctx, p->seqno);
35                 if (ret)
36                         return ret;
37         }
38
39         return 0;
40 }
41
42 static int igt_sync(void *arg)
43 {
44         const struct __igt_sync pass[] = {
45                 { "unset", 0, false, false },
46                 { "new", 0, false, true },
47                 { "0a", 0, true, true },
48                 { "1a", 1, false, true },
49                 { "1b", 1, true, true },
50                 { "0b", 0, true, false },
51                 { "2a", 2, false, true },
52                 { "4", 4, false, true },
53                 { "INT_MAX", INT_MAX, false, true },
54                 { "INT_MAX-1", INT_MAX-1, true, false },
55                 { "INT_MAX+1", (u32)INT_MAX+1, false, true },
56                 { "INT_MAX", INT_MAX, true, false },
57                 { "UINT_MAX", UINT_MAX, false, true },
58                 { "wrap", 0, false, true },
59                 { "unwrap", UINT_MAX, true, false },
60                 {},
61         }, *p;
62         struct i915_timeline tl;
63         int order, offset;
64         int ret = -ENODEV;
65
66         mock_timeline_init(&tl, 0);
67         for (p = pass; p->name; p++) {
68                 for (order = 1; order < 64; order++) {
69                         for (offset = -1; offset <= (order > 1); offset++) {
70                                 u64 ctx = BIT_ULL(order) + offset;
71
72                                 ret = __igt_sync(&tl, ctx, p, "1");
73                                 if (ret)
74                                         goto out;
75                         }
76                 }
77         }
78         mock_timeline_fini(&tl);
79
80         mock_timeline_init(&tl, 0);
81         for (order = 1; order < 64; order++) {
82                 for (offset = -1; offset <= (order > 1); offset++) {
83                         u64 ctx = BIT_ULL(order) + offset;
84
85                         for (p = pass; p->name; p++) {
86                                 ret = __igt_sync(&tl, ctx, p, "2");
87                                 if (ret)
88                                         goto out;
89                         }
90                 }
91         }
92
93 out:
94         mock_timeline_fini(&tl);
95         return ret;
96 }
97
98 static unsigned int random_engine(struct rnd_state *rnd)
99 {
100         return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
101 }
102
103 static int bench_sync(void *arg)
104 {
105         struct rnd_state prng;
106         struct i915_timeline tl;
107         unsigned long end_time, count;
108         u64 prng32_1M;
109         ktime_t kt;
110         int order, last_order;
111
112         mock_timeline_init(&tl, 0);
113
114         /* Lookups from cache are very fast and so the random number generation
115          * and the loop itself becomes a significant factor in the per-iteration
116          * timings. We try to compensate the results by measuring the overhead
117          * of the prng and subtract it from the reported results.
118          */
119         prandom_seed_state(&prng, i915_selftest.random_seed);
120         count = 0;
121         kt = ktime_get();
122         end_time = jiffies + HZ/10;
123         do {
124                 u32 x;
125
126                 /* Make sure the compiler doesn't optimise away the prng call */
127                 WRITE_ONCE(x, prandom_u32_state(&prng));
128
129                 count++;
130         } while (!time_after(jiffies, end_time));
131         kt = ktime_sub(ktime_get(), kt);
132         pr_debug("%s: %lu random evaluations, %lluns/prng\n",
133                  __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
134         prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
135
136         /* Benchmark (only) setting random context ids */
137         prandom_seed_state(&prng, i915_selftest.random_seed);
138         count = 0;
139         kt = ktime_get();
140         end_time = jiffies + HZ/10;
141         do {
142                 u64 id = i915_prandom_u64_state(&prng);
143
144                 __i915_timeline_sync_set(&tl, id, 0);
145                 count++;
146         } while (!time_after(jiffies, end_time));
147         kt = ktime_sub(ktime_get(), kt);
148         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
149         pr_info("%s: %lu random insertions, %lluns/insert\n",
150                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
151
152         /* Benchmark looking up the exact same context ids as we just set */
153         prandom_seed_state(&prng, i915_selftest.random_seed);
154         end_time = count;
155         kt = ktime_get();
156         while (end_time--) {
157                 u64 id = i915_prandom_u64_state(&prng);
158
159                 if (!__i915_timeline_sync_is_later(&tl, id, 0)) {
160                         mock_timeline_fini(&tl);
161                         pr_err("Lookup of %llu failed\n", id);
162                         return -EINVAL;
163                 }
164         }
165         kt = ktime_sub(ktime_get(), kt);
166         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
167         pr_info("%s: %lu random lookups, %lluns/lookup\n",
168                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
169
170         mock_timeline_fini(&tl);
171         cond_resched();
172
173         mock_timeline_init(&tl, 0);
174
175         /* Benchmark setting the first N (in order) contexts */
176         count = 0;
177         kt = ktime_get();
178         end_time = jiffies + HZ/10;
179         do {
180                 __i915_timeline_sync_set(&tl, count++, 0);
181         } while (!time_after(jiffies, end_time));
182         kt = ktime_sub(ktime_get(), kt);
183         pr_info("%s: %lu in-order insertions, %lluns/insert\n",
184                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
185
186         /* Benchmark looking up the exact same context ids as we just set */
187         end_time = count;
188         kt = ktime_get();
189         while (end_time--) {
190                 if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) {
191                         pr_err("Lookup of %lu failed\n", end_time);
192                         mock_timeline_fini(&tl);
193                         return -EINVAL;
194                 }
195         }
196         kt = ktime_sub(ktime_get(), kt);
197         pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
198                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
199
200         mock_timeline_fini(&tl);
201         cond_resched();
202
203         mock_timeline_init(&tl, 0);
204
205         /* Benchmark searching for a random context id and maybe changing it */
206         prandom_seed_state(&prng, i915_selftest.random_seed);
207         count = 0;
208         kt = ktime_get();
209         end_time = jiffies + HZ/10;
210         do {
211                 u32 id = random_engine(&prng);
212                 u32 seqno = prandom_u32_state(&prng);
213
214                 if (!__i915_timeline_sync_is_later(&tl, id, seqno))
215                         __i915_timeline_sync_set(&tl, id, seqno);
216
217                 count++;
218         } while (!time_after(jiffies, end_time));
219         kt = ktime_sub(ktime_get(), kt);
220         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
221         pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
222                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
223         mock_timeline_fini(&tl);
224         cond_resched();
225
226         /* Benchmark searching for a known context id and changing the seqno */
227         for (last_order = 1, order = 1; order < 32;
228              ({ int tmp = last_order; last_order = order; order += tmp; })) {
229                 unsigned int mask = BIT(order) - 1;
230
231                 mock_timeline_init(&tl, 0);
232
233                 count = 0;
234                 kt = ktime_get();
235                 end_time = jiffies + HZ/10;
236                 do {
237                         /* Without assuming too many details of the underlying
238                          * implementation, try to identify its phase-changes
239                          * (if any)!
240                          */
241                         u64 id = (u64)(count & mask) << order;
242
243                         __i915_timeline_sync_is_later(&tl, id, 0);
244                         __i915_timeline_sync_set(&tl, id, 0);
245
246                         count++;
247                 } while (!time_after(jiffies, end_time));
248                 kt = ktime_sub(ktime_get(), kt);
249                 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
250                         __func__, count, order,
251                         (long long)div64_ul(ktime_to_ns(kt), count));
252                 mock_timeline_fini(&tl);
253                 cond_resched();
254         }
255
256         return 0;
257 }
258
259 int i915_gem_timeline_mock_selftests(void)
260 {
261         static const struct i915_subtest tests[] = {
262                 SUBTEST(igt_sync),
263                 SUBTEST(bench_sync),
264         };
265
266         return i915_subtests(tests, NULL);
267 }