/* GNU Linux-libre 4.19.286-gnu1: net/sched/sch_red.c */
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length, should be chosen >qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than ram size).
        Really, this limit will never be reached
        if RED works correctly.
 */
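
/* Example configuration from user space (illustrative values only; the
 * device name, rate and thresholds are placeholders, not recommendations):
 *
 *      tc qdisc add dev eth0 parent 1:1 handle 10: red limit 400000 \
 *              min 30000 max 90000 avpkt 1000 burst 55 ecn adaptive \
 *              bandwidth 10Mbit probability 0.01
 *
 * iproute2 does the RED parameter fitting in user space and passes the
 * result down as struct tc_red_qopt plus the TCA_RED_STAB lookup table
 * consumed by red_change() below.
 */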
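/* Per-qdisc private state: the user-visible limit and flags, the RED
 * parameters, working variables and statistics from include/net/red.h,
 * the adaptive-RED timer with a back-pointer to the owning qdisc, and
 * the child qdisc (a bfifo by default) that actually queues the packets.
 */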
struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */
        unsigned char           flags;
        struct timer_list       adapt_timer;
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        struct red_stats        stats;
        struct Qdisc            *qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

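/* Enqueue path.  The average queue size (qavg) is an EWMA computed from the
 * child qdisc's byte backlog.  red_action() then picks one of three outcomes:
 * below the minimum threshold the packet passes, between the thresholds it is
 * marked/dropped with a probability that grows with qavg, and above the
 * maximum threshold it is always marked/dropped.  With TC_RED_ECN set,
 * ECN-capable packets are CE-marked instead of dropped; TC_RED_HARDDROP
 * forces a drop in the hard-mark region even when ECN is available.
 * Accepted packets are handed to the child qdisc.
 */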
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        unsigned int len;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                q->stats.prob_mark++;
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q) ||
                    !INET_ECN_set_ce(skb)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                q->stats.forced_mark++;
                break;
        }

        len = qdisc_pkt_len(skb);
        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                sch->qstats.backlog += len;
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

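/* Dequeue from the child qdisc.  When the child runs empty, record the start
 * of an idle period so red_calc_qavg() can correctly decay the average while
 * the link sits idle (see the changelog note above about qave on idle links).
 */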
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        red_restart(&q->vars);
}

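/* Program (or remove) the RED configuration in hardware through
 * ndo_setup_tc(TC_SETUP_QDISC_RED).  The thresholds in struct red_parms are
 * stored scaled by Wlog, so they are shifted back down to byte values before
 * being handed to the driver.  Drivers without the offload return
 * -EOPNOTSUPP and the qdisc simply keeps working in software.
 */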
static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.is_ecn = red_use_ecn(q);
                opt.set.qstats = &sch->qstats;
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
};

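/* Apply a new configuration, for both "tc qdisc add" (via red_init()) and
 * "tc qdisc change".  TCA_RED_PARMS and TCA_RED_STAB are mandatory;
 * TCA_RED_MAX_P is optional and only matters for adaptive RED.  A non-zero
 * limit gets a bfifo child qdisc of that many bytes; the child swap and the
 * parameter update are done under the qdisc tree lock so they appear atomic
 * to the datapath.
 */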
static int red_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        struct tc_red_qopt *ctl;
        struct Qdisc *child = NULL;
        int err;
        u32 max_P;
        u8 *stab;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);
        stab = nla_data(tb[TCA_RED_STAB]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
                              ctl->Scell_log, stab))
                return -EINVAL;

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
                                          q->qdisc->qstats.backlog);
                qdisc_put(q->qdisc);
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      stab,
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        red_offload(sch, true);
        return 0;
}

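/* Adaptive RED ("adaptive" in tc).  Every 500 ms (HZ/2 jiffies) the timer
 * re-runs red_adaptative_algo(), which adjusts max_P so the average queue
 * length tends to stay between the configured thresholds.  The root qdisc
 * lock serialises the adjustment against the datapath.
 */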
static inline void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
}

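/* Initialise a new instance: start with the noop child qdisc and set up the
 * adaptive timer (armed by red_change() only when TC_RED_ADAPTATIVE is
 * requested), then hand the netlink options to red_change() to install the
 * real configuration.
 */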
static int red_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
        return red_change(sch, opt, extack);
}

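/* Refresh bstats/qstats from the driver when the qdisc is offloaded and keep
 * TCQ_F_OFFLOADED in sync with what the driver reports.  A driver answering
 * -EOPNOTSUPP is simply treated as "not offloaded", not as a dump failure.
 */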
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload hw_stats = {
                .command = TC_RED_STATS,
                .handle = sch->handle,
                .parent = sch->parent,
                {
                        .stats.bstats = &sch->bstats,
                        .stats.qstats = &sch->qstats,
                },
        };
        int err;

        sch->flags &= ~TCQ_F_OFFLOADED;

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return 0;

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                            &hw_stats);
        if (err == -EOPNOTSUPP)
                return 0;

        if (!err)
                sch->flags |= TCQ_F_OFFLOADED;

        return err;
}

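/* Dump the current configuration back to user space.  The thresholds kept in
 * red_parms are scaled by Wlog, so they are converted back to bytes for the
 * tc_red_qopt reply; max_P travels in its own TCA_RED_MAX_P attribute.
 */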
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        err = red_dump_offload_stats(sch, &opt);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

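/* Dump the RED-specific extended statistics.  For an offloaded qdisc the
 * counters are first refreshed from the driver via a TC_RED_XSTATS request;
 * "early" combines probabilistic and forced drops, "marked" the
 * corresponding ECN marks.
 */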
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {0};

        if (sch->flags & TCQ_F_OFFLOADED) {
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
                        .handle = sch->handle,
                        .parent = sch->parent,
                        {
                                .xstats = &q->stats,
                        },
                };
                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                              &hw_stats_request);
        }
        st.early = q->stats.prob_drop + q->stats.forced_drop;
        st.pdrop = q->stats.pdrop;
        st.other = q->stats.other;
        st.marked = q->stats.prob_mark + q->stats.forced_mark;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

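/* Class interface.  RED exposes exactly one child slot (minor 1) that holds
 * the bfifo (or any grafted) qdisc, so the class operations below all act on
 * that single pseudo-class.
 */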
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");