GNU Linux-libre 4.14.266-gnu1
[releases.git] / net / sched / sch_api.c
1 /*
2  * net/sched/sch_api.c  Packet scheduler API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 #include <net/pkt_cls.h>
39
40 /*
41
42    Short review.
43    -------------
44
45    This file consists of two interrelated parts:
46
47    1. queueing disciplines manager frontend.
48    2. traffic classes manager frontend.
49
50    Generally, queueing discipline ("qdisc") is a black box,
51    which is able to enqueue packets and to dequeue them (when
52    device is ready to send something) in order and at times
53    determined by algorithm hidden in it.
54
55    qdiscs are divided into two categories:
56    - "queues", which have no internal structure visible from outside.
57    - "schedulers", which split all the packets to "traffic classes",
58      using "packet classifiers" (look at cls_api.c)
59
60    In turn, classes may have child qdiscs (as a rule, queues)
61    attached to them etc. etc. etc.
62
63    The goal of the routines in this file is to translate
64    the information supplied by the user in the form of handles
65    into a form more intelligible to the kernel, to perform some
66    sanity checks and the part of the work that is common to all
67    qdiscs, and to provide rtnetlink notifications.
68
69    All real intelligent work is done inside qdisc modules.
70
71
72
73    Every discipline has two major routines: enqueue and dequeue.
74
75    ---dequeue
76
77    dequeue usually returns a skb to send. It is allowed to return NULL,
78    but it does not mean that queue is empty, it just means that
79    discipline does not want to send anything this time.
80    Queue is really empty if q->q.qlen == 0.
81    For complicated disciplines with multiple queues q->q is not
82    real packet queue, but however q->q.qlen must be valid.
83
84    ---enqueue
85
86    enqueue returns 0, if packet was enqueued successfully.
87    If a packet (this one or another one) was dropped, it returns
88    a non-zero error code.
89    NET_XMIT_DROP        - this packet dropped
90      Expected action: do not backoff, but wait until queue will clear.
91    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
92      Expected action: backoff or ignore
93
94    Auxiliary routines:
95
96    ---peek
97
98    like dequeue but without removing a packet from the queue
99
100    ---reset
101
102    returns qdisc to initial state: purge all buffers, clear all
103    timers, counters (except for statistics) etc.
104
105    ---init
106
107    initializes newly created qdisc.
108
109    ---destroy
110
111    destroys resources allocated by init and during lifetime of qdisc.
112
113    ---change
114
115    changes qdisc parameters.
116  */
117
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *      Queueing disciplines manipulation.      *
 ************************************************/


/* The list of all installed queueing disciplines.
 * Singly linked via Qdisc_ops::next; guarded by qdisc_mod_lock.
 */

static struct Qdisc_ops *qdisc_base;
130
/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	/* Reject a duplicate id; on success qp ends up pointing at the
	 * tail 'next' pointer, so the new ops is appended to the list.
	 */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	/* Fill in no-op defaults for optional callbacks.  A qdisc that
	 * supplies dequeue but no peek cannot be defaulted sanely and
	 * is rejected with -EINVAL.
	 */
	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Classful qdiscs must implement the minimal class API. */
		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		/* A tcf_block hook requires both bind/unbind counterparts. */
		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
176
/* Unlink @qops from the list of registered disciplines.
 * Returns 0 on success, -ENOENT if @qops was never registered.
 */
int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	/* Match by address, not by id: only the exact ops object is removed. */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
195
/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	/* Copy the current default qdisc id into @name; strlcpy bounds the
	 * copy to @len bytes and NUL-terminates.
	 */
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
203
204 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205 {
206         struct Qdisc_ops *q = NULL;
207
208         for (q = qdisc_base; q; q = q->next) {
209                 if (!strcmp(name, q->id)) {
210                         if (!try_module_get(q->owner))
211                                 q = NULL;
212                         break;
213                 }
214         }
215
216         return q;
217 }
218
/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		/* Re-scan: the module load may have registered the ops. */
		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default.  qdisc_lookup_default() already took a
		 * reference on the new module; release the old one.
		 */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
247
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config.  Runs as a late initcall so
 * the built-in schedulers have had a chance to register first.
 */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
256
/* We know handle. Find qdisc among all qdisc's attached to device
 * (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	/* No attached device (e.g. built-in qdiscs): only the root
	 * itself can match.
	 */
	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	/* Non-root qdiscs live in the per-device hash keyed by handle
	 * (see qdisc_hash_add()).
	 */
	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
279
/* Add @q to its device's qdisc hash so qdisc_match_from_root() can
 * find it.  Root and ingress qdiscs are reachable directly and are
 * deliberately not hashed.
 */
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;	/* skipped in default dumps */
	}
}
EXPORT_SYMBOL(qdisc_hash_add);
290
/* Remove @q from its device's qdisc hash; mirror of qdisc_hash_add(). */
void qdisc_hash_del(struct Qdisc *q)
{
	/* Same exclusion as qdisc_hash_add(): root/ingress were never hashed. */
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);
299
/* Find a qdisc by handle anywhere on @dev: first the egress hierarchy,
 * then the ingress qdisc if the device has one.  Returns NULL when the
 * handle is 0 or unknown.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}
317
318 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
319 {
320         unsigned long cl;
321         struct Qdisc *leaf;
322         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
323
324         if (cops == NULL)
325                 return NULL;
326         cl = cops->find(p, classid);
327
328         if (cl == 0)
329                 return NULL;
330         leaf = cops->leaf(p, cl);
331         return leaf;
332 }
333
/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				/* Pin the owning module so the ops stays
				 * valid after we drop qdisc_mod_lock; NULL
				 * if the module is being unloaded.
				 */
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
353
354 /* The linklayer setting were not transferred from iproute2, in older
355  * versions, and the rate tables lookup systems have been dropped in
356  * the kernel. To keep backward compatible with older iproute2 tc
357  * utils, we detect the linklayer setting by detecting if the rate
358  * table were modified.
359  *
360  * For linklayer ATM table entries, the rate table will be aligned to
361  * 48 bytes, thus some table entries will contain the same value.  The
362  * mpu (min packet unit) is also encoded into the old rate table, thus
363  * starting from the mpu, we find low and high table entries for
364  * mapping this cell.  If these entries contain the same value, when
365  * the rate tables have been modified for linklayer ATM.
366  *
367  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
368  * and then roundup to the next cell, calc the table entry one below,
369  * and compare.
370  */
371 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
372 {
373         int low       = roundup(r->mpu, 48);
374         int high      = roundup(low+1, 48);
375         int cell_low  = low >> r->cell_log;
376         int cell_high = (high >> r->cell_log) - 1;
377
378         /* rtab is too inaccurate at rates > 100Mbit/s */
379         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
380                 pr_debug("TC linklayer: Giving up ATM detection\n");
381                 return TC_LINKLAYER_ETHERNET;
382         }
383
384         if ((cell_high > cell_low) && (cell_high < 256)
385             && (rtab[cell_low] == rtab[cell_high])) {
386                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
387                          cell_low, cell_high, rtab[cell_high]);
388                 return TC_LINKLAYER_ATM;
389         }
390         return TC_LINKLAYER_ETHERNET;
391 }
392
/* Global list of shared rate tables; entries are refcounted. */
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	/* Validate the netlink payload: the table must be exactly
	 * TC_RTAB_SIZE bytes and the spec must carry a sane rate and
	 * cell_log (cell_log >= 32 would make the shifts below UB).
	 */
	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	/* Share an existing table when both the spec and the 1024 bytes
	 * of table data match.
	 */
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		/* Old iproute2 never set linklayer; detect ATM from the data. */
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
426
/* Drop one reference on @tab; unlink and free it on the last put. */
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	/* Last reference gone: find @tab in the shared list and remove it. */
	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
445
/* Global list of shared size tables; entries are refcounted. */
static LIST_HEAD(qdisc_stab_list);

/* Netlink policy for the TCA_STAB_* attributes parsed in qdisc_get_stab(). */
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};
452
/* Parse a TCA_STAB attribute into a qdisc_size_table, sharing an
 * existing identical table when possible.  Returns an ERR_PTR on
 * malformed input or allocation failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	/* A non-zero advertised table size requires the data attribute. */
	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The size claimed in the spec must match the data actually sent. */
	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	/* Reuse an existing table with identical spec and data. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	/* No match: allocate spec plus trailing data in one block. */
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}
502
/* RCU callback: free a size table once the grace period has elapsed. */
static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}
507
/* Drop one reference on @tab; on the last put, unlink it and free it
 * via RCU (readers may still be walking the table).
 */
void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);
519
/* Emit a nested TCA_STAB attribute carrying the size-table spec.
 * Note: only the spec (szopts) is dumped, not the table data.
 * Returns skb->len on success, -1 when the skb ran out of room.
 */
static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
536
/* Compute the effective packet length of @skb using size table @stab
 * and store it in the skb's qdisc cb.  The result is clamped to >= 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;	/* spec-only table: overhead adjustment only */

	/* Map the adjusted length to a table slot. */
	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Slot beyond the table: extrapolate from the last entry
		 * plus the wrapped-around remainder slot.
		 */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
565
566 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
567 {
568         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
569                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
570                         txt, qdisc->ops->id, qdisc->handle >> 16);
571                 qdisc->flags |= TCQ_F_WARN_NONWC;
572         }
573 }
574 EXPORT_SYMBOL(qdisc_warn_nonwc);
575
/* hrtimer callback: the watchdog fired, kick the root qdisc so the
 * device attempts transmission again.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}
587
/* Initialize watchdog @wd for @qdisc: an absolute, CPU-pinned
 * monotonic hrtimer that fires qdisc_watchdog().
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
595
/* (Re)arm the watchdog to fire at absolute time @expires (ns). */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	/* Don't rearm on a deactivated device. */
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	/* Already armed for exactly this expiry: nothing to do. */
	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
611
/* Cancel a pending watchdog timer, waiting for a running callback. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
617
618 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
619 {
620         struct hlist_head *h;
621         unsigned int i;
622
623         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
624
625         if (h != NULL) {
626                 for (i = 0; i < n; i++)
627                         INIT_HLIST_HEAD(&h[i]);
628         }
629         return h;
630 }
631
/* Grow (double) a class hash table when its load factor exceeds 0.75.
 * Allocation happens outside the qdisc tree lock; the rehash itself is
 * done under sch_tree_lock() so lookups see a consistent table.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;		/* allocation failed: keep the old table */

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	/* Move every class to its bucket under the new mask. */
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	/* Old array freed outside the lock; kvfree may sleep. */
	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
667
668 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
669 {
670         unsigned int size = 4;
671
672         clhash->hash = qdisc_class_hash_alloc(size);
673         if (clhash->hash == NULL)
674                 return -ENOMEM;
675         clhash->hashsize  = size;
676         clhash->hashmask  = size - 1;
677         clhash->hashelems = 0;
678         return 0;
679 }
680 EXPORT_SYMBOL(qdisc_class_hash_init);
681
/* Free the bucket array; entries themselves are owned by the caller. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
687
688 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
689                              struct Qdisc_class_common *cl)
690 {
691         unsigned int h;
692
693         INIT_HLIST_NODE(&cl->hnode);
694         h = qdisc_class_hash(cl->classid, clhash->hashmask);
695         hlist_add_head(&cl->hnode, &clhash->hash[h]);
696         clhash->hashelems++;
697 }
698 EXPORT_SYMBOL(qdisc_class_hash_insert);
699
/* Unlink class @cl from @clhash and update the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
707
/* Allocate an unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;		/* at most one full sweep of the handle space */
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		/* Wrap around, skipping the reserved TC_H_ROOT value. */
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;	/* exhausted: every candidate handle is in use */
}
727
/* Subtract @n packets and @len bytes from the qlen/backlog counters of
 * every ancestor of @sch, notifying parent classes that became empty.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	/* Walk up the hierarchy until the root (parent == 0) is reached. */
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			/* Tell the parent class its child queue drained. */
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
772
/* Build one RTM_*QDISC netlink message for @q into @skb: the tcmsg
 * header, kind, qdisc-specific options, size table and statistics.
 * Returns skb->len on success, -1 (after trimming) when out of room.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the qdisc refcount in qdisc dumps. */
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Per-cpu qdiscs aggregate stats from the per-cpu counters. */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Undo the partially built message. */
	nlmsg_trim(skb, b);
	return -1;
}
836
837 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
838 {
839         if (q->flags & TCQ_F_BUILTIN)
840                 return true;
841         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
842                 return true;
843
844         return false;
845 }
846
/* Send RTM_DELQDISC for @old and/or RTM_NEWQDISC for @new to the
 * RTNLGRP_TC multicast group (and back to the requester when the
 * original request had NLM_F_ECHO set).
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	/* Only send when at least one message was actually filled in. */
	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
877
878 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
879                                struct nlmsghdr *n, u32 clid,
880                                struct Qdisc *old, struct Qdisc *new)
881 {
882         if (new || old)
883                 qdisc_notify(net, skb, n, clid, old, new);
884
885         if (old)
886                 qdisc_destroy(old);
887 }
888
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Grafting at device level (root or ingress). */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			/* Ingress: exactly one queue. */
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		/* Stop traffic while the hierarchy is swapped. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Qdiscs with an attach hook (e.g. mq) install their own
		 * per-queue children; skip the per-queue graft loop.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per tx queue beyond the first. */
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Grafting into a class of an existing parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->find(parent, classid);

			if (cl)
				err = cops->graft(parent, cl, new, &old);
			else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
972
/* lockdep annotation is needed for ingress; egress gets it only for name.
 * Separate class keys keep rx (ingress) and tx (egress) qdisc locks in
 * distinct lockdep classes; see lockdep_set_class() in qdisc_create().
 */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
976
977 /*
978    Allocate and initialize new qdisc.
979
980    Parameters are passed via opt.
981  */
982
static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	/* Unknown kind: try to autoload the matching sch_* module. */
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	/* Still no ops: the requested qdisc kind does not exist. */
	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		/* All ingress qdiscs use the fixed TC_H_INGRESS major. */
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		/* No handle supplied by the caller: allocate one. */
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exist to keep backward compatible with a userspace
	 * loophole, what allowed userspace to get IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgot to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		/* Per-cpu stats requested by the qdisc implementation. */
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			/* Rate estimators make no sense on an mq root. */
			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			/* Pick the seqcount the estimator samples under:
			 * the root's for ordinary children, the qdisc's
			 * own otherwise.
			 */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
			else
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						NULL,
						running,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_hash_add(sch, false);

		return sch;
	}
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1136
/* Reconfigure an existing qdisc: apply TCA_OPTIONS through the qdisc's
 * ->change() hook, swap in a new size table (TCA_STAB), and replace the
 * rate estimator (TCA_RATE) unless the qdisc is an mq root.
 * Returns 0 on success or a negative errno from ->change()/stab lookup.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		/* Qdiscs without a ->change() hook cannot be reconfigured. */
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new stab (NULL when none was supplied) before
	 * dropping the old one; sch->stab is RCU-protected.
	 */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1175
/* State threaded through the loop-detection walk in check_loop(). */
struct check_loop_arg {
	struct qdisc_walker	w;	/* must stay first: check_loop_fn() casts back */
	struct Qdisc		*p;	/* qdisc whose grafting is being validated */
	int			depth;	/* current recursion depth */
};
1181
1182 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1183                          struct qdisc_walker *w);
1184
1185 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1186 {
1187         struct check_loop_arg   arg;
1188
1189         if (q->ops->cl_ops == NULL)
1190                 return 0;
1191
1192         arg.w.stop = arg.w.skip = arg.w.count = 0;
1193         arg.w.fn = check_loop_fn;
1194         arg.depth = depth;
1195         arg.p = p;
1196         q->ops->cl_ops->walk(q, &arg.w);
1197         return arg.w.stop ? -ELOOP : 0;
1198 }
1199
1200 static int
1201 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1202 {
1203         struct Qdisc *leaf;
1204         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1205         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1206
1207         leaf = cops->leaf(q, cl);
1208         if (leaf) {
1209                 if (leaf == arg->p || arg->depth > 7)
1210                         return -ELOOP;
1211                 return check_loop(leaf, arg->p, arg->depth + 1);
1212         }
1213         return 0;
1214 }
1215
1216 /*
1217  * Delete/get qdisc.
1218  */
1219
/* Netlink attribute policy for TCA_* attributes on qdisc/class requests;
 * used by the get/delete/modify handlers and the dump callbacks below.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_NUL_STRING,
				    .len = IFNAMSIZ - 1 },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
};
1229
/* RTM_GETQDISC / RTM_DELQDISC handler: locate a qdisc either through its
 * parent (tcm_parent) or directly by handle, then either graft NULL in
 * its place (delete) or send a notification describing it (get).
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deletion requires CAP_NET_ADMIN; a plain get does not. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Resolve the parent first, then the child
				 * attached under class clid.
				 */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* If the request also names a handle it must match. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		/* Grafting NULL over q detaches, notifies and destroys it. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1296
1297 /*
1298  * Create/change qdisc.
1299  */
1300
/* RTM_NEWQDISC handler: create a new qdisc, replace an existing one, or
 * change an existing one in place, honouring the NLM_F_CREATE /
 * NLM_F_REPLACE / NLM_F_EXCL flag combinations.  Replayed from scratch
 * (see the replay label) when qdisc_create() returns -EAGAIN after a
 * module autoload dropped the RTNL lock.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				/* Resolve parent, then the child below it. */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Refuse grafts that would make q an
				 * ancestor of itself.
				 */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know, that some child q is already
				 *   attached to this parent and have choice:
				 *   either to change it or to create/graft new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, requestor wanted to say,
				 *   that qdisc tcm_handle is not expected
				 *   to exist, so that we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is sort of hole in API, we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft, if
				 *   user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue if it can. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		/* -EAGAIN: module autoload dropped RTNL, start over. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1456
/* Dump @root (and, when @recur, every qdisc hashed on its device) into
 * @skb.  *q_idx_p carries the resume index across dump passes: entries
 * below @s_q_idx were emitted by an earlier pass and are skipped.
 * Returns 0 when done, -1 once the skb runs out of room.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		/* Root already dumped in a previous pass. */
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1510
/* Netlink dump callback for RTM_GETQDISC: walk every device in the
 * namespace, dumping its root qdisc tree and its ingress qdisc.
 * cb->args[0]/[1] hold the device and qdisc resume indices between
 * successive dump calls.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct tcmsg *tcm = nlmsg_data(nlh);
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX,
			  rtm_tca_policy, NULL);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Fresh device in this pass: restart its qdisc index. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1563
1564
1565
1566 /************************************************
1567  *      Traffic classes manipulation.           *
1568  ************************************************/
1569
/* Fill @skb with a netlink message of type @event describing class @cl
 * of qdisc @q: kind string, class-specific attributes (cl_ops->dump)
 * and statistics (cl_ops->dump_stats).  Returns skb->len on success;
 * on failure the skb is trimmed back to its previous tail and -1 is
 * returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1615
1616 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1617                          struct nlmsghdr *n, struct Qdisc *q,
1618                          unsigned long cl, int event)
1619 {
1620         struct sk_buff *skb;
1621         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1622
1623         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1624         if (!skb)
1625                 return -ENOBUFS;
1626
1627         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1628                 kfree_skb(skb);
1629                 return -EINVAL;
1630         }
1631
1632         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1633                               n->nlmsg_flags & NLM_F_ECHO);
1634 }
1635
/* Delete class @cl from @q and send an RTM_DELTCLASS notification.
 * The message is filled *before* cops->delete() runs, because once the
 * class is gone its attributes can no longer be dumped; if the delete
 * itself fails, the prepared skb is dropped and the error returned.
 */
static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}
1667
1668 #ifdef CONFIG_NET_CLS
1669
/* Arguments for the tcf_node_bind() filter walk. */
struct tcf_bind_args {
	struct tcf_walker w;	/* must stay first: tcf_node_bind() casts back */
	u32 classid;		/* class id to pass to ->bind_class() */
	unsigned long cl;	/* new internal class handle to bind to */
};
1675
1676 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1677 {
1678         struct tcf_bind_args *a = (void *)arg;
1679
1680         if (tp->ops->bind_class) {
1681                 tcf_tree_lock(tp);
1682                 tp->ops->bind_class(n, a->classid, a->cl);
1683                 tcf_tree_unlock(tp);
1684         }
1685         return 0;
1686 }
1687
/* Re-bind every filter attached to the class found via @portid to
 * @new_cl: walk all chains and protos on the class's tcf_block and run
 * tcf_node_bind() over each filter.  Silently does nothing when the
 * class cannot be resolved or the qdisc exposes no tcf_block.  The
 * filter chain is traversed with rtnl_dereference(), i.e. under RTNL.
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);
	if (!cl)
		return;
	if (!cops->tcf_block)
		return;
	block = cops->tcf_block(q, cl);
	if (!block)
		return;
	list_for_each_entry(chain, &block->chain_list, list) {
		struct tcf_proto *tp;

		for (tp = rtnl_dereference(chain->filter_chain);
		     tp; tp = rtnl_dereference(tp->next)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w);
		}
	}
}
1718
1719 #else
1720
/* CONFIG_NET_CLS=n stub: no classifiers exist, so nothing to re-bind. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1725
1726 #endif
1727
/* tc_ctl_tclass - handle RTM_NEWTCLASS / RTM_DELTCLASS / RTM_GETTCLASS
 *
 * Resolves the qdisc (major X:0) and class (X:Y) named by the tcmsg
 * header, then creates/changes, deletes or queries the class through the
 * qdisc's class ops.  Runs under RTNL (rtnetlink doit handler).
 * Returns 0 on success or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN in the socket's userns. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - clear.
	   handle == X:0         - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		/* Root class: default to the device's root qdisc. */
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class: complete the minor-only handle to X:Y. */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		/* Class does not exist: only NEW with NLM_F_CREATE may
		 * proceed (to create it); everything else fails.
		 */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class from filters (bind to 0). */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or change the class via the qdisc's change op. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class, need to do reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
1859
/* Context threaded through a qdisc's class walk while dumping classes
 * into a netlink skb.  The walker must remain the first member: the walk
 * callback recovers this struct by casting the qdisc_walker pointer.
 */
struct qdisc_dump_args {
	struct qdisc_walker	w;	/* embedded walker (fn/skip/count/stop) */
	struct sk_buff		*skb;	/* dump skb being filled */
	struct netlink_callback *cb;	/* netlink dump state (seq, portid) */
};
1865
1866 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1867                             struct qdisc_walker *arg)
1868 {
1869         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1870
1871         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1872                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1873                               RTM_NEWTCLASS);
1874 }
1875
/* Dump all classes of one qdisc into the dump skb.
 *
 * *t_p counts qdiscs visited so far in this dump; s_t is the count saved
 * by the previous (truncated) dump pass, so qdiscs with *t_p < s_t were
 * already fully dumped and are skipped.  cb->args[1] holds the class
 * index within the current qdisc to resume from, and is reset when we
 * move past the qdisc we previously stopped in.
 *
 * Returns 0 to continue with the next qdisc, -1 when the skb filled up
 * (walk stopped early; dump will be resumed later).
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip: invisible qdiscs, already-dumped qdiscs, classless qdiscs,
	 * and qdiscs not matching a parent filter in the request.
	 */
	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Entering a qdisc beyond the resume point: clear stale per-qdisc
	 * resume state (cb->args[1..]).
	 */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];	/* resume at saved class index */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* save progress for next pass */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1904
/* Dump classes of @root and, when @recur is set, of the other qdiscs
 * hashed on @root's device.  If the request named a specific parent,
 * only that parent's qdisc is dumped in addition to the root.
 *
 * *t_p / s_t carry the dump-resume counters (see tc_dump_tclass_qdisc()).
 * Returns 0 on completion, -1 when the skb filled up.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	/* Ingress path passes recur=false and must not walk the hash. */
	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		/* Parent filter: dump only the named qdisc (if distinct). */
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
1935
/* Netlink dump handler for RTM_GETTCLASS: dump all traffic classes of
 * the device named in the request, covering the egress qdisc tree and
 * the ingress qdisc.  cb->args[0] persists the number of qdiscs already
 * dumped across invocations so a truncated dump resumes correctly.
 * Returns skb->len (netlink dump convention: >0 means "call me again").
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	/* Takes a reference on dev; released at the end. */
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];	/* resume point from the previous pass */
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	/* Ingress qdisc is not in the device hash; dump it separately,
	 * without recursing (recur=false).
	 */
	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
1968
1969 #ifdef CONFIG_PROC_FS
/* seq_file show callback for /proc/net/psched: four hex words describing
 * the packet-scheduler clock.  From the code: nanoseconds per
 * microsecond, nanoseconds per psched tick, the constant 1000000
 * (NOTE(review): presumably a historical clock-resolution field kept for
 * ABI compatibility — confirm against tc userspace), and
 * NSEC_PER_SEC / hrtimer_resolution (hrtimer ticks per second).
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
1979
/* open() handler for /proc/net/psched: single-record seq_file. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1984
/* File operations for the read-only /proc/net/psched entry. */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1992
1993 static int __net_init psched_net_init(struct net *net)
1994 {
1995         struct proc_dir_entry *e;
1996
1997         e = proc_create("psched", 0, net->proc_net, &psched_fops);
1998         if (e == NULL)
1999                 return -ENOMEM;
2000
2001         return 0;
2002 }
2003
/* Per-netns teardown: remove the /proc/net/psched entry. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2008 #else
/* CONFIG_PROC_FS disabled: no proc entry to create, always succeed. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2013
/* CONFIG_PROC_FS disabled: nothing to tear down. */
static void __net_exit psched_net_exit(struct net *net)
{
}
2017 #endif
2018
/* Per-network-namespace lifecycle hooks for the psched proc entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2023
/* Subsystem init: register the per-netns psched proc hooks, the built-in
 * qdiscs, and the rtnetlink handlers for qdisc and class messages.
 * Returns 0 on success or the error from per-netns registration.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/* NOTE(review): register_qdisc() return values are ignored here —
	 * presumably registration of these built-in ops cannot fail at
	 * boot; verify before relying on it.
	 */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	/* GET handlers also carry dump callbacks; NEW/DEL are doit-only. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}
2053
2054 subsys_initcall(pktsched_init);