/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to make some sanity
   checks, to do the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.

   All the really intelligent work is done inside qdisc modules.


   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty, only that the
   discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   a real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue, but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
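/* A minimal sketch of how these routines map onto struct Qdisc_ops.  The
 * "example" qdisc and its callbacks below are hypothetical, not part of
 * this file; the helpers used are the stock ones from sch_generic.h:
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		return qdisc_enqueue_tail(skb, sch);
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		return qdisc_dequeue_head(sch);
 *	}
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_head,
 *		.owner		= THIS_MODULE,
 *	};
 */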
/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);
/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
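/* Typical usage from a qdisc module (a sketch; "example_qdisc_ops" is the
 * hypothetical ops table from the header comment above, not defined in
 * this file):
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 *
 *	module_init(example_module_init);
 *	module_exit(example_module_exit);
 */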
/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
/* We know handle. Find qdisc among all qdiscs attached to device
 * (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) && root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (!q && dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
/* The linklayer setting was not transferred from iproute2 in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utilities, we detect the linklayer setting by checking whether the
 * rate table was modified.
 *
 * For linklayer ATM, the rate table entries are aligned to 48-byte
 * cells, so some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table;
 * starting from the mpu, we find the low and high table entries
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding the mpu up to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, computing the table entry one below,
 * and comparing.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
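/* Worked example (values assumed for illustration): with mpu = 0 and
 * cell_log = 3, low = roundup(0, 48) = 0 and high = roundup(1, 48) = 48,
 * so cell_low = 0 >> 3 = 0 and cell_high = (48 >> 3) - 1 = 5.  If
 * rtab[0] == rtab[5], all byte sizes 0..47 cost the same (one 48-byte
 * ATM cell) and the table is reported as TC_LINKLAYER_ATM; otherwise
 * the entries differ and it is plain TC_LINKLAYER_ETHERNET.
 */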
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
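/* A qdisc that accepts a rate-table attribute typically pairs these two
 * calls (a sketch only; the attribute index TCA_EXAMPLE_RTAB and the
 * q->rtab field are hypothetical):
 *
 *	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_EXAMPLE_RTAB]);
 *	if (rtab == NULL)
 *		return -EINVAL;
 *	...
 *	qdisc_put_rtab(q->rtab);	(on ->destroy/->change, drops refcnt)
 */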
static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
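/* Worked example (assumed numbers): with overhead = 24, cell_align = -1,
 * cell_log = 6 and tsize = 512, a 1500-byte packet gives
 * slot = (1500 + 24 - 1) >> 6 = 23, so pkt_len becomes
 * stab->data[23] << size_log.  Slots beyond tsize are extrapolated
 * linearly from the last table entry, as the else-branch above shows.
 */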
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
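/* Typical watchdog life cycle in a rate-limiting qdisc (a sketch; the
 * example_* callbacks and q->watchdog field are hypothetical):
 *
 *	example_init():    qdisc_watchdog_init(&q->watchdog, sch);
 *	example_dequeue(): if (now < next_allowed_time)
 *				qdisc_watchdog_schedule_ns(&q->watchdog,
 *							   next_allowed_time);
 *	example_reset():   qdisc_watchdog_cancel(&q->watchdog);
 *
 * When the hrtimer fires, qdisc_watchdog() above reschedules the root
 * qdisc so that dequeue is retried once the rate budget allows it.
 */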
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
	if (h) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
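/* Typical use by a classful qdisc (a sketch; "cl", "q->clhash" and the
 * surrounding class structure are hypothetical):
 *
 *	qdisc_class_hash_init(&q->clhash);		(in ->init)
 *	cl->common.classid = classid;
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	qdisc_class_hash_grow(sch, &q->clhash);		(after inserts)
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);
 *	qdisc_class_hash_destroy(&q->clhash);		(in ->destroy)
 *
 * Lookup goes through qdisc_class_find(), which hashes cl->classid with
 * the same qdisc_class_hash() function used by grow above.
 */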
/* Allocate a unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	return 0;
}
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
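/* Example: when a child qdisc drops one queued packet of "len" bytes from
 * inside ->change() or a timer (i.e. outside its parent's enqueue path),
 * it fixes up the ancestors' counters with:
 *
 *	qdisc_tree_reduce_backlog(sch, 1, len);
 *
 * n is the number of packets removed and len the total bytes; both are
 * subtracted from every ancestor's qlen/backlog up to the root.
 */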
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->find(parent, classid);

			if (cl)
				err = cops->graft(parent, cl, new, &old);
			else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
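/* For the classful branch above, a typical ->graft() implementation swaps
 * the child pointer under the tree lock (a sketch; "struct example_class"
 * is hypothetical, qdisc_replace() is the stock helper):
 *
 *	static int example_graft(struct Qdisc *sch, unsigned long arg,
 *				 struct Qdisc *new, struct Qdisc **old)
 *	{
 *		struct example_class *cl = (struct example_class *)arg;
 *
 *		if (new == NULL)
 *			new = &noop_qdisc;
 *		*old = qdisc_replace(sch, new, &cl->qdisc);
 *		return 0;
 *	}
 *
 * qdisc_replace() returns the previous child, which qdisc_graft() then
 * hands to notify_and_destroy().
 */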
/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */
static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to stay backward compatible with a userspace
	 * loophole that allowed userspace to get IFF_NO_QUEUE
	 * behavior on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
			else
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						NULL,
						running,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_hash_add(sch, false);

		return sch;
	}
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return err;
}
struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_NUL_STRING,
				    .len = IFNAMSIZ - 1 },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
};
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requester wanted to say
				 *   that the qdisc with handle tcm_handle is not
				 *   expected to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is sort of a hole in the API; we
				 *   cannot decide unambiguously what to do.
				 *   For now we select create/graft if the
				 *   user gave a KIND which does not match the
				 *   existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the
	 * global qdisc hashtable, we don't want to hit it again.
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct tcmsg *tcm = nlmsg_data(nlh);
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;

	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX,
			  rtm_tca_policy, NULL);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}
static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}
#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	u32 classid;
	unsigned long cl;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		tcf_tree_lock(tp);
		tp->ops->bind_class(n, a->classid, a->cl);
		tcf_tree_unlock(tp);
	}
	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);
	if (!cl)
		return;
	if (!cops->tcf_block)
		return;
	block = cops->tcf_block(q, cl);
	if (!block)
		return;
	list_for_each_entry(chain, &block->chain_list, list) {
		struct tcf_proto *tp;

		for (tp = rtnl_dereference(chain->filter_chain);
		     tp; tp = rtnl_dereference(tp->next)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w);
		}
	}
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class from its filters by rebinding them to classid 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class; we need to do the reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
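/* Example of the handle arithmetic above: for
 * "tc class add dev eth0 parent 1: classid 1:10 ...", userspace sends
 * tcm_parent = 0x00010000 (1:0) and tcm_handle = 0x00010010 (1:10).
 * Both majors agree, so qid = 0x00010000 selects the qdisc, and
 * cops->change() is called with classid 1:10 and parent 1:0.
 */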
struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}

subsys_initcall(pktsched_init);