GNU Linux-libre 4.14.266-gnu1
[releases.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25
26 /*
27    How to setup it.
28    ----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
/* Per-master state.  One teql_master backs each teqlN net_device; it also
 * embeds the Qdisc_ops registered under the same name, so the qdisc type
 * and the virtual device are created and torn down together. */
struct teql_master {
	struct Qdisc_ops qops;		/* qdisc ops; qops.id == dev->name */
	struct net_device *dev;		/* the virtual teqlN master device */
	struct Qdisc *slaves;		/* cursor into circular ring of slave qdiscs */
	struct list_head master_list;	/* entry in master_dev_list */
	/* Software tx counters, maintained by teql_master_xmit() and
	 * reported via teql_master_stats64(). */
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};
66
/* Per-slave qdisc private data (lives in qdisc_priv() of each slave). */
struct teql_sched_data {
	struct Qdisc *next;		/* next slave in the master's circular ring */
	struct teql_master *m;		/* owning master; NULL if init() failed early */
	struct sk_buff_head q;		/* packets queued on this slave */
};
72
/* Successor of slave qdisc q in its master's circular ring. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-layer flag bits a master inherits from (the intersection of) its
 * slaves; see teql_qdisc_init() and teql_master_open(). */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
76
77 /* "teql*" qdisc routines */
78
79 static int
80 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
81 {
82         struct net_device *dev = qdisc_dev(sch);
83         struct teql_sched_data *q = qdisc_priv(sch);
84
85         if (q->q.qlen < dev->tx_queue_len) {
86                 __skb_queue_tail(&q->q, skb);
87                 return NET_XMIT_SUCCESS;
88         }
89
90         return qdisc_drop(skb, sch, to_free);
91 }
92
/* Dequeue from this slave's private queue.  When the slave runs dry it
 * becomes the master's next round-robin candidate and the master device
 * is woken so it can feed more traffic. */
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);
		if (m) {
			/* Empty slave: point the master's cursor here and
			 * restart the master's transmit queue. */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	/* Expose the combined backlog (this slave + master root qdisc)
	 * as this qdisc's queue length. */
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}
117
118 static struct sk_buff *
119 teql_peek(struct Qdisc *sch)
120 {
121         /* teql is meant to be used as root qdisc */
122         return NULL;
123 }
124
125 static void
126 teql_reset(struct Qdisc *sch)
127 {
128         struct teql_sched_data *dat = qdisc_priv(sch);
129
130         skb_queue_purge(&dat->q);
131         sch->q.qlen = 0;
132 }
133
/* Unlink this slave qdisc from its master's circular ring.  When the
 * last slave disappears, the master's root qdisc is reset under its
 * sleeping root lock and the ring becomes empty. */
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	/* q->m is only set late in teql_qdisc_init(); a NULL master means
	 * init failed before this slave ever joined a ring. */
	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Splice sch out of the ring. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					/* Cursor still points at sch: it was
					 * the only slave, so the ring is now
					 * empty. */
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}
172
/* Attach a slave device to a teql master.  The master is found through
 * sch->ops, which is the Qdisc_ops embedded in struct teql_master. */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	/* The slave's link-layer header must fit in the master's. */
	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	/* Enslaving the master to itself would create a loop. */
	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is up: a new slave may not weaken the
			 * master's already-advertised flags or MTU. */
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: shrink its capabilities to the
			 * intersection with the new slave's. */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Insert into the circular ring right after the cursor. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: a ring of one; the master inherits its
		 * MTU and FMASK flag bits. */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}
219
220
221 static int
222 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
223                struct net_device *dev, struct netdev_queue *txq,
224                struct dst_entry *dst)
225 {
226         struct neighbour *n;
227         int err = 0;
228
229         n = dst_neigh_lookup_skb(dst, skb);
230         if (!n)
231                 return -ENOENT;
232
233         if (dst->dev != dev) {
234                 struct neighbour *mn;
235
236                 mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
237                 neigh_release(n);
238                 if (IS_ERR(mn))
239                         return PTR_ERR(mn);
240                 n = mn;
241         }
242
243         if (neigh_event_send(n, skb_res) == 0) {
244                 int err;
245                 char haddr[MAX_ADDR_LEN];
246
247                 neigh_ha_snapshot(haddr, n, dev);
248                 err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
249                                       haddr, NULL, skb->len);
250
251                 if (err < 0)
252                         err = -EINVAL;
253         } else {
254                 err = (skb_res == NULL) ? -EAGAIN : 1;
255         }
256         neigh_release(n);
257         return err;
258 }
259
260 static inline int teql_resolve(struct sk_buff *skb,
261                                struct sk_buff *skb_res,
262                                struct net_device *dev,
263                                struct netdev_queue *txq)
264 {
265         struct dst_entry *dst = skb_dst(skb);
266         int res;
267
268         if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
269                 return -ENODEV;
270
271         if (!dev->header_ops || !dst)
272                 return 0;
273
274         rcu_read_lock();
275         res = __teql_resolve(skb, skb_res, dev, txq, dst);
276         rcu_read_unlock();
277
278         return res;
279 }
280
/* Transmit on the master: walk the slave ring starting at the
 * round-robin cursor and hand the skb to the first slave that accepts
 * it.  If some slave needed address resolution, a second pass is made
 * with skb_res set so __teql_resolve() can queue the skb on the
 * neighbour instead of failing with -EAGAIN. */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;	/* no slaves attached */

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		/* Skip slaves whose root qdisc is no longer this teql. */
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			/* Header resolved: try to push it out directly. */
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					/* Advance the round-robin cursor. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			/* skb was taken over by the neighbour layer,
			 * queued pending resolution. */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		/* Undo any header the failed attempt may have pushed. */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* At least one slave wanted resolution: retry once with
		 * skb_res set so the skb can be parked on the neighbour. */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
358
359 static int teql_master_open(struct net_device *dev)
360 {
361         struct Qdisc *q;
362         struct teql_master *m = netdev_priv(dev);
363         int mtu = 0xFFFE;
364         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
365
366         if (m->slaves == NULL)
367                 return -EUNATCH;
368
369         flags = FMASK;
370
371         q = m->slaves;
372         do {
373                 struct net_device *slave = qdisc_dev(q);
374
375                 if (slave == NULL)
376                         return -EUNATCH;
377
378                 if (slave->mtu < mtu)
379                         mtu = slave->mtu;
380                 if (slave->hard_header_len > LL_MAX_HEADER)
381                         return -EINVAL;
382
383                 /* If all the slaves are BROADCAST, master is BROADCAST
384                    If all the slaves are PtP, master is PtP
385                    Otherwise, master is NBMA.
386                  */
387                 if (!(slave->flags&IFF_POINTOPOINT))
388                         flags &= ~IFF_POINTOPOINT;
389                 if (!(slave->flags&IFF_BROADCAST))
390                         flags &= ~IFF_BROADCAST;
391                 if (!(slave->flags&IFF_MULTICAST))
392                         flags &= ~IFF_MULTICAST;
393         } while ((q = NEXT_SLAVE(q)) != m->slaves);
394
395         m->dev->mtu = mtu;
396         m->dev->flags = (m->dev->flags&~FMASK) | flags;
397         netif_start_queue(m->dev);
398         return 0;
399 }
400
/* Bring the master device down: just stop its transmit queue. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}
406
407 static void teql_master_stats64(struct net_device *dev,
408                                 struct rtnl_link_stats64 *stats)
409 {
410         struct teql_master *m = netdev_priv(dev);
411
412         stats->tx_packets       = m->tx_packets;
413         stats->tx_bytes         = m->tx_bytes;
414         stats->tx_errors        = m->tx_errors;
415         stats->tx_dropped       = m->tx_dropped;
416 }
417
418 static int teql_master_mtu(struct net_device *dev, int new_mtu)
419 {
420         struct teql_master *m = netdev_priv(dev);
421         struct Qdisc *q;
422
423         q = m->slaves;
424         if (q) {
425                 do {
426                         if (new_mtu > qdisc_dev(q)->mtu)
427                                 return -EINVAL;
428                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
429         }
430
431         dev->mtu = new_mtu;
432         return 0;
433 }
434
/* net_device callbacks for the teqlN master device. */
static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};
442
/* alloc_netdev() setup callback: wire up the Qdisc_ops embedded in the
 * master's private area and give the device teql-appropriate defaults. */
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops =	&teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->min_mtu		= 68;
	dev->max_mtu		= 65535;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	/* Keep dst entries on transmitted skbs so teql_resolve() can use
	 * them for neighbour lookup. */
	netif_keep_dst(dev);
}
469
/* All registered masters, for teardown in teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices to create at module load (module parameter). */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
474
/* Module init: create max_equalizers teqlN master devices, each with a
 * matching qdisc type.  Succeeds if at least one master was created,
 * even if a later one failed. */
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		/* The qdisc type shares the device's name (e.g. "teql0"). */
		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	/* Partial success counts: report 0 if any master exists. */
	return i ? 0 : err;
}
511
512 static void __exit teql_exit(void)
513 {
514         struct teql_master *master, *nxt;
515
516         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
517
518                 list_del(&master->master_list);
519
520                 unregister_qdisc(&master->qops);
521                 unregister_netdev(master->dev);
522                 free_netdev(master->dev);
523         }
524 }
525
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");