/*
 * Copyright (C) 2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General Public License
 * Version 2, June 1991 as shown in the file COPYING in the top-level
 * directory of this source tree or the BSD 2-Clause License provided
 * below. You have the option to license this software under the
 * complete terms of either license.
 *
 * The BSD 2-Clause License:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      1. Redistributions of source code must retain the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer.
 *
 *      2. Redistributions in binary form must reproduce the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer in the documentation and/or other materials
 *         provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "main.h"
/* LAG group config flags. */
#define NFP_FL_LAG_LAST			BIT(1)
#define NFP_FL_LAG_FIRST		BIT(2)
#define NFP_FL_LAG_DATA			BIT(3)
#define NFP_FL_LAG_XON			BIT(4)
#define NFP_FL_LAG_SYNC			BIT(5)
#define NFP_FL_LAG_SWITCH		BIT(6)
#define NFP_FL_LAG_RESET		BIT(7)
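
/* The flags above drive the config message protocol implemented below:
 * FIRST/LAST bracket a batch of messages, SWITCH commits it, RESET
 * requests a full reconfiguration, and DATA/XON/SYNC are used by the
 * firmware for flow control of retransmitted messages (see
 * nfp_flower_lag_unprocessed_msg()).
 */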
/* LAG port state flags. */
#define NFP_PORT_LAG_LINK_UP		BIT(0)
#define NFP_PORT_LAG_TX_ENABLED		BIT(1)
#define NFP_PORT_LAG_CHANGED		BIT(2)
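
/* Port state flags are written by the CHANGELOWERSTATE notifier
 * (nfp_fl_lag_changels_event()) and consumed by the delayed work
 * function, which clears NFP_PORT_LAG_CHANGED once the change has been
 * folded into the group config. Both sides run under lag->lock.
 */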
enum nfp_fl_lag_batch {
	NFP_FL_LAG_BATCH_FIRST,
	NFP_FL_LAG_BATCH_MEMBER,
	NFP_FL_LAG_BATCH_FINISHED
};
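
/* A config batch walks this enum in order: sending the first message
 * moves the state from BATCH_FIRST to BATCH_MEMBER, and the closing
 * sync message (reserved group ID, zero members) moves it to
 * BATCH_FINISHED.
 */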
/**
 * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
 * @ctrl_flags:	Configuration flags
 * @reserved:	Reserved for future use
 * @ttl:	Time to live of packet - host always sets to 0xff
 * @pkt_number:	Config message packet number - increment for each message
 * @batch_ver:	Batch version of messages - increment for each batch of messages
 * @group_id:	Group ID applicable
 * @group_inst:	Group instance number - increment when group is reused
 * @members:	Array of 32-bit words listing all active group members
 */
struct nfp_flower_cmsg_lag_config {
	u8 ctrl_flags;
	u8 reserved[2];
	u8 ttl;
	__be32 pkt_number;
	__be32 batch_ver;
	__be32 group_id;
	__be32 group_inst;
	__be32 members[];
};
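
/* Assuming the field types reconstructed above, the payload starts with
 * a 4-byte control word (ctrl_flags, two reserved bytes, ttl) followed
 * by four 32-bit big-endian words and the variable-length member list;
 * nfp_fl_lag_config_group() sizes the cmsg as sizeof(*cmsg_payload)
 * plus sizeof(__be32) per member accordingly.
 */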
/**
 * struct nfp_fl_lag_group - list entry for each LAG group
 * @group_id:		Assigned group ID for host/kernel sync
 * @group_inst:		Group instance in case of ID reuse
 * @list:		List entry
 * @master_ndev:	Group master Netdev
 * @dirty:		Marked if the group needs to be synced to HW
 * @offloaded:		Marked if the group is currently offloaded to NIC
 * @to_remove:		Marked if the group should be removed from NIC
 * @to_destroy:		Marked if the group should be removed from driver
 * @slave_cnt:		Number of slaves in group
 */
struct nfp_fl_lag_group {
	unsigned int group_id;
	u8 group_inst;
	struct list_head list;
	struct net_device *master_ndev;
	bool dirty;
	bool offloaded;
	bool to_remove;
	bool to_destroy;
	unsigned int slave_cnt;
};
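
/* Lifecycle of a group: created dirty by the CHANGEUPPER handler,
 * offloaded (and dirty cleared) by the work function, flagged to_remove
 * when it can no longer be offloaded, and additionally to_destroy when
 * the bond itself is unregistered, at which point the work function
 * frees it.
 */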
#define NFP_FL_LAG_PKT_NUMBER_MASK	GENMASK(30, 0)
#define NFP_FL_LAG_VERSION_MASK		GENMASK(22, 0)
#define NFP_FL_LAG_HOST_TTL		0xff

/* Use this ID with zero members to ack a batch config */
#define NFP_FL_LAG_SYNC_ID		0
#define NFP_FL_LAG_GROUP_MIN		1 /* ID 0 reserved */
#define NFP_FL_LAG_GROUP_MAX		32 /* IDs 1 to 31 are valid */

/* Wait this long for more config to arrive before processing. */
#define NFP_FL_LAG_DELAY		(msecs_to_jiffies(2))

#define NFP_FL_LAG_RETRANS_LIMIT	100 /* max retrans cmsgs to store */
static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
{
	lag->pkt_num++;
	lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK;

	return lag->pkt_num;
}
static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
{
	/* LSB is not considered by firmware so add 2 for each increment. */
	lag->batch_ver += 2;
	lag->batch_ver &= NFP_FL_LAG_VERSION_MASK;

	/* Zero is reserved by firmware. */
	if (!lag->batch_ver)
		lag->batch_ver += 2;
}
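
/* Example of the version arithmetic: starting from 0 the sequence is
 * 2, 4, ..., 0x7ffffe; the next increment wraps to 0 under the 23-bit
 * mask and is bumped to 2 again, so the value stays even and non-zero.
 */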
static struct nfp_fl_lag_group *
nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
{
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	int id;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
			    NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
	if (id < 0) {
		nfp_flower_cmsg_warn(priv->app,
				     "No more bonding groups available\n");
		return ERR_PTR(id);
	}

	group = kmalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		ida_simple_remove(&lag->ida_handle, id);
		return ERR_PTR(-ENOMEM);
	}

	group->group_id = id;
	group->master_ndev = master;
	group->dirty = true;
	group->offloaded = false;
	group->to_remove = false;
	group->to_destroy = false;
	group->slave_cnt = 0;
	group->group_inst = ++lag->global_inst;
	list_add_tail(&group->list, &lag->group_list);

	return group;
}
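
/* Note that group IDs may be recycled by the IDA, so each allocation
 * also takes a fresh instance number from lag->global_inst; the
 * firmware uses the (ID, instance) pair to tell a reused ID apart from
 * stale state belonging to a previous group.
 */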
static struct nfp_fl_lag_group *
nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
					  struct net_device *master)
{
	struct nfp_fl_lag_group *entry;

	if (!master)
		return NULL;

	list_for_each_entry(entry, &lag->group_list, list)
		if (entry->master_ndev == master)
			return entry;

	return NULL;
}
int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
				       struct net_device *master,
				       struct nfp_fl_pre_lag *pre_act)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	__be32 temp_vers;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  master);
	if (!group) {
		mutex_unlock(&priv->nfp_lag.lock);
		return -ENOENT;
	}

	pre_act->group_id = cpu_to_be16(group->group_id);
	temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver <<
				NFP_FL_PRE_LAG_VER_OFF);
	memcpy(pre_act->lag_version, &temp_vers, 3);
	pre_act->instance = group->group_inst;
	mutex_unlock(&priv->nfp_lag.lock);

	return 0;
}
int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	int group_id = -ENOENT;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  master);
	if (group)
		group_id = group->group_id;
	mutex_unlock(&priv->nfp_lag.lock);

	return group_id;
}
static int
nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
			struct net_device **active_members,
			unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv;
	unsigned long int flags = 0;
	unsigned int size, i;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
	skb = nfp_flower_cmsg_alloc(priv->app, size,
				    NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
				    GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);

	/* Increment batch version for each new batch of config messages. */
	if (*batch == NFP_FL_LAG_BATCH_FIRST) {
		flags |= NFP_FL_LAG_FIRST;
		nfp_fl_increment_version(lag);
		*batch = NFP_FL_LAG_BATCH_MEMBER;
	}

	/* If it is a reset msg then it is also the end of the batch. */
	if (lag->rst_cfg) {
		flags |= NFP_FL_LAG_RESET;
		*batch = NFP_FL_LAG_BATCH_FINISHED;
	}

	/* To signal the end of a batch, both the switch and last flags are
	 * set and the reserved SYNC group ID is used.
	 */
	if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
		flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
		lag->rst_cfg = false;
		cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
		cmsg_payload->group_inst = 0;
	} else {
		cmsg_payload->group_id = cpu_to_be32(group->group_id);
		cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
	}

	cmsg_payload->reserved[0] = 0;
	cmsg_payload->reserved[1] = 0;
	cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
	cmsg_payload->ctrl_flags = flags;
	cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
	cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));

	for (i = 0; i < member_cnt; i++)
		cmsg_payload->members[i] =
			cpu_to_be32(nfp_repr_get_port_id(active_members[i]));

	nfp_ctrl_tx(priv->app->ctrl, skb);

	return 0;
}
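
/* A typical batch thus consists of one message per group being synced,
 * with FIRST set on the first of them, followed by a closing message
 * that carries SWITCH and LAST, the reserved SYNC group ID and an empty
 * member list, signalling the firmware to apply the batch.
 */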
static void nfp_fl_lag_do_work(struct work_struct *work)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
	struct nfp_fl_lag_group *entry, *storage;
	struct delayed_work *delayed_work;
	struct nfp_flower_priv *priv;
	struct nfp_fl_lag *lag;
	int err;

	delayed_work = to_delayed_work(work);
	lag = container_of(delayed_work, struct nfp_fl_lag, work);
	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		struct net_device *iter_netdev, **acti_netdevs;
		struct nfp_flower_repr_priv *repr_priv;
		int active_count = 0, slaves = 0;
		struct nfp_repr *repr;
		unsigned long *flags;

		if (entry->to_remove) {
			/* Active count of 0 deletes group on hw. */
			err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
						      &batch);
			if (!err) {
				entry->to_remove = false;
				entry->offloaded = false;
			} else {
				nfp_flower_cmsg_warn(priv->app,
						     "group delete failed\n");
				schedule_delayed_work(&lag->work,
						      NFP_FL_LAG_DELAY);
				continue;
			}

			if (entry->to_destroy) {
				ida_simple_remove(&lag->ida_handle,
						  entry->group_id);
				list_del(&entry->list);
				kfree(entry);
			}
			continue;
		}

		acti_netdevs = kmalloc_array(entry->slave_cnt,
					     sizeof(*acti_netdevs), GFP_KERNEL);
		if (!acti_netdevs) {
			schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
			continue;
		}

		/* Include sanity check in the loop. It may be that a bond has
		 * changed between processing the last notification and the
		 * work queue triggering. If the number of slaves has changed
		 * or it now contains netdevs that cannot be offloaded, ignore
		 * the group until pending notifications are processed.
		 */
		rcu_read_lock();
		for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
			if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
				slaves = 0;
				break;
			}

			repr = netdev_priv(iter_netdev);

			if (repr->app != priv->app) {
				slaves = 0;
				break;
			}

			slaves++;
			if (slaves > entry->slave_cnt)
				break;

			/* Check the ports for state changes. */
			repr_priv = repr->app_priv;
			flags = &repr_priv->lag_port_flags;

			if (*flags & NFP_PORT_LAG_CHANGED) {
				*flags &= ~NFP_PORT_LAG_CHANGED;
				entry->dirty = true;
			}

			if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
			    (*flags & NFP_PORT_LAG_LINK_UP))
				acti_netdevs[active_count++] = iter_netdev;
		}
		rcu_read_unlock();

		if (slaves != entry->slave_cnt || !entry->dirty) {
			kfree(acti_netdevs);
			continue;
		}

		err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
					      active_count, &batch);
		if (!err) {
			entry->offloaded = true;
			entry->dirty = false;
		} else {
			nfp_flower_cmsg_warn(priv->app,
					     "group offload failed\n");
			schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
		}

		kfree(acti_netdevs);
	}

	/* End the config batch if at least one packet has been batched. */
	if (batch == NFP_FL_LAG_BATCH_MEMBER) {
		batch = NFP_FL_LAG_BATCH_FINISHED;
		err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "group batch end cmsg failed\n");
	}

	mutex_unlock(&lag->lock);
}
static int
nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX)
		return -EINVAL;

	/* Drop cmsg retrans if storage limit is exceeded to prevent
	 * overloading. If the fw notices that expected messages have not been
	 * received in a given time block, it will request a full resync.
	 */
	if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT)
		return -ENOSPC;

	__skb_queue_tail(&lag->retrans_skbs, skb);

	return 0;
}
static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag)
{
	struct nfp_flower_priv *priv;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	while ((skb = __skb_dequeue(&lag->retrans_skbs)))
		nfp_ctrl_tx(priv->app->ctrl, skb);
}
bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group_entry;
	unsigned long int flags;
	bool store_skb = false;
	int err;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	flags = cmsg_payload->ctrl_flags;

	/* Note the intentional fall through below. If DATA and XON are both
	 * set, the message will be stored and sent again with the rest of the
	 * unprocessed messages list.
	 */

	if (flags & NFP_FL_LAG_DATA)
		if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb))
			store_skb = true;

	if (flags & NFP_FL_LAG_XON)
		nfp_fl_send_unprocessed(&priv->nfp_lag);

	if (flags & NFP_FL_LAG_SYNC) {
		/* To resend all config:
		 * 1) Clear all unprocessed messages
		 * 2) Mark all groups dirty
		 * 3) Reset NFP group config
		 * 4) Schedule a LAG config update
		 */
		__skb_queue_purge(&priv->nfp_lag.retrans_skbs);

		mutex_lock(&priv->nfp_lag.lock);
		list_for_each_entry(group_entry, &priv->nfp_lag.group_list,
				    list)
			group_entry->dirty = true;

		err = nfp_flower_lag_reset(&priv->nfp_lag);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "mem err in group reset msg\n");
		mutex_unlock(&priv->nfp_lag.lock);

		schedule_delayed_work(&priv->nfp_lag.work, 0);
	}

	return store_skb;
}
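
/* Flow-control example: the firmware raises DATA on a config cmsg it
 * could not yet act on, so the driver queues it; a later XON message
 * flushes the queue back to the firmware in order; SYNC means the
 * firmware has lost track entirely, so all queued retransmissions are
 * dropped and every group is re-sent from scratch.
 */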
static void
nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
				 struct nfp_fl_lag_group *group)
{
	group->to_remove = true;

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}
static int
nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
				 struct net_device *master)
{
	struct nfp_fl_lag_group *group;

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
	if (!group) {
		mutex_unlock(&lag->lock);
		return -ENOENT;
	}

	group->to_remove = true;
	group->to_destroy = true;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
	return 0;
}
static int
nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
			     struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *iter_netdev;
	struct netdev_lag_upper_info *lag_upper_info;
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	unsigned int slave_count = 0;
	bool can_offload = true;
	struct nfp_repr *repr;

	if (!netif_is_lag_master(upper))
		return 0;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, iter_netdev) {
		if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
			can_offload = false;
			break;
		}

		repr = netdev_priv(iter_netdev);

		/* Ensure all ports are created by the same app/on same card. */
		if (repr->app != priv->app) {
			can_offload = false;
			break;
		}

		slave_count++;
	}
	rcu_read_unlock();

	lag_upper_info = info->upper_info;

	/* Firmware supports active/backup and L3/L4 hash bonds. */
	if (lag_upper_info &&
	    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
	     (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) {
		can_offload = false;
		nfp_flower_cmsg_warn(priv->app,
				     "Unable to offload tx_type %u hash %u\n",
				     lag_upper_info->tx_type,
				     lag_upper_info->hash_type);
	}

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);

	if (slave_count == 0 || !can_offload) {
		/* Cannot offload the group - remove if previously offloaded. */
		if (group && group->offloaded)
			nfp_fl_lag_schedule_group_remove(lag, group);

		mutex_unlock(&lag->lock);
		return 0;
	}

	if (!group) {
		group = nfp_fl_lag_group_create(lag, upper);
		if (IS_ERR(group)) {
			mutex_unlock(&lag->lock);
			return PTR_ERR(group);
		}
	}

	group->dirty = true;
	group->slave_cnt = slave_count;

	/* Group may have been on queue for removal but is now offloadable. */
	group->to_remove = false;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
	return 0;
}
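
/* In bonding terms: active-backup always satisfies the check above, and
 * hash modes (e.g. 802.3ad or balance-xor) satisfy it when the kernel
 * reports an L3/L4 style transmit hash (or no specific hash); any other
 * mode leaves the bond to be handled without offload.
 */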
static int
nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
			  struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	struct nfp_flower_repr_priv *repr_priv;
	struct nfp_flower_priv *priv;
	struct nfp_repr *repr;
	unsigned long *flags;

	if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
		return 0;

	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	repr = netdev_priv(netdev);

	/* Verify that the repr is associated with this app. */
	if (repr->app != priv->app)
		return 0;

	repr_priv = repr->app_priv;
	flags = &repr_priv->lag_port_flags;

	mutex_lock(&lag->lock);
	if (lag_lower_info->link_up)
		*flags |= NFP_PORT_LAG_LINK_UP;
	else
		*flags &= ~NFP_PORT_LAG_LINK_UP;

	if (lag_lower_info->tx_enabled)
		*flags |= NFP_PORT_LAG_TX_ENABLED;
	else
		*flags &= ~NFP_PORT_LAG_TX_ENABLED;

	*flags |= NFP_PORT_LAG_CHANGED;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
	return 0;
}
static int
nfp_fl_lag_netdev_event(struct notifier_block *nb, unsigned long event,
			void *ptr)
{
	struct net_device *netdev;
	struct nfp_fl_lag *lag;
	int err;

	netdev = netdev_notifier_info_to_dev(ptr);
	lag = container_of(nb, struct nfp_fl_lag, lag_nb);

	switch (event) {
	case NETDEV_CHANGEUPPER:
		err = nfp_fl_lag_changeupper_event(lag, ptr);
		if (err)
			return NOTIFY_BAD;
		return NOTIFY_OK;
	case NETDEV_CHANGELOWERSTATE:
		err = nfp_fl_lag_changels_event(lag, netdev, ptr);
		if (err)
			return NOTIFY_BAD;
		return NOTIFY_OK;
	case NETDEV_UNREGISTER:
		if (netif_is_bond_master(netdev)) {
			err = nfp_fl_lag_schedule_group_delete(lag, netdev);
			/* -ENOENT simply means the bond was never
			 * offloaded by this driver.
			 */
			if (err == -ENOENT)
				return NOTIFY_DONE;
			return NOTIFY_OK;
		}
	}

	return NOTIFY_DONE;
}
int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;

	lag->rst_cfg = true;
	return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
}
void nfp_flower_lag_init(struct nfp_fl_lag *lag)
{
	INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
	INIT_LIST_HEAD(&lag->group_list);
	mutex_init(&lag->lock);
	ida_init(&lag->ida_handle);

	__skb_queue_head_init(&lag->retrans_skbs);

	/* 0 is a reserved batch version so increment to first valid value. */
	nfp_fl_increment_version(lag);

	lag->lag_nb.notifier_call = nfp_fl_lag_netdev_event;
}
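
/* Only the notifier callback is initialized here; the caller is
 * expected to register lag->lag_nb with the netdevice notifier chain
 * (e.g. via register_netdevice_notifier()) and to unregister it before
 * nfp_flower_lag_cleanup() is called.
 */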
void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
{
	struct nfp_fl_lag_group *entry, *storage;

	cancel_delayed_work_sync(&lag->work);

	__skb_queue_purge(&lag->retrans_skbs);

	/* Remove all groups. */
	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		list_del(&entry->list);
		kfree(entry);
	}
	mutex_unlock(&lag->lock);
	mutex_destroy(&lag->lock);
	ida_destroy(&lag->ida_handle);
}