GNU Linux-libre 4.14.266-gnu1
[releases.git] / drivers / staging / lustre / lnet / klnds / socklnd / socklnd.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lnet/klnds/socklnd/socklnd.c
33  *
34  * Author: Zach Brown <zab@zabbo.net>
35  * Author: Peter J. Braam <braam@clusterfs.com>
36  * Author: Phil Schwan <phil@clusterfs.com>
37  * Author: Eric Barton <eric@bartonsoftware.com>
38  */
39
40 #include "socklnd.h"
41
42 static struct lnet_lnd the_ksocklnd;   /* LND descriptor for the socket LND; presumably registered with LNet elsewhere in this file */
43 struct ksock_nal_data ksocknal_data;   /* global socklnd state (peer hash, schedulers, locks) shared by the whole module */
44
45 static struct ksock_interface *
46 ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip)
47 {
48         struct ksock_net *net = ni->ni_data;
49         int i;
50         struct ksock_interface *iface;
51
52         for (i = 0; i < net->ksnn_ninterfaces; i++) {
53                 LASSERT(i < LNET_MAX_INTERFACES);
54                 iface = &net->ksnn_interfaces[i];
55
56                 if (iface->ksni_ipaddr == ip)
57                         return iface;
58         }
59
60         return NULL;
61 }
62
63 static struct ksock_route *
64 ksocknal_create_route(__u32 ipaddr, int port)
65 {
66         struct ksock_route *route;
67
68         LIBCFS_ALLOC(route, sizeof(*route));
69         if (!route)
70                 return NULL;
71
72         atomic_set(&route->ksnr_refcount, 1);
73         route->ksnr_peer = NULL;
74         route->ksnr_retry_interval = 0;  /* OK to connect at any time */
75         route->ksnr_ipaddr = ipaddr;
76         route->ksnr_port = port;
77         route->ksnr_scheduled = 0;
78         route->ksnr_connecting = 0;
79         route->ksnr_connected = 0;
80         route->ksnr_deleted = 0;
81         route->ksnr_conn_count = 0;
82         route->ksnr_share_count = 0;
83
84         return route;
85 }
86
87 void
88 ksocknal_destroy_route(struct ksock_route *route)
89 {
90         LASSERT(!atomic_read(&route->ksnr_refcount));
91
92         if (route->ksnr_peer)
93                 ksocknal_peer_decref(route->ksnr_peer);
94
95         LIBCFS_FREE(route, sizeof(*route));
96 }
97
98 static int
99 ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni,
100                      struct lnet_process_id id)
101 {
102         int cpt = lnet_cpt_of_nid(id.nid);
103         struct ksock_net *net = ni->ni_data;
104         struct ksock_peer *peer;
105
106         LASSERT(id.nid != LNET_NID_ANY);
107         LASSERT(id.pid != LNET_PID_ANY);
108         LASSERT(!in_interrupt());
109
110         LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer));
111         if (!peer)
112                 return -ENOMEM;
113
114         peer->ksnp_ni = ni;
115         peer->ksnp_id = id;
116         atomic_set(&peer->ksnp_refcount, 1);   /* 1 ref for caller */
117         peer->ksnp_closing = 0;
118         peer->ksnp_accepting = 0;
119         peer->ksnp_proto = NULL;
120         peer->ksnp_last_alive = 0;
121         peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
122
123         INIT_LIST_HEAD(&peer->ksnp_conns);
124         INIT_LIST_HEAD(&peer->ksnp_routes);
125         INIT_LIST_HEAD(&peer->ksnp_tx_queue);
126         INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
127         spin_lock_init(&peer->ksnp_lock);
128
129         spin_lock_bh(&net->ksnn_lock);
130
131         if (net->ksnn_shutdown) {
132                 spin_unlock_bh(&net->ksnn_lock);
133
134                 LIBCFS_FREE(peer, sizeof(*peer));
135                 CERROR("Can't create peer: network shutdown\n");
136                 return -ESHUTDOWN;
137         }
138
139         net->ksnn_npeers++;
140
141         spin_unlock_bh(&net->ksnn_lock);
142
143         *peerp = peer;
144         return 0;
145 }
146
147 void
148 ksocknal_destroy_peer(struct ksock_peer *peer)
149 {
150         struct ksock_net *net = peer->ksnp_ni->ni_data;
151
152         CDEBUG(D_NET, "peer %s %p deleted\n",
153                libcfs_id2str(peer->ksnp_id), peer);
154
155         LASSERT(!atomic_read(&peer->ksnp_refcount));
156         LASSERT(!peer->ksnp_accepting);
157         LASSERT(list_empty(&peer->ksnp_conns));
158         LASSERT(list_empty(&peer->ksnp_routes));
159         LASSERT(list_empty(&peer->ksnp_tx_queue));
160         LASSERT(list_empty(&peer->ksnp_zc_req_list));
161
162         LIBCFS_FREE(peer, sizeof(*peer));
163
164         /*
165          * NB a peer's connections and routes keep a reference on their peer
166          * until they are destroyed, so we can be assured that _all_ state to
167          * do with this peer has been cleaned up when its refcount drops to
168          * zero.
169          */
170         spin_lock_bh(&net->ksnn_lock);
171         net->ksnn_npeers--;
172         spin_unlock_bh(&net->ksnn_lock);
173 }
174
175 struct ksock_peer *
176 ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
177 {
178         struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
179         struct list_head *tmp;
180         struct ksock_peer *peer;
181
182         list_for_each(tmp, peer_list) {
183                 peer = list_entry(tmp, struct ksock_peer, ksnp_list);
184
185                 LASSERT(!peer->ksnp_closing);
186
187                 if (peer->ksnp_ni != ni)
188                         continue;
189
190                 if (peer->ksnp_id.nid != id.nid ||
191                     peer->ksnp_id.pid != id.pid)
192                         continue;
193
194                 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
195                        peer, libcfs_id2str(id),
196                        atomic_read(&peer->ksnp_refcount));
197                 return peer;
198         }
199         return NULL;
200 }
201
202 struct ksock_peer *
203 ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
204 {
205         struct ksock_peer *peer;
206
207         read_lock(&ksocknal_data.ksnd_global_lock);
208         peer = ksocknal_find_peer_locked(ni, id);
209         if (peer)                       /* +1 ref for caller? */
210                 ksocknal_peer_addref(peer);
211         read_unlock(&ksocknal_data.ksnd_global_lock);
212
213         return peer;
214 }
215
216 static void
217 ksocknal_unlink_peer_locked(struct ksock_peer *peer)
218 {
219         int i;
220         __u32 ip;
221         struct ksock_interface *iface;
222
223         for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
224                 LASSERT(i < LNET_MAX_INTERFACES);
225                 ip = peer->ksnp_passive_ips[i];
226
227                 iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
228                 /*
229                  * All IPs in peer->ksnp_passive_ips[] come from the
230                  * interface list, therefore the call must succeed.
231                  */
232                 LASSERT(iface);
233
234                 CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
235                        peer, iface, iface->ksni_nroutes);
236                 iface->ksni_npeers--;
237         }
238
239         LASSERT(list_empty(&peer->ksnp_conns));
240         LASSERT(list_empty(&peer->ksnp_routes));
241         LASSERT(!peer->ksnp_closing);
242         peer->ksnp_closing = 1;
243         list_del(&peer->ksnp_list);
244         /* lose peerlist's ref */
245         ksocknal_peer_decref(peer);
246 }
247
/*
 * Enumerate peer state for ioctl-style queries: walk every peer on 'ni'
 * and return the index'th "record", where a peer contributes one record
 * per passive IP and one per route (or a single empty record if it has
 * neither).  Returns 0 with the out parameters filled in, or -ENOENT if
 * index is past the last record.  The index-- pattern below counts down
 * to the requested record while iterating.
 */
248 static int
249 ksocknal_get_peer_info(struct lnet_ni *ni, int index,
250                        struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
251                        int *port, int *conn_count, int *share_count)
252 {
253         struct ksock_peer *peer;
254         struct list_head *ptmp;
255         struct ksock_route *route;
256         struct list_head *rtmp;
257         int i;
258         int j;
259         int rc = -ENOENT;
260
261         read_lock(&ksocknal_data.ksnd_global_lock);
262
263         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
264                 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
265                         peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
266
267                         if (peer->ksnp_ni != ni)
268                                 continue;
269
                        /* Peer with no passive IPs and no routes: one empty record. */
270                         if (!peer->ksnp_n_passive_ips &&
271                             list_empty(&peer->ksnp_routes)) {
272                                 if (index-- > 0)
273                                         continue;
274
275                                 *id = peer->ksnp_id;
276                                 *myip = 0;
277                                 *peer_ip = 0;
278                                 *port = 0;
279                                 *conn_count = 0;
280                                 *share_count = 0;
281                                 rc = 0;
282                                 goto out;
283                         }
284
                        /* One record per passive (locally bound) IP. */
285                         for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
286                                 if (index-- > 0)
287                                         continue;
288
289                                 *id = peer->ksnp_id;
290                                 *myip = peer->ksnp_passive_ips[j];
291                                 *peer_ip = 0;
292                                 *port = 0;
293                                 *conn_count = 0;
294                                 *share_count = 0;
295                                 rc = 0;
296                                 goto out;
297                         }
298
                        /* One record per route, with full address/port detail. */
299                         list_for_each(rtmp, &peer->ksnp_routes) {
300                                 if (index-- > 0)
301                                         continue;
302
303                                 route = list_entry(rtmp, struct ksock_route,
304                                                    ksnr_list);
305
306                                 *id = peer->ksnp_id;
307                                 *myip = route->ksnr_myipaddr;
308                                 *peer_ip = route->ksnr_ipaddr;
309                                 *port = route->ksnr_port;
310                                 *conn_count = route->ksnr_conn_count;
311                                 *share_count = route->ksnr_share_count;
312                                 rc = 0;
313                                 goto out;
314                         }
315                 }
316         }
317  out:
318         read_unlock(&ksocknal_data.ksnd_global_lock);
319         return rc;
320 }
321
/*
 * Bind an established conn to its route: take a route ref on behalf of
 * the conn, record which local IP the route is now bound to (updating
 * per-interface route counts), mark the connection type as connected and
 * reset the retry backoff.  Caller holds the global write lock.
 */
322 static void
323 ksocknal_associate_route_conn_locked(struct ksock_route *route,
324                                      struct ksock_conn *conn)
325 {
326         struct ksock_peer *peer = route->ksnr_peer;
327         int type = conn->ksnc_type;
328         struct ksock_interface *iface;
329
330         conn->ksnc_route = route;
331         ksocknal_route_addref(route);
332
                /* Local IP changed (or was never set): fix up iface accounting. */
333         if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
334                 if (!route->ksnr_myipaddr) {
335                         /* route wasn't bound locally yet (the initial route) */
336                         CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
337                                libcfs_id2str(peer->ksnp_id),
338                                &route->ksnr_ipaddr,
339                                &conn->ksnc_myipaddr);
340                 } else {
341                         CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
342                                libcfs_id2str(peer->ksnp_id),
343                                &route->ksnr_ipaddr,
344                                &route->ksnr_myipaddr,
345                                &conn->ksnc_myipaddr);
346
                                /* Old interface (if still configured) loses this route. */
347                         iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
348                                                   route->ksnr_myipaddr);
349                         if (iface)
350                                 iface->ksni_nroutes--;
351                 }
352                 route->ksnr_myipaddr = conn->ksnc_myipaddr;
                        /* New interface (if configured) gains this route. */
353                 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
354                                           route->ksnr_myipaddr);
355                 if (iface)
356                         iface->ksni_nroutes++;
357         }
358
                /* ksnr_connected is a bitmask of connected connection types. */
359         route->ksnr_connected |= (1 << type);
360         route->ksnr_conn_count++;
361
362         /*
363          * Successful connection => further attempts can
364          * proceed immediately
365          */
366         route->ksnr_retry_interval = 0;
367 }
368
369 static void
370 ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route)
371 {
372         struct list_head *tmp;
373         struct ksock_conn *conn;
374         struct ksock_route *route2;
375
376         LASSERT(!peer->ksnp_closing);
377         LASSERT(!route->ksnr_peer);
378         LASSERT(!route->ksnr_scheduled);
379         LASSERT(!route->ksnr_connecting);
380         LASSERT(!route->ksnr_connected);
381
382         /* LASSERT(unique) */
383         list_for_each(tmp, &peer->ksnp_routes) {
384                 route2 = list_entry(tmp, struct ksock_route, ksnr_list);
385
386                 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
387                         CERROR("Duplicate route %s %pI4h\n",
388                                libcfs_id2str(peer->ksnp_id),
389                                &route->ksnr_ipaddr);
390                         LBUG();
391                 }
392         }
393
394         route->ksnr_peer = peer;
395         ksocknal_peer_addref(peer);
396         /* peer's routelist takes over my ref on 'route' */
397         list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
398
399         list_for_each(tmp, &peer->ksnp_conns) {
400                 conn = list_entry(tmp, struct ksock_conn, ksnc_list);
401
402                 if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
403                         continue;
404
405                 ksocknal_associate_route_conn_locked(route, conn);
406                 /* keep going (typed routes) */
407         }
408 }
409
410 static void
411 ksocknal_del_route_locked(struct ksock_route *route)
412 {
413         struct ksock_peer *peer = route->ksnr_peer;
414         struct ksock_interface *iface;
415         struct ksock_conn *conn;
416         struct list_head *ctmp;
417         struct list_head *cnxt;
418
419         LASSERT(!route->ksnr_deleted);
420
421         /* Close associated conns */
422         list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
423                 conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
424
425                 if (conn->ksnc_route != route)
426                         continue;
427
428                 ksocknal_close_conn_locked(conn, 0);
429         }
430
431         if (route->ksnr_myipaddr) {
432                 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
433                                           route->ksnr_myipaddr);
434                 if (iface)
435                         iface->ksni_nroutes--;
436         }
437
438         route->ksnr_deleted = 1;
439         list_del(&route->ksnr_list);
440         ksocknal_route_decref(route);        /* drop peer's ref */
441
442         if (list_empty(&peer->ksnp_routes) &&
443             list_empty(&peer->ksnp_conns)) {
444                 /*
445                  * I've just removed the last route to a peer with no active
446                  * connections
447                  */
448                 ksocknal_unlink_peer_locked(peer);
449         }
450 }
451
452 int
453 ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
454                   int port)
455 {
456         struct list_head *tmp;
457         struct ksock_peer *peer;
458         struct ksock_peer *peer2;
459         struct ksock_route *route;
460         struct ksock_route *route2;
461         int rc;
462
463         if (id.nid == LNET_NID_ANY ||
464             id.pid == LNET_PID_ANY)
465                 return -EINVAL;
466
467         /* Have a brand new peer ready... */
468         rc = ksocknal_create_peer(&peer, ni, id);
469         if (rc)
470                 return rc;
471
472         route = ksocknal_create_route(ipaddr, port);
473         if (!route) {
474                 ksocknal_peer_decref(peer);
475                 return -ENOMEM;
476         }
477
478         write_lock_bh(&ksocknal_data.ksnd_global_lock);
479
480         /* always called with a ref on ni, so shutdown can't have started */
481         LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
482
483         peer2 = ksocknal_find_peer_locked(ni, id);
484         if (peer2) {
485                 ksocknal_peer_decref(peer);
486                 peer = peer2;
487         } else {
488                 /* peer table takes my ref on peer */
489                 list_add_tail(&peer->ksnp_list,
490                               ksocknal_nid2peerlist(id.nid));
491         }
492
493         route2 = NULL;
494         list_for_each(tmp, &peer->ksnp_routes) {
495                 route2 = list_entry(tmp, struct ksock_route, ksnr_list);
496
497                 if (route2->ksnr_ipaddr == ipaddr)
498                         break;
499
500                 route2 = NULL;
501         }
502         if (!route2) {
503                 ksocknal_add_route_locked(peer, route);
504                 route->ksnr_share_count++;
505         } else {
506                 ksocknal_route_decref(route);
507                 route2->ksnr_share_count++;
508         }
509
510         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
511
512         return 0;
513 }
514
515 static void
516 ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip)
517 {
518         struct ksock_conn *conn;
519         struct ksock_route *route;
520         struct list_head *tmp;
521         struct list_head *nxt;
522         int nshared;
523
524         LASSERT(!peer->ksnp_closing);
525
526         /* Extra ref prevents peer disappearing until I'm done with it */
527         ksocknal_peer_addref(peer);
528
529         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
530                 route = list_entry(tmp, struct ksock_route, ksnr_list);
531
532                 /* no match */
533                 if (!(!ip || route->ksnr_ipaddr == ip))
534                         continue;
535
536                 route->ksnr_share_count = 0;
537                 /* This deletes associated conns too */
538                 ksocknal_del_route_locked(route);
539         }
540
541         nshared = 0;
542         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
543                 route = list_entry(tmp, struct ksock_route, ksnr_list);
544                 nshared += route->ksnr_share_count;
545         }
546
547         if (!nshared) {
548                 /*
549                  * remove everything else if there are no explicit entries
550                  * left
551                  */
552                 list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
553                         route = list_entry(tmp, struct ksock_route, ksnr_list);
554
555                         /* we should only be removing auto-entries */
556                         LASSERT(!route->ksnr_share_count);
557                         ksocknal_del_route_locked(route);
558                 }
559
560                 list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
561                         conn = list_entry(tmp, struct ksock_conn, ksnc_list);
562
563                         ksocknal_close_conn_locked(conn, 0);
564                 }
565         }
566
567         ksocknal_peer_decref(peer);
568         /* NB peer unlinks itself when last conn/route is removed */
569 }
570
571 static int
572 ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
573 {
574         LIST_HEAD(zombies);
575         struct list_head *ptmp;
576         struct list_head *pnxt;
577         struct ksock_peer *peer;
578         int lo;
579         int hi;
580         int i;
581         int rc = -ENOENT;
582
583         write_lock_bh(&ksocknal_data.ksnd_global_lock);
584
585         if (id.nid != LNET_NID_ANY) {
586                 lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
587                 hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
588         } else {
589                 lo = 0;
590                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
591         }
592
593         for (i = lo; i <= hi; i++) {
594                 list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
595                         peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
596
597                         if (peer->ksnp_ni != ni)
598                                 continue;
599
600                         if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
601                               (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
602                                 continue;
603
604                         ksocknal_peer_addref(peer);     /* a ref for me... */
605
606                         ksocknal_del_peer_locked(peer, ip);
607
608                         if (peer->ksnp_closing &&
609                             !list_empty(&peer->ksnp_tx_queue)) {
610                                 LASSERT(list_empty(&peer->ksnp_conns));
611                                 LASSERT(list_empty(&peer->ksnp_routes));
612
613                                 list_splice_init(&peer->ksnp_tx_queue,
614                                                  &zombies);
615                         }
616
617                         ksocknal_peer_decref(peer);     /* ...till here */
618
619                         rc = 0;          /* matched! */
620                 }
621         }
622
623         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
624
625         ksocknal_txlist_done(ni, &zombies, 1);
626
627         return rc;
628 }
629
630 static struct ksock_conn *
631 ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
632 {
633         struct ksock_peer *peer;
634         struct list_head *ptmp;
635         struct ksock_conn *conn;
636         struct list_head *ctmp;
637         int i;
638
639         read_lock(&ksocknal_data.ksnd_global_lock);
640
641         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
642                 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
643                         peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
644
645                         LASSERT(!peer->ksnp_closing);
646
647                         if (peer->ksnp_ni != ni)
648                                 continue;
649
650                         list_for_each(ctmp, &peer->ksnp_conns) {
651                                 if (index-- > 0)
652                                         continue;
653
654                                 conn = list_entry(ctmp, struct ksock_conn,
655                                                   ksnc_list);
656                                 ksocknal_conn_addref(conn);
657                                 read_unlock(&ksocknal_data.ksnd_global_lock);
658                                 return conn;
659                         }
660                 }
661         }
662
663         read_unlock(&ksocknal_data.ksnd_global_lock);
664         return NULL;
665 }
666
667 static struct ksock_sched *
668 ksocknal_choose_scheduler_locked(unsigned int cpt)
669 {
670         struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
671         struct ksock_sched *sched;
672         int i;
673
674         LASSERT(info->ksi_nthreads > 0);
675
676         sched = &info->ksi_scheds[0];
677         /*
678          * NB: it's safe so far, but info->ksi_nthreads could be changed
679          * at runtime when we have dynamic LNet configuration, then we
680          * need to take care of this.
681          */
682         for (i = 1; i < info->ksi_nthreads; i++) {
683                 if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
684                         sched = &info->ksi_scheds[i];
685         }
686
687         return sched;
688 }
689
690 static int
691 ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs)
692 {
693         struct ksock_net *net = ni->ni_data;
694         int i;
695         int nip;
696
697         read_lock(&ksocknal_data.ksnd_global_lock);
698
699         nip = net->ksnn_ninterfaces;
700         LASSERT(nip <= LNET_MAX_INTERFACES);
701
702         /*
703          * Only offer interfaces for additional connections if I have
704          * more than one.
705          */
706         if (nip < 2) {
707                 read_unlock(&ksocknal_data.ksnd_global_lock);
708                 return 0;
709         }
710
711         for (i = 0; i < nip; i++) {
712                 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
713                 LASSERT(ipaddrs[i]);
714         }
715
716         read_unlock(&ksocknal_data.ksnd_global_lock);
717         return nip;
718 }
719
720 static int
721 ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips)
722 {
723         int best_netmatch = 0;
724         int best_xor      = 0;
725         int best          = -1;
726         int this_xor;
727         int this_netmatch;
728         int i;
729
730         for (i = 0; i < nips; i++) {
731                 if (!ips[i])
732                         continue;
733
734                 this_xor = ips[i] ^ iface->ksni_ipaddr;
735                 this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0;
736
737                 if (!(best < 0 ||
738                       best_netmatch < this_netmatch ||
739                       (best_netmatch == this_netmatch &&
740                        best_xor > this_xor)))
741                         continue;
742
743                 best = i;
744                 best_netmatch = this_netmatch;
745                 best_xor = this_xor;
746         }
747
748         LASSERT(best >= 0);
749         return best;
750 }
751
/*
 * Negotiate which of our local interfaces this peer should use: pair up
 * to n_peerips of the peer's IPs (given in peerips[]) with local
 * interfaces, extending peer->ksnp_passive_ips[] as needed, and then
 * overwrite peerips[] with the chosen local addresses.  Returns the
 * number of entries written (0 unless we have >1 local interface).
 * NB: matched entries in peerips[] are zeroed as the loop "ticks them
 * off", and peer->ksnp_n_passive_ips grows inside the loop, which is
 * what terminates it.
 */
752 static int
753 ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips)
754 {
755         rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
756         struct ksock_net *net = peer->ksnp_ni->ni_data;
757         struct ksock_interface *iface;
758         struct ksock_interface *best_iface;
759         int n_ips;
760         int i;
761         int j;
762         int k;
763         __u32 ip;
764         __u32 xor;
765         int this_netmatch;
766         int best_netmatch;
767         int best_npeers;
768
769         /*
770          * CAVEAT EMPTOR: We do all our interface matching with an
771          * exclusive hold of global lock at IRQ priority.  We're only
772          * expecting to be dealing with small numbers of interfaces, so the
773          * O(n**3)-ness shouldn't matter
774          */
775         /*
776          * Also note that I'm not going to return more than n_peerips
777          * interfaces, even if I have more myself
778          */
779         write_lock_bh(global_lock);
780
781         LASSERT(n_peerips <= LNET_MAX_INTERFACES);
782         LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
783
784         /*
785          * Only match interfaces for additional connections
786          * if I have > 1 interface
787          */
788         n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
789                 min(n_peerips, net->ksnn_ninterfaces);
790
791         for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
792                 /*            ^ yes really... */
793
794                 /*
795                  * If we have any new interfaces, first tick off all the
796                  * peer IPs that match old interfaces, then choose new
797                  * interfaces to match the remaining peer IPS.
798                  * We don't forget interfaces we've stopped using; we might
799                  * start using them again...
800                  */
801                 if (i < peer->ksnp_n_passive_ips) {
802                         /* Old interface. */
803                         ip = peer->ksnp_passive_ips[i];
804                         best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
805
806                         /* peer passive ips are kept up to date */
807                         LASSERT(best_iface);
808                 } else {
809                         /* choose a new interface */
810                         LASSERT(i == peer->ksnp_n_passive_ips);
811
812                         best_iface = NULL;
813                         best_netmatch = 0;
814                         best_npeers = 0;
815
                                /* Scan all local interfaces not already assigned to
                                 * this peer; prefer subnet matches, then fewest peers. */
816                         for (j = 0; j < net->ksnn_ninterfaces; j++) {
817                                 iface = &net->ksnn_interfaces[j];
818                                 ip = iface->ksni_ipaddr;
819
820                                 for (k = 0; k < peer->ksnp_n_passive_ips; k++)
821                                         if (peer->ksnp_passive_ips[k] == ip)
822                                                 break;
823
824                                 if (k < peer->ksnp_n_passive_ips) /* using it already */
825                                         continue;
826
827                                 k = ksocknal_match_peerip(iface, peerips,
828                                                           n_peerips);
829                                 xor = ip ^ peerips[k];
830                                 this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;
831
832                                 if (!(!best_iface ||
833                                       best_netmatch < this_netmatch ||
834                                       (best_netmatch == this_netmatch &&
835                                        best_npeers > iface->ksni_npeers)))
836                                         continue;
837
838                                 best_iface = iface;
839                                 best_netmatch = this_netmatch;
840                                 best_npeers = iface->ksni_npeers;
841                         }
842
843                         LASSERT(best_iface);
844
                                /* Claim the interface for this peer; this grows
                                 * ksnp_n_passive_ips, advancing loop termination. */
845                         best_iface->ksni_npeers++;
846                         ip = best_iface->ksni_ipaddr;
847                         peer->ksnp_passive_ips[i] = ip;
848                         peer->ksnp_n_passive_ips = i + 1;
849                 }
850
851                 /* mark the best matching peer IP used */
852                 j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
853                 peerips[j] = 0;
854         }
855
856         /* Overwrite input peer IP addresses */
857         memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
858
859         write_unlock_bh(global_lock);
860
861         return n_ips;
862 }
863
/*
 * Create one route per peer IP address so @peer can be reached from each
 * of my local interfaces; @port is the peer's listener port and
 * @peer_ipaddrs/@npeer_ipaddrs is the address vector the peer advertised
 * in its HELLO.  Addresses that already have a route, and interfaces
 * already used by an existing route to this peer, are skipped.  For each
 * new route the local interface is chosen by (1) netmask match with the
 * peer address, then (2) fewest routes already using the interface.
 */
static void
ksocknal_create_routes(struct ksock_peer *peer, int port,
		       __u32 *peer_ipaddrs, int npeer_ipaddrs)
{
	struct ksock_route *newroute = NULL;
	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
	struct lnet_ni *ni = peer->ksnp_ni;
	struct ksock_net *net = ni->ni_data;
	struct list_head *rtmp;
	struct ksock_route *route;
	struct ksock_interface *iface;
	struct ksock_interface *best_iface;
	int best_netmatch;
	int this_netmatch;
	int best_nroutes;
	int i;
	int j;

	/*
	 * CAVEAT EMPTOR: We do all our interface matching with an
	 * exclusive hold of global lock at IRQ priority.  We're only
	 * expecting to be dealing with small numbers of interfaces, so the
	 * O(n**3)-ness here shouldn't matter
	 */
	write_lock_bh(global_lock);

	if (net->ksnn_ninterfaces < 2) {
		/*
		 * Only create additional connections
		 * if I have > 1 interface
		 */
		write_unlock_bh(global_lock);
		return;
	}

	LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES);

	for (i = 0; i < npeer_ipaddrs; i++) {
		if (newroute) {
			/* route left over from the previous iteration
			 * (duplicate or no usable interface): retarget it
			 * rather than allocating a fresh one
			 */
			newroute->ksnr_ipaddr = peer_ipaddrs[i];
		} else {
			/* drop the lock to allocate; peer state may change
			 * while it is released, hence the ksnp_closing
			 * re-check below
			 */
			write_unlock_bh(global_lock);

			newroute = ksocknal_create_route(peer_ipaddrs[i], port);
			if (!newroute)
				return;

			write_lock_bh(global_lock);
		}

		if (peer->ksnp_closing) {
			/* peer got closed under me */
			break;
		}

		/* Already got a route? */
		route = NULL;
		list_for_each(rtmp, &peer->ksnp_routes) {
			route = list_entry(rtmp, struct ksock_route, ksnr_list);

			if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
				break;

			route = NULL;
		}
		if (route)
			continue;

		best_iface = NULL;
		best_nroutes = 0;
		best_netmatch = 0;

		LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);

		/* Select interface to connect from */
		for (j = 0; j < net->ksnn_ninterfaces; j++) {
			iface = &net->ksnn_interfaces[j];

			/* Using this interface already? */
			list_for_each(rtmp, &peer->ksnp_routes) {
				route = list_entry(rtmp, struct ksock_route,
						   ksnr_list);

				if (route->ksnr_myipaddr == iface->ksni_ipaddr)
					break;

				route = NULL;
			}
			if (route)
				continue;

			/* 1 iff interface and peer address share a subnet */
			this_netmatch = (!((iface->ksni_ipaddr ^
					   newroute->ksnr_ipaddr) &
					   iface->ksni_netmask)) ? 1 : 0;

			/* keep current best unless this interface matches
			 * the subnet better, or ties and carries fewer routes
			 */
			if (!(!best_iface ||
			      best_netmatch < this_netmatch ||
			      (best_netmatch == this_netmatch &&
			       best_nroutes > iface->ksni_nroutes)))
				continue;

			best_iface = iface;
			best_netmatch = this_netmatch;
			best_nroutes = iface->ksni_nroutes;
		}

		if (!best_iface)
			continue;

		newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
		best_iface->ksni_nroutes++;

		ksocknal_add_route_locked(peer, newroute);
		newroute = NULL;	/* ownership passed to the peer */
	}

	write_unlock_bh(global_lock);
	if (newroute)
		ksocknal_route_decref(newroute);
}
984
985 int
986 ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
987 {
988         struct ksock_connreq *cr;
989         int rc;
990         __u32 peer_ip;
991         int peer_port;
992
993         rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
994         LASSERT(!rc);                 /* we succeeded before */
995
996         LIBCFS_ALLOC(cr, sizeof(*cr));
997         if (!cr) {
998                 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
999                                    &peer_ip);
1000                 return -ENOMEM;
1001         }
1002
1003         lnet_ni_addref(ni);
1004         cr->ksncr_ni   = ni;
1005         cr->ksncr_sock = sock;
1006
1007         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
1008
1009         list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
1010         wake_up(&ksocknal_data.ksnd_connd_waitq);
1011
1012         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
1013         return 0;
1014 }
1015
1016 static int
1017 ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr)
1018 {
1019         struct ksock_route *route;
1020
1021         list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
1022                 if (route->ksnr_ipaddr == ipaddr)
1023                         return route->ksnr_connecting;
1024         }
1025         return 0;
1026 }
1027
/*
 * Establish a new connection on @sock.
 *
 * Two modes:
 *  - active (@route != NULL): we initiated the connect; @type is the
 *    requested connection type and we send HELLO eagerly.
 *  - passive (@route == NULL, @type == SOCKLND_CONN_NONE): the socket was
 *    accepted; peer identity, protocol and type come from its HELLO.
 *
 * Returns 0 on success or a negative errno on fatal failure.  NB internal
 * rc uses *positive* EALREADY/EPROTO/ESTALE to flag non-fatal connection
 * races / protocol retries, resolved via the failed_2 path.  This call
 * consumes the caller's reference on @sock (released on failure).
 */
int
ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
		     struct socket *sock, int type)
{
	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
	LIST_HEAD(zombies);
	struct lnet_process_id peerid;
	struct list_head *tmp;
	__u64 incarnation;
	struct ksock_conn *conn;
	struct ksock_conn *conn2;
	struct ksock_peer *peer = NULL;
	struct ksock_peer *peer2;
	struct ksock_sched *sched;
	struct ksock_hello_msg *hello;
	int cpt;
	struct ksock_tx *tx;
	struct ksock_tx *txtmp;
	int rc;
	int active;
	char *warn = NULL;

	/* active (outgoing) iff the caller supplied a route */
	active = !!route;

	LASSERT(active == (type != SOCKLND_CONN_NONE));

	LIBCFS_ALLOC(conn, sizeof(*conn));
	if (!conn) {
		rc = -ENOMEM;
		goto failed_0;
	}

	conn->ksnc_peer = NULL;
	conn->ksnc_route = NULL;
	conn->ksnc_sock = sock;
	/*
	 * 2 ref, 1 for conn, another extra ref prevents socket
	 * being closed before establishment of connection
	 */
	atomic_set(&conn->ksnc_sock_refcount, 2);
	conn->ksnc_type = type;
	ksocknal_lib_save_callback(sock, conn);
	atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */

	conn->ksnc_rx_ready = 0;
	conn->ksnc_rx_scheduled = 0;

	INIT_LIST_HEAD(&conn->ksnc_tx_queue);
	conn->ksnc_tx_ready = 0;
	conn->ksnc_tx_scheduled = 0;
	conn->ksnc_tx_carrier = NULL;
	atomic_set(&conn->ksnc_tx_nob, 0);

	/* scratch HELLO buffer, sized for the maximum interface vector */
	LIBCFS_ALLOC(hello, offsetof(struct ksock_hello_msg,
				     kshm_ips[LNET_MAX_INTERFACES]));
	if (!hello) {
		rc = -ENOMEM;
		goto failed_1;
	}

	/* stash conn's local and remote addrs */
	rc = ksocknal_lib_get_conn_addrs(conn);
	if (rc)
		goto failed_1;

	/*
	 * Find out/confirm peer's NID and connection type and get the
	 * vector of interfaces she's willing to let me connect to.
	 * Passive connections use the listener timeout since the peer sends
	 * eagerly
	 */
	if (active) {
		peer = route->ksnr_peer;
		LASSERT(ni == peer->ksnp_ni);

		/* Active connection sends HELLO eagerly */
		hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
		peerid = peer->ksnp_id;

		write_lock_bh(global_lock);
		conn->ksnc_proto = peer->ksnp_proto;
		write_unlock_bh(global_lock);

		if (!conn->ksnc_proto) {
			/* no protocol negotiated yet: start from the newest */
			conn->ksnc_proto = &ksocknal_protocol_v3x;
#if SOCKNAL_VERSION_DEBUG
			if (*ksocknal_tunables.ksnd_protocol == 2)
				conn->ksnc_proto = &ksocknal_protocol_v2x;
			else if (*ksocknal_tunables.ksnd_protocol == 1)
				conn->ksnc_proto = &ksocknal_protocol_v1x;
#endif
		}

		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
		if (rc)
			goto failed_1;
	} else {
		peerid.nid = LNET_NID_ANY;
		peerid.pid = LNET_PID_ANY;

		/* Passive, get protocol from peer */
		conn->ksnc_proto = NULL;
	}

	/* NB rc > 0 here means a protocol-version retry (see above) */
	rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
	if (rc < 0)
		goto failed_1;

	LASSERT(!rc || active);
	LASSERT(conn->ksnc_proto);
	LASSERT(peerid.nid != LNET_NID_ANY);

	cpt = lnet_cpt_of_nid(peerid.nid);

	if (active) {
		ksocknal_peer_addref(peer);
		write_lock_bh(global_lock);
	} else {
		/* passive: create/find the peer now that its NID is known */
		rc = ksocknal_create_peer(&peer, ni, peerid);
		if (rc)
			goto failed_1;

		write_lock_bh(global_lock);

		/* called with a ref on ni, so shutdown can't have started */
		LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);

		peer2 = ksocknal_find_peer_locked(ni, peerid);
		if (!peer2) {
			/*
			 * NB this puts an "empty" peer in the peer
			 * table (which takes my ref)
			 */
			list_add_tail(&peer->ksnp_list,
				      ksocknal_nid2peerlist(peerid.nid));
		} else {
			ksocknal_peer_decref(peer);
			peer = peer2;
		}

		/* +1 ref for me */
		ksocknal_peer_addref(peer);
		peer->ksnp_accepting++;

		/*
		 * Am I already connecting to this guy?  Resolve in
		 * favour of higher NID...
		 */
		if (peerid.nid < ni->ni_nid &&
		    ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
			rc = EALREADY;
			warn = "connection race resolution";
			goto failed_2;
		}
	}

	if (peer->ksnp_closing ||
	    (active && route->ksnr_deleted)) {
		/* peer/route got closed under me */
		rc = -ESTALE;
		warn = "peer/route removed";
		goto failed_2;
	}

	if (!peer->ksnp_proto) {
		/*
		 * Never connected before.
		 * NB recv_hello may have returned EPROTO to signal my peer
		 * wants a different protocol than the one I asked for.
		 */
		LASSERT(list_empty(&peer->ksnp_conns));

		peer->ksnp_proto = conn->ksnc_proto;
		peer->ksnp_incarnation = incarnation;
	}

	if (peer->ksnp_proto != conn->ksnc_proto ||
	    peer->ksnp_incarnation != incarnation) {
		/* Peer rebooted or I've got the wrong protocol version */
		ksocknal_close_peer_conns_locked(peer, 0, 0);

		peer->ksnp_proto = NULL;
		rc = ESTALE;
		warn = peer->ksnp_incarnation != incarnation ?
		       "peer rebooted" :
		       "wrong proto version";
		goto failed_2;
	}

	/* dispatch on the (positive, non-fatal) hello result */
	switch (rc) {
	default:
		LBUG();
	case 0:
		break;
	case EALREADY:
		warn = "lost conn race";
		goto failed_2;
	case EPROTO:
		warn = "retry with different protocol version";
		goto failed_2;
	}

	/*
	 * Refuse to duplicate an existing connection, unless this is a
	 * loopback connection
	 */
	if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
		list_for_each(tmp, &peer->ksnp_conns) {
			conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);

			if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
			    conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
			    conn2->ksnc_type != conn->ksnc_type)
				continue;

			/*
			 * Reply on a passive connection attempt so the peer
			 * realises we're connected.
			 */
			LASSERT(!rc);
			if (!active)
				rc = EALREADY;

			warn = "duplicate";
			goto failed_2;
		}
	}

	/*
	 * If the connection created by this route didn't bind to the IP
	 * address the route connected to, the connection/route matching
	 * code below probably isn't going to work.
	 */
	if (active &&
	    route->ksnr_ipaddr != conn->ksnc_ipaddr) {
		CERROR("Route %s %pI4h connected to %pI4h\n",
		       libcfs_id2str(peer->ksnp_id),
		       &route->ksnr_ipaddr,
		       &conn->ksnc_ipaddr);
	}

	/*
	 * Search for a route corresponding to the new connection and
	 * create an association.  This allows incoming connections created
	 * by routes in my peer to match my own route entries so I don't
	 * continually create duplicate routes.
	 */
	list_for_each(tmp, &peer->ksnp_routes) {
		route = list_entry(tmp, struct ksock_route, ksnr_list);

		if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
			continue;

		ksocknal_associate_route_conn_locked(route, conn);
		break;
	}

	conn->ksnc_peer = peer;          /* conn takes my ref on peer */
	peer->ksnp_last_alive = cfs_time_current();
	peer->ksnp_send_keepalive = 0;
	peer->ksnp_error = 0;

	sched = ksocknal_choose_scheduler_locked(cpt);
	sched->kss_nconns++;
	conn->ksnc_scheduler = sched;

	conn->ksnc_tx_last_post = cfs_time_current();
	/* Set the deadline for the outgoing HELLO to drain */
	conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
	conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
	mb();   /* order with adding to peer's conn list */

	list_add(&conn->ksnc_list, &peer->ksnp_conns);
	ksocknal_conn_addref(conn);

	ksocknal_new_packet(conn, 0);

	conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);

	/* Take packets blocking for this connection. */
	list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
		int match = conn->ksnc_proto->pro_match_tx(conn, tx,
							   tx->tx_nonblk);

		if (match == SOCKNAL_MATCH_NO)
			continue;

		list_del(&tx->tx_list);
		ksocknal_queue_tx_locked(tx, conn);
	}

	write_unlock_bh(global_lock);

	/*
	 * We've now got a new connection.  Any errors from here on are just
	 * like "normal" comms errors and we close the connection normally.
	 * NB (a) we still have to send the reply HELLO for passive
	 *      connections,
	 *    (b) normal I/O on the conn is blocked until I setup and call the
	 *      socket callbacks.
	 */
	CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
	       libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
	       &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
	       conn->ksnc_port, incarnation, cpt,
	       (int)(sched - &sched->kss_info->ksi_scheds[0]));

	if (active) {
		/* additional routes after interface exchange? */
		ksocknal_create_routes(peer, conn->ksnc_port,
				       hello->kshm_ips, hello->kshm_nips);
	} else {
		hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
						       hello->kshm_nips);
		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
	}

	LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
				    kshm_ips[LNET_MAX_INTERFACES]));

	/*
	 * setup the socket AFTER I've received hello (it disables
	 * SO_LINGER).  I might call back to the acceptor who may want
	 * to send a protocol version response and then close the
	 * socket; this ensures the socket only tears down after the
	 * response has been sent.
	 */
	if (!rc)
		rc = ksocknal_lib_setup_sock(sock);

	write_lock_bh(global_lock);

	/* NB my callbacks block while I hold ksnd_global_lock */
	ksocknal_lib_set_callback(sock, conn);

	if (!active)
		peer->ksnp_accepting--;

	write_unlock_bh(global_lock);

	if (rc) {
		write_lock_bh(global_lock);
		if (!conn->ksnc_closing) {
			/* could be closed by another thread */
			ksocknal_close_conn_locked(conn, rc);
		}
		write_unlock_bh(global_lock);
	} else if (!ksocknal_connsock_addref(conn)) {
		/* Allow I/O to proceed. */
		ksocknal_read_callback(conn);
		ksocknal_write_callback(conn);
		ksocknal_connsock_decref(conn);
	}

	/* drop the extra socket ref and my conn ref taken above */
	ksocknal_connsock_decref(conn);
	ksocknal_conn_decref(conn);
	return rc;

 failed_2:
	/* entered with global_lock held; unpick the peer state set above */
	if (!peer->ksnp_closing &&
	    list_empty(&peer->ksnp_conns) &&
	    list_empty(&peer->ksnp_routes)) {
		list_add(&zombies, &peer->ksnp_tx_queue);
		list_del_init(&peer->ksnp_tx_queue);
		ksocknal_unlink_peer_locked(peer);
	}

	write_unlock_bh(global_lock);

	if (warn) {
		if (rc < 0)
			CERROR("Not creating conn %s type %d: %s\n",
			       libcfs_id2str(peerid), conn->ksnc_type, warn);
		else
			CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
			       libcfs_id2str(peerid), conn->ksnc_type, warn);
	}

	if (!active) {
		if (rc > 0) {
			/*
			 * Request retry by replying with CONN_NONE
			 * ksnc_proto has been set already
			 */
			conn->ksnc_type = SOCKLND_CONN_NONE;
			hello->kshm_nips = 0;
			ksocknal_send_hello(ni, conn, peerid.nid, hello);
		}

		write_lock_bh(global_lock);
		peer->ksnp_accepting--;
		write_unlock_bh(global_lock);
	}

	ksocknal_txlist_done(ni, &zombies, 1);
	ksocknal_peer_decref(peer);

failed_1:
	if (hello)
		LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
					    kshm_ips[LNET_MAX_INTERFACES]));

	LIBCFS_FREE(conn, sizeof(*conn));

failed_0:
	sock_release(sock);
	return rc;
}
1436
void
ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
{
	/*
	 * This just does the immmediate housekeeping, and queues the
	 * connection for the reaper to terminate.
	 * Caller holds ksnd_global_lock exclusively in irq context
	 */
	struct ksock_peer *peer = conn->ksnc_peer;
	struct ksock_route *route;
	struct ksock_conn *conn2;
	struct list_head *tmp;

	LASSERT(!peer->ksnp_error);
	LASSERT(!conn->ksnc_closing);
	conn->ksnc_closing = 1;

	/* ksnd_deathrow_conns takes over peer's ref */
	list_del(&conn->ksnc_list);

	route = conn->ksnc_route;
	if (route) {
		/* dissociate conn from route... */
		LASSERT(!route->ksnr_deleted);
		LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));

		/* only clear the route's "connected" bit for this type if
		 * no other conn of the same type still uses the route
		 */
		conn2 = NULL;
		list_for_each(tmp, &peer->ksnp_conns) {
			conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);

			if (conn2->ksnc_route == route &&
			    conn2->ksnc_type == conn->ksnc_type)
				break;

			conn2 = NULL;
		}
		if (!conn2)
			route->ksnr_connected &= ~(1 << conn->ksnc_type);

		conn->ksnc_route = NULL;

		ksocknal_route_decref(route);     /* drop conn's ref on route */
	}

	if (list_empty(&peer->ksnp_conns)) {
		/* No more connections to this peer */

		if (!list_empty(&peer->ksnp_tx_queue)) {
			struct ksock_tx *tx;

			LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);

			/*
			 * throw them to the last connection...,
			 * these TXs will be send to /dev/null by scheduler
			 */
			list_for_each_entry(tx, &peer->ksnp_tx_queue,
					    tx_list)
				ksocknal_tx_prep(conn, tx);

			spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
			list_splice_init(&peer->ksnp_tx_queue,
					 &conn->ksnc_tx_queue);
			spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
		}

		peer->ksnp_proto = NULL;  /* renegotiate protocol version */
		peer->ksnp_error = error; /* stash last conn close reason */

		if (list_empty(&peer->ksnp_routes)) {
			/*
			 * I've just closed last conn belonging to a
			 * peer with no routes to it
			 */
			ksocknal_unlink_peer_locked(peer);
		}
	}

	/* hand the conn to the reaper thread for termination */
	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);

	list_add_tail(&conn->ksnc_list,
		      &ksocknal_data.ksnd_deathrow_conns);
	wake_up(&ksocknal_data.ksnd_reaper_waitq);

	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
}
1523
1524 void
1525 ksocknal_peer_failed(struct ksock_peer *peer)
1526 {
1527         int notify = 0;
1528         unsigned long last_alive = 0;
1529
1530         /*
1531          * There has been a connection failure or comms error; but I'll only
1532          * tell LNET I think the peer is dead if it's to another kernel and
1533          * there are no connections or connection attempts in existence.
1534          */
1535         read_lock(&ksocknal_data.ksnd_global_lock);
1536
1537         if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) &&
1538             list_empty(&peer->ksnp_conns) &&
1539             !peer->ksnp_accepting &&
1540             !ksocknal_find_connecting_route_locked(peer)) {
1541                 notify = 1;
1542                 last_alive = peer->ksnp_last_alive;
1543         }
1544
1545         read_unlock(&ksocknal_data.ksnd_global_lock);
1546
1547         if (notify)
1548                 lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
1549                             last_alive);
1550 }
1551
1552 void
1553 ksocknal_finalize_zcreq(struct ksock_conn *conn)
1554 {
1555         struct ksock_peer *peer = conn->ksnc_peer;
1556         struct ksock_tx *tx;
1557         struct ksock_tx *temp;
1558         struct ksock_tx *tmp;
1559         LIST_HEAD(zlist);
1560
1561         /*
1562          * NB safe to finalize TXs because closing of socket will
1563          * abort all buffered data
1564          */
1565         LASSERT(!conn->ksnc_sock);
1566
1567         spin_lock(&peer->ksnp_lock);
1568
1569         list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
1570                 if (tx->tx_conn != conn)
1571                         continue;
1572
1573                 LASSERT(tx->tx_msg.ksm_zc_cookies[0]);
1574
1575                 tx->tx_msg.ksm_zc_cookies[0] = 0;
1576                 tx->tx_zc_aborted = 1; /* mark it as not-acked */
1577                 list_del(&tx->tx_zc_list);
1578                 list_add(&tx->tx_zc_list, &zlist);
1579         }
1580
1581         spin_unlock(&peer->ksnp_lock);
1582
1583         list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
1584                 list_del(&tx->tx_zc_list);
1585                 ksocknal_tx_decref(tx);
1586         }
1587 }
1588
void
ksocknal_terminate_conn(struct ksock_conn *conn)
{
	/*
	 * This gets called by the reaper (guaranteed thread context) to
	 * disengage the socket from its callbacks and close it.
	 * ksnc_refcount will eventually hit zero, and then the reaper will
	 * destroy it.
	 */
	struct ksock_peer *peer = conn->ksnc_peer;
	struct ksock_sched *sched = conn->ksnc_scheduler;
	int failed = 0;

	LASSERT(conn->ksnc_closing);

	/* wake up the scheduler to "send" all remaining packets to /dev/null */
	spin_lock_bh(&sched->kss_lock);

	/* a closing conn is always ready to tx */
	conn->ksnc_tx_ready = 1;

	if (!conn->ksnc_tx_scheduled &&
	    !list_empty(&conn->ksnc_tx_queue)) {
		list_add_tail(&conn->ksnc_tx_list,
			      &sched->kss_tx_conns);
		conn->ksnc_tx_scheduled = 1;
		/* extra ref for scheduler */
		ksocknal_conn_addref(conn);

		wake_up(&sched->kss_waitq);
	}

	spin_unlock_bh(&sched->kss_lock);

	/* serialise with callbacks */
	write_lock_bh(&ksocknal_data.ksnd_global_lock);

	ksocknal_lib_reset_callback(conn->ksnc_sock, conn);

	/*
	 * OK, so this conn may not be completely disengaged from its
	 * scheduler yet, but it _has_ committed to terminate...
	 */
	conn->ksnc_scheduler->kss_nconns--;

	if (peer->ksnp_error) {
		/* peer's last conn closed in error */
		LASSERT(list_empty(&peer->ksnp_conns));
		failed = 1;
		peer->ksnp_error = 0;     /* avoid multiple notifications */
	}

	write_unlock_bh(&ksocknal_data.ksnd_global_lock);

	/* notify after dropping the global lock: lnet_notify may block */
	if (failed)
		ksocknal_peer_failed(peer);

	/*
	 * The socket is closed on the final put; either here, or in
	 * ksocknal_{send,recv}msg().  Since we set up the linger2 option
	 * when the connection was established, this will close the socket
	 * immediately, aborting anything buffered in it. Any hung
	 * zero-copy transmits will therefore complete in finite time.
	 */
	ksocknal_connsock_decref(conn);
}
1655
1656 void
1657 ksocknal_queue_zombie_conn(struct ksock_conn *conn)
1658 {
1659         /* Queue the conn for the reaper to destroy */
1660
1661         LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
1662         spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1663
1664         list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
1665         wake_up(&ksocknal_data.ksnd_reaper_waitq);
1666
1667         spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1668 }
1669
void
ksocknal_destroy_conn(struct ksock_conn *conn)
{
	unsigned long last_rcv;

	/*
	 * Final coup-de-grace of the reaper: free a conn that holds no
	 * refs, no socket and is off every scheduler list.  If a receive
	 * was in flight, complete/report it with an error first.
	 */
	CDEBUG(D_NET, "connection %p\n", conn);

	LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
	LASSERT(!atomic_read(&conn->ksnc_sock_refcount));
	LASSERT(!conn->ksnc_sock);
	LASSERT(!conn->ksnc_route);
	LASSERT(!conn->ksnc_tx_scheduled);
	LASSERT(!conn->ksnc_rx_scheduled);
	LASSERT(list_empty(&conn->ksnc_tx_queue));

	/* complete current receive if any */
	switch (conn->ksnc_rx_state) {
	case SOCKNAL_RX_LNET_PAYLOAD:
		/*
		 * Payload receive was cut short: tell LNet the message
		 * failed so upper layers don't wait forever.
		 * last_rcv reconstructs when data last arrived from the
		 * rx deadline minus the configured timeout.
		 */
		last_rcv = conn->ksnc_rx_deadline -
			   cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n",
		       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
		       &conn->ksnc_ipaddr, conn->ksnc_port,
		       conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
		       cfs_duration_sec(cfs_time_sub(cfs_time_current(),
						     last_rcv)));
		lnet_finalize(conn->ksnc_peer->ksnp_ni,
			      conn->ksnc_cookie, -EIO);
		break;
	case SOCKNAL_RX_LNET_HEADER:
		/* only noisy if bytes of the header actually arrived */
		if (conn->ksnc_rx_started)
			CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
			       &conn->ksnc_ipaddr, conn->ksnc_port,
			       conn->ksnc_proto->pro_version);
		break;
	case SOCKNAL_RX_KSM_HEADER:
		if (conn->ksnc_rx_started)
			CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
			       &conn->ksnc_ipaddr, conn->ksnc_port,
			       conn->ksnc_proto->pro_version);
		break;
	case SOCKNAL_RX_SLOP:
		/* discarding surplus bytes when the conn died */
		if (conn->ksnc_rx_started)
			CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
			       &conn->ksnc_ipaddr, conn->ksnc_port);
		break;
	default:
		/* any other rx state here indicates a logic error */
		LBUG();
		break;
	}

	ksocknal_peer_decref(conn->ksnc_peer);

	LIBCFS_FREE(conn, sizeof(*conn));
}
1729
1730 int
1731 ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why)
1732 {
1733         struct ksock_conn *conn;
1734         struct list_head *ctmp;
1735         struct list_head *cnxt;
1736         int count = 0;
1737
1738         list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
1739                 conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
1740
1741                 if (!ipaddr || conn->ksnc_ipaddr == ipaddr) {
1742                         count++;
1743                         ksocknal_close_conn_locked(conn, why);
1744                 }
1745         }
1746
1747         return count;
1748 }
1749
1750 int
1751 ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
1752 {
1753         struct ksock_peer *peer = conn->ksnc_peer;
1754         __u32 ipaddr = conn->ksnc_ipaddr;
1755         int count;
1756
1757         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1758
1759         count = ksocknal_close_peer_conns_locked(peer, ipaddr, why);
1760
1761         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1762
1763         return count;
1764 }
1765
1766 int
1767 ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
1768 {
1769         struct ksock_peer *peer;
1770         struct list_head *ptmp;
1771         struct list_head *pnxt;
1772         int lo;
1773         int hi;
1774         int i;
1775         int count = 0;
1776
1777         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1778
1779         if (id.nid != LNET_NID_ANY) {
1780                 lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
1781                 hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
1782         } else {
1783                 lo = 0;
1784                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
1785         }
1786
1787         for (i = lo; i <= hi; i++) {
1788                 list_for_each_safe(ptmp, pnxt,
1789                                    &ksocknal_data.ksnd_peers[i]) {
1790                         peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
1791
1792                         if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
1793                               (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
1794                                 continue;
1795
1796                         count += ksocknal_close_peer_conns_locked(peer, ipaddr,
1797                                                                   0);
1798                 }
1799         }
1800
1801         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1802
1803         /* wildcards always succeed */
1804         if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr)
1805                 return 0;
1806
1807         if (!count)
1808                 return -ENOENT;
1809         else
1810                 return 0;
1811 }
1812
1813 void
1814 ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive)
1815 {
1816         /*
1817          * The router is telling me she's been notified of a change in
1818          * gateway state....
1819          */
1820         struct lnet_process_id id = {0};
1821
1822         id.nid = gw_nid;
1823         id.pid = LNET_PID_ANY;
1824
1825         CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
1826                alive ? "up" : "down");
1827
1828         if (!alive) {
1829                 /* If the gateway crashed, close all open connections... */
1830                 ksocknal_close_matching_conns(id, 0);
1831                 return;
1832         }
1833
1834         /*
1835          * ...otherwise do nothing.  We can only establish new connections
1836          * if we have autroutes, and these connect on demand.
1837          */
1838 }
1839
void
ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when)
{
	/*
	 * LNet "query" handler: report in *when the last time peer @nid
	 * was known alive, and kick off new connection attempts if any
	 * route to it is currently connectable.
	 */
	int connect = 1;
	unsigned long last_alive = 0;
	unsigned long now = cfs_time_current();
	struct ksock_peer *peer = NULL;
	rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
	struct lnet_process_id id = {
		.nid = nid,
		.pid = LNET_PID_LUSTRE,
	};

	read_lock(glock);

	peer = ksocknal_find_peer_locked(ni, id);
	if (peer) {
		struct list_head *tmp;
		struct ksock_conn *conn;
		int bufnob;

		list_for_each(tmp, &peer->ksnp_conns) {
			conn = list_entry(tmp, struct ksock_conn, ksnc_list);
			bufnob = conn->ksnc_sock->sk->sk_wmem_queued;

			/*
			 * Queued bytes shrinking means the peer ACKed
			 * some data: treat that as proof of life and
			 * push the tx deadline out.
			 */
			if (bufnob < conn->ksnc_tx_bufnob) {
				/* something got ACKed */
				conn->ksnc_tx_deadline =
					cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
				peer->ksnp_last_alive = now;
				conn->ksnc_tx_bufnob = bufnob;
			}
		}

		last_alive = peer->ksnp_last_alive;
		if (!ksocknal_find_connectable_route_locked(peer))
			connect = 0;
	}

	read_unlock(glock);

	if (last_alive)
		*when = last_alive;

	/* NB peer is only printed as a pointer after the lock is dropped */
	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
	       libcfs_nid2str(nid), peer,
	       last_alive ? cfs_duration_sec(now - last_alive) : -1,
	       connect);

	if (!connect)
		return;

	/* ensure the peer exists before launching connections to it */
	ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());

	write_lock_bh(glock);

	/* peer may have vanished; re-lookup under the write lock */
	peer = ksocknal_find_peer_locked(ni, id);
	if (peer)
		ksocknal_launch_all_connections_locked(peer);

	write_unlock_bh(glock);
}
1902
1903 static void
1904 ksocknal_push_peer(struct ksock_peer *peer)
1905 {
1906         int index;
1907         int i;
1908         struct list_head *tmp;
1909         struct ksock_conn *conn;
1910
1911         for (index = 0; ; index++) {
1912                 read_lock(&ksocknal_data.ksnd_global_lock);
1913
1914                 i = 0;
1915                 conn = NULL;
1916
1917                 list_for_each(tmp, &peer->ksnp_conns) {
1918                         if (i++ == index) {
1919                                 conn = list_entry(tmp, struct ksock_conn,
1920                                                   ksnc_list);
1921                                 ksocknal_conn_addref(conn);
1922                                 break;
1923                         }
1924                 }
1925
1926                 read_unlock(&ksocknal_data.ksnd_global_lock);
1927
1928                 if (!conn)
1929                         break;
1930
1931                 ksocknal_lib_push_conn(conn);
1932                 ksocknal_conn_decref(conn);
1933         }
1934 }
1935
1936 static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
1937 {
1938         struct list_head *start;
1939         struct list_head *end;
1940         struct list_head *tmp;
1941         int rc = -ENOENT;
1942         unsigned int hsize = ksocknal_data.ksnd_peer_hash_size;
1943
1944         if (id.nid == LNET_NID_ANY) {
1945                 start = &ksocknal_data.ksnd_peers[0];
1946                 end = &ksocknal_data.ksnd_peers[hsize - 1];
1947         } else {
1948                 start = ksocknal_nid2peerlist(id.nid);
1949                 end = ksocknal_nid2peerlist(id.nid);
1950         }
1951
1952         for (tmp = start; tmp <= end; tmp++) {
1953                 int peer_off; /* searching offset in peer hash table */
1954
1955                 for (peer_off = 0; ; peer_off++) {
1956                         struct ksock_peer *peer;
1957                         int i = 0;
1958
1959                         read_lock(&ksocknal_data.ksnd_global_lock);
1960                         list_for_each_entry(peer, tmp, ksnp_list) {
1961                                 if (!((id.nid == LNET_NID_ANY ||
1962                                        id.nid == peer->ksnp_id.nid) &&
1963                                       (id.pid == LNET_PID_ANY ||
1964                                        id.pid == peer->ksnp_id.pid)))
1965                                         continue;
1966
1967                                 if (i++ == peer_off) {
1968                                         ksocknal_peer_addref(peer);
1969                                         break;
1970                                 }
1971                         }
1972                         read_unlock(&ksocknal_data.ksnd_global_lock);
1973
1974                         if (!i) /* no match */
1975                                 break;
1976
1977                         rc = 0;
1978                         ksocknal_push_peer(peer);
1979                         ksocknal_peer_decref(peer);
1980                 }
1981         }
1982         return rc;
1983 }
1984
1985 static int
1986 ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask)
1987 {
1988         struct ksock_net *net = ni->ni_data;
1989         struct ksock_interface *iface;
1990         int rc;
1991         int i;
1992         int j;
1993         struct list_head *ptmp;
1994         struct ksock_peer *peer;
1995         struct list_head *rtmp;
1996         struct ksock_route *route;
1997
1998         if (!ipaddress || !netmask)
1999                 return -EINVAL;
2000
2001         write_lock_bh(&ksocknal_data.ksnd_global_lock);
2002
2003         iface = ksocknal_ip2iface(ni, ipaddress);
2004         if (iface) {
2005                 /* silently ignore dups */
2006                 rc = 0;
2007         } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
2008                 rc = -ENOSPC;
2009         } else {
2010                 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
2011
2012                 iface->ksni_ipaddr = ipaddress;
2013                 iface->ksni_netmask = netmask;
2014                 iface->ksni_nroutes = 0;
2015                 iface->ksni_npeers = 0;
2016
2017                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2018                         list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
2019                                 peer = list_entry(ptmp, struct ksock_peer,
2020                                                   ksnp_list);
2021
2022                                 for (j = 0; j < peer->ksnp_n_passive_ips; j++)
2023                                         if (peer->ksnp_passive_ips[j] == ipaddress)
2024                                                 iface->ksni_npeers++;
2025
2026                                 list_for_each(rtmp, &peer->ksnp_routes) {
2027                                         route = list_entry(rtmp, struct ksock_route,
2028                                                            ksnr_list);
2029
2030                                         if (route->ksnr_myipaddr == ipaddress)
2031                                                 iface->ksni_nroutes++;
2032                                 }
2033                         }
2034                 }
2035
2036                 rc = 0;
2037                 /*
2038                  * NB only new connections will pay attention to the
2039                  * new interface!
2040                  */
2041         }
2042
2043         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2044
2045         return rc;
2046 }
2047
2048 static void
2049 ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr)
2050 {
2051         struct list_head *tmp;
2052         struct list_head *nxt;
2053         struct ksock_route *route;
2054         struct ksock_conn *conn;
2055         int i;
2056         int j;
2057
2058         for (i = 0; i < peer->ksnp_n_passive_ips; i++)
2059                 if (peer->ksnp_passive_ips[i] == ipaddr) {
2060                         for (j = i + 1; j < peer->ksnp_n_passive_ips; j++)
2061                                 peer->ksnp_passive_ips[j - 1] =
2062                                         peer->ksnp_passive_ips[j];
2063                         peer->ksnp_n_passive_ips--;
2064                         break;
2065                 }
2066
2067         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
2068                 route = list_entry(tmp, struct ksock_route, ksnr_list);
2069
2070                 if (route->ksnr_myipaddr != ipaddr)
2071                         continue;
2072
2073                 if (route->ksnr_share_count) {
2074                         /* Manually created; keep, but unbind */
2075                         route->ksnr_myipaddr = 0;
2076                 } else {
2077                         ksocknal_del_route_locked(route);
2078                 }
2079         }
2080
2081         list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
2082                 conn = list_entry(tmp, struct ksock_conn, ksnc_list);
2083
2084                 if (conn->ksnc_myipaddr == ipaddr)
2085                         ksocknal_close_conn_locked(conn, 0);
2086         }
2087 }
2088
2089 static int
2090 ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress)
2091 {
2092         struct ksock_net *net = ni->ni_data;
2093         int rc = -ENOENT;
2094         struct list_head *tmp;
2095         struct list_head *nxt;
2096         struct ksock_peer *peer;
2097         __u32 this_ip;
2098         int i;
2099         int j;
2100
2101         write_lock_bh(&ksocknal_data.ksnd_global_lock);
2102
2103         for (i = 0; i < net->ksnn_ninterfaces; i++) {
2104                 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
2105
2106                 if (!(!ipaddress || ipaddress == this_ip))
2107                         continue;
2108
2109                 rc = 0;
2110
2111                 for (j = i + 1; j < net->ksnn_ninterfaces; j++)
2112                         net->ksnn_interfaces[j - 1] =
2113                                 net->ksnn_interfaces[j];
2114
2115                 net->ksnn_ninterfaces--;
2116
2117                 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
2118                         list_for_each_safe(tmp, nxt,
2119                                            &ksocknal_data.ksnd_peers[j]) {
2120                                 peer = list_entry(tmp, struct ksock_peer, ksnp_list);
2121
2122                                 if (peer->ksnp_ni != ni)
2123                                         continue;
2124
2125                                 ksocknal_peer_del_interface_locked(peer, this_ip);
2126                         }
2127                 }
2128         }
2129
2130         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2131
2132         return rc;
2133 }
2134
int
ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
{
	/*
	 * Dispatcher for the libcfs ioctls the socklnd implements:
	 * interface add/del/query, peer add/del/query, connection
	 * query/close/push.  @arg is a struct libcfs_ioctl_data; results
	 * are packed back into its ioc_u32[]/ioc_nid/ioc_count slots.
	 * Returns 0 or a negative errno.
	 */
	struct lnet_process_id id = {0};
	struct libcfs_ioctl_data *data = arg;
	int rc;

	switch (cmd) {
	case IOC_LIBCFS_GET_INTERFACE: {
		struct ksock_net       *net = ni->ni_data;
		struct ksock_interface *iface;

		read_lock(&ksocknal_data.ksnd_global_lock);

		/* ioc_count carries the index of the interface wanted */
		if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
			rc = -ENOENT;
		} else {
			rc = 0;
			iface = &net->ksnn_interfaces[data->ioc_count];

			data->ioc_u32[0] = iface->ksni_ipaddr;
			data->ioc_u32[1] = iface->ksni_netmask;
			data->ioc_u32[2] = iface->ksni_npeers;
			data->ioc_u32[3] = iface->ksni_nroutes;
		}

		read_unlock(&ksocknal_data.ksnd_global_lock);
		return rc;
	}

	case IOC_LIBCFS_ADD_INTERFACE:
		return ksocknal_add_interface(ni,
					      data->ioc_u32[0], /* IP address */
					      data->ioc_u32[1]); /* net mask */

	case IOC_LIBCFS_DEL_INTERFACE:
		return ksocknal_del_interface(ni,
					      data->ioc_u32[0]); /* IP address */

	case IOC_LIBCFS_GET_PEER: {
		__u32 myip = 0;
		__u32 ip = 0;
		int port = 0;
		int conn_count = 0;
		int share_count = 0;

		/* look up the ioc_count'th peer entry */
		rc = ksocknal_get_peer_info(ni, data->ioc_count,
					    &id, &myip, &ip, &port,
					    &conn_count,  &share_count);
		if (rc)
			return rc;

		data->ioc_nid    = id.nid;
		data->ioc_count  = share_count;
		data->ioc_u32[0] = ip;
		data->ioc_u32[1] = port;
		data->ioc_u32[2] = myip;
		data->ioc_u32[3] = conn_count;
		data->ioc_u32[4] = id.pid;
		return 0;
	}

	case IOC_LIBCFS_ADD_PEER:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_LUSTRE;
		return ksocknal_add_peer(ni, id,
					  data->ioc_u32[0], /* IP */
					  data->ioc_u32[1]); /* port */

	case IOC_LIBCFS_DEL_PEER:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_ANY;
		return ksocknal_del_peer(ni, id,
					  data->ioc_u32[0]); /* IP */

	case IOC_LIBCFS_GET_CONN: {
		int txmem;
		int rxmem;
		int nagle;
		struct ksock_conn *conn;

		/* lookup takes a conn ref; dropped below */
		conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
		if (!conn)
			return -ENOENT;

		ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);

		data->ioc_count  = txmem;
		data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
		data->ioc_flags  = nagle;
		data->ioc_u32[0] = conn->ksnc_ipaddr;
		data->ioc_u32[1] = conn->ksnc_port;
		data->ioc_u32[2] = conn->ksnc_myipaddr;
		data->ioc_u32[3] = conn->ksnc_type;
		data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
		data->ioc_u32[5] = rxmem;
		data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
		ksocknal_conn_decref(conn);
		return 0;
	}

	case IOC_LIBCFS_CLOSE_CONNECTION:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_ANY;
		return ksocknal_close_matching_conns(id,
						      data->ioc_u32[0]);

	case IOC_LIBCFS_REGISTER_MYNID:
		/* Ignore if this is a noop */
		if (data->ioc_nid == ni->ni_nid)
			return 0;

		CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
		       libcfs_nid2str(data->ioc_nid),
		       libcfs_nid2str(ni->ni_nid));
		return -EINVAL;

	case IOC_LIBCFS_PUSH_CONNECTION:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_ANY;
		return ksocknal_push(ni, id);

	default:
		return -EINVAL;
	}
	/* not reached */
}
2262
static void
ksocknal_free_buffers(void)
{
	/*
	 * Release memory allocated by ksocknal_base_startup(): the
	 * per-CPT scheduler arrays, the peer hash table and any idle
	 * pre-allocated noop txs.  Only called during base shutdown,
	 * when no txs can still be active.
	 */
	LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs));

	if (ksocknal_data.ksnd_sched_info) {
		struct ksock_sched_info *info;
		int i;

		cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
			if (info->ksi_scheds) {
				LIBCFS_FREE(info->ksi_scheds,
					    info->ksi_nthreads_max *
					    sizeof(info->ksi_scheds[0]));
			}
		}
		cfs_percpt_free(ksocknal_data.ksnd_sched_info);
	}

	LIBCFS_FREE(ksocknal_data.ksnd_peers,
		    sizeof(struct list_head) *
		    ksocknal_data.ksnd_peer_hash_size);

	spin_lock(&ksocknal_data.ksnd_tx_lock);

	if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
		struct list_head zlist;
		struct ksock_tx *tx;
		struct ksock_tx *temp;

		/*
		 * Splice the idle-tx list onto the local head: list_add
		 * inserts zlist into the chain, list_del_init then
		 * detaches (and re-initialises) the global head, so the
		 * txs can be freed after dropping the lock.
		 */
		list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
		list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
		spin_unlock(&ksocknal_data.ksnd_tx_lock);

		list_for_each_entry_safe(tx, temp, &zlist, tx_list) {
			list_del(&tx->tx_list);
			LIBCFS_FREE(tx, tx->tx_desc_size);
		}
	} else {
		spin_unlock(&ksocknal_data.ksnd_tx_lock);
	}
}
2305
static void
ksocknal_base_shutdown(void)
{
	/*
	 * Tear down everything ksocknal_base_startup() created once the
	 * last net has gone: assert all lists are already empty, flag
	 * shutdown, wake every thread and wait for them to exit, then
	 * free the remaining buffers.
	 */
	struct ksock_sched_info *info;
	struct ksock_sched *sched;
	int i;
	int j;

	LASSERT(!ksocknal_data.ksnd_nnets);

	switch (ksocknal_data.ksnd_init) {
	default:
		LASSERT(0);
		/* fall through: unreachable after the assert */

	case SOCKNAL_INIT_ALL:
	case SOCKNAL_INIT_DATA:
		/* nothing may still be queued anywhere */
		LASSERT(ksocknal_data.ksnd_peers);
		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
			LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));

		LASSERT(list_empty(&ksocknal_data.ksnd_nets));
		LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
		LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
		LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
		LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));

		if (ksocknal_data.ksnd_sched_info) {
			cfs_percpt_for_each(info, i,
					    ksocknal_data.ksnd_sched_info) {
				if (!info->ksi_scheds)
					continue;

				for (j = 0; j < info->ksi_nthreads_max; j++) {
					sched = &info->ksi_scheds[j];
					LASSERT(list_empty(
						&sched->kss_tx_conns));
					LASSERT(list_empty(
						&sched->kss_rx_conns));
					LASSERT(list_empty(
						&sched->kss_zombie_noop_txs));
					LASSERT(!sched->kss_nconns);
				}
			}
		}

		/* flag threads to terminate; wake and wait for them to die */
		ksocknal_data.ksnd_shuttingdown = 1;
		wake_up_all(&ksocknal_data.ksnd_connd_waitq);
		wake_up_all(&ksocknal_data.ksnd_reaper_waitq);

		if (ksocknal_data.ksnd_sched_info) {
			cfs_percpt_for_each(info, i,
					    ksocknal_data.ksnd_sched_info) {
				if (!info->ksi_scheds)
					continue;

				for (j = 0; j < info->ksi_nthreads_max; j++) {
					sched = &info->ksi_scheds[j];
					wake_up_all(&sched->kss_waitq);
				}
			}
		}

		/*
		 * Poll (1s sleep per iteration) until the thread count
		 * under the global lock drops to zero; log progress at
		 * power-of-2 iterations to avoid flooding.
		 */
		i = 4;
		read_lock(&ksocknal_data.ksnd_global_lock);
		while (ksocknal_data.ksnd_nthreads) {
			i++;
			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
			       "waiting for %d threads to terminate\n",
				ksocknal_data.ksnd_nthreads);
			read_unlock(&ksocknal_data.ksnd_global_lock);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(cfs_time_seconds(1));
			read_lock(&ksocknal_data.ksnd_global_lock);
		}
		read_unlock(&ksocknal_data.ksnd_global_lock);

		ksocknal_free_buffers();

		ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
		break;
	}

	/* drop the ref taken in ksocknal_base_startup() */
	module_put(THIS_MODULE);
}
2391
2392 static __u64
2393 ksocknal_new_incarnation(void)
2394 {
2395         /* The incarnation number is the time this module loaded and it
2396          * identifies this particular instance of the socknal.
2397          */
2398         return ktime_get_ns();
2399 }
2400
2401 static int
2402 ksocknal_base_startup(void)
2403 {
2404         struct ksock_sched_info *info;
2405         int rc;
2406         int i;
2407
2408         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
2409         LASSERT(!ksocknal_data.ksnd_nnets);
2410
2411         memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
2412
2413         ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
2414         LIBCFS_ALLOC(ksocknal_data.ksnd_peers,
2415                      sizeof(struct list_head) *
2416                      ksocknal_data.ksnd_peer_hash_size);
2417         if (!ksocknal_data.ksnd_peers)
2418                 return -ENOMEM;
2419
2420         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
2421                 INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
2422
2423         rwlock_init(&ksocknal_data.ksnd_global_lock);
2424         INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
2425
2426         spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
2427         INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
2428         INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
2429         INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
2430         init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
2431
2432         spin_lock_init(&ksocknal_data.ksnd_connd_lock);
2433         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
2434         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
2435         init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
2436
2437         spin_lock_init(&ksocknal_data.ksnd_tx_lock);
2438         INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
2439
2440         /* NB memset above zeros whole of ksocknal_data */
2441
2442         /* flag lists/ptrs/locks initialised */
2443         ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
2444         try_module_get(THIS_MODULE);
2445
2446         ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
2447                                                          sizeof(*info));
2448         if (!ksocknal_data.ksnd_sched_info)
2449                 goto failed;
2450
2451         cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
2452                 struct ksock_sched *sched;
2453                 int nthrs;
2454
2455                 nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
2456                 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2457                         nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
2458                 } else {
2459                         /*
2460                          * max to half of CPUs, assume another half should be
2461                          * reserved for upper layer modules
2462                          */
2463                         nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2464                 }
2465
2466                 info->ksi_nthreads_max = nthrs;
2467                 info->ksi_cpt = i;
2468
2469                 LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
2470                                  info->ksi_nthreads_max * sizeof(*sched));
2471                 if (!info->ksi_scheds)
2472                         goto failed;
2473
2474                 for (; nthrs > 0; nthrs--) {
2475                         sched = &info->ksi_scheds[nthrs - 1];
2476
2477                         sched->kss_info = info;
2478                         spin_lock_init(&sched->kss_lock);
2479                         INIT_LIST_HEAD(&sched->kss_rx_conns);
2480                         INIT_LIST_HEAD(&sched->kss_tx_conns);
2481                         INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
2482                         init_waitqueue_head(&sched->kss_waitq);
2483                 }
2484         }
2485
2486         ksocknal_data.ksnd_connd_starting       = 0;
2487         ksocknal_data.ksnd_connd_failed_stamp   = 0;
2488         ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
2489         /*
2490          * must have at least 2 connds to remain responsive to accepts while
2491          * connecting
2492          */
2493         if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
2494                 *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
2495
2496         if (*ksocknal_tunables.ksnd_nconnds_max <
2497             *ksocknal_tunables.ksnd_nconnds) {
2498                 ksocknal_tunables.ksnd_nconnds_max =
2499                         ksocknal_tunables.ksnd_nconnds;
2500         }
2501
2502         for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
2503                 char name[16];
2504
2505                 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2506                 ksocknal_data.ksnd_connd_starting++;
2507                 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2508
2509                 snprintf(name, sizeof(name), "socknal_cd%02d", i);
2510                 rc = ksocknal_thread_start(ksocknal_connd,
2511                                            (void *)((uintptr_t)i), name);
2512                 if (rc) {
2513                         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2514                         ksocknal_data.ksnd_connd_starting--;
2515                         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2516                         CERROR("Can't spawn socknal connd: %d\n", rc);
2517                         goto failed;
2518                 }
2519         }
2520
2521         rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
2522         if (rc) {
2523                 CERROR("Can't spawn socknal reaper: %d\n", rc);
2524                 goto failed;
2525         }
2526
2527         /* flag everything initialised */
2528         ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
2529
2530         return 0;
2531
2532  failed:
2533         ksocknal_base_shutdown();
2534         return -ENETDOWN;
2535 }
2536
2537 static void
2538 ksocknal_debug_peerhash(struct lnet_ni *ni)
2539 {
2540         struct ksock_peer *peer = NULL;
2541         struct list_head *tmp;
2542         int i;
2543
2544         read_lock(&ksocknal_data.ksnd_global_lock);
2545
2546         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2547                 list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
2548                         peer = list_entry(tmp, struct ksock_peer, ksnp_list);
2549
2550                         if (peer->ksnp_ni == ni)
2551                                 break;
2552
2553                         peer = NULL;
2554                 }
2555         }
2556
2557         if (peer) {
2558                 struct ksock_route *route;
2559                 struct ksock_conn  *conn;
2560
2561                 CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
2562                       libcfs_id2str(peer->ksnp_id),
2563                       atomic_read(&peer->ksnp_refcount),
2564                       peer->ksnp_sharecount, peer->ksnp_closing,
2565                       peer->ksnp_accepting, peer->ksnp_error,
2566                       peer->ksnp_zc_next_cookie,
2567                       !list_empty(&peer->ksnp_tx_queue),
2568                       !list_empty(&peer->ksnp_zc_req_list));
2569
2570                 list_for_each(tmp, &peer->ksnp_routes) {
2571                         route = list_entry(tmp, struct ksock_route, ksnr_list);
2572                         CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
2573                               atomic_read(&route->ksnr_refcount),
2574                               route->ksnr_scheduled, route->ksnr_connecting,
2575                               route->ksnr_connected, route->ksnr_deleted);
2576                 }
2577
2578                 list_for_each(tmp, &peer->ksnp_conns) {
2579                         conn = list_entry(tmp, struct ksock_conn, ksnc_list);
2580                         CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
2581                               atomic_read(&conn->ksnc_conn_refcount),
2582                               atomic_read(&conn->ksnc_sock_refcount),
2583                               conn->ksnc_type, conn->ksnc_closing);
2584                 }
2585         }
2586
2587         read_unlock(&ksocknal_data.ksnd_global_lock);
2588 }
2589
/*
 * LND shutdown handler for one socklnd network interface (NI).
 *
 * Marks the net as shutting down, deletes all its peers, waits for all
 * peer state to drain, then frees the net.  The last net to go also
 * tears down the module-global socklnd state.
 */
void
ksocknal_shutdown(struct lnet_ni *ni)
{
	struct ksock_net *net = ni->ni_data;
	int i;
	struct lnet_process_id anyid = {0};

	/* wildcard id: matches every peer on this NI */
	anyid.nid = LNET_NID_ANY;
	anyid.pid = LNET_PID_ANY;

	LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
	LASSERT(ksocknal_data.ksnd_nnets > 0);

	spin_lock_bh(&net->ksnn_lock);
	net->ksnn_shutdown = 1;          /* prevent new peers */
	spin_unlock_bh(&net->ksnn_lock);

	/* Delete all peers */
	ksocknal_del_peer(ni, anyid, 0);

	/* Wait for all peer state to clean up */
	i = 2;
	spin_lock_bh(&net->ksnn_lock);
	while (net->ksnn_npeers) {
		/* drop the lock while sleeping; re-taken before re-check */
		spin_unlock_bh(&net->ksnn_lock);

		/* log at D_WARNING with exponential backoff (powers of 2) */
		i++;
		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
		       "waiting for %d peers to disconnect\n",
		       net->ksnn_npeers);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1));

		/* dump lingering peer state to aid debugging stuck shutdown */
		ksocknal_debug_peerhash(ni);

		spin_lock_bh(&net->ksnn_lock);
	}
	spin_unlock_bh(&net->ksnn_lock);

	/* no peers left => per-interface counters must have drained */
	for (i = 0; i < net->ksnn_ninterfaces; i++) {
		LASSERT(!net->ksnn_interfaces[i].ksni_npeers);
		LASSERT(!net->ksnn_interfaces[i].ksni_nroutes);
	}

	list_del(&net->ksnn_list);
	LIBCFS_FREE(net, sizeof(*net));

	/* last net down => tear down module-global state too */
	ksocknal_data.ksnd_nnets--;
	if (!ksocknal_data.ksnd_nnets)
		ksocknal_base_shutdown();
}
2641
2642 static int
2643 ksocknal_enumerate_interfaces(struct ksock_net *net)
2644 {
2645         char **names;
2646         int i;
2647         int j;
2648         int rc;
2649         int n;
2650
2651         n = lnet_ipif_enumerate(&names);
2652         if (n <= 0) {
2653                 CERROR("Can't enumerate interfaces: %d\n", n);
2654                 return n;
2655         }
2656
2657         for (i = j = 0; i < n; i++) {
2658                 int up;
2659                 __u32 ip;
2660                 __u32 mask;
2661
2662                 if (!strcmp(names[i], "lo")) /* skip the loopback IF */
2663                         continue;
2664
2665                 rc = lnet_ipif_query(names[i], &up, &ip, &mask);
2666                 if (rc) {
2667                         CWARN("Can't get interface %s info: %d\n",
2668                               names[i], rc);
2669                         continue;
2670                 }
2671
2672                 if (!up) {
2673                         CWARN("Ignoring interface %s (down)\n",
2674                               names[i]);
2675                         continue;
2676                 }
2677
2678                 if (j == LNET_MAX_INTERFACES) {
2679                         CWARN("Ignoring interface %s (too many interfaces)\n",
2680                               names[i]);
2681                         continue;
2682                 }
2683
2684                 net->ksnn_interfaces[j].ksni_ipaddr = ip;
2685                 net->ksnn_interfaces[j].ksni_netmask = mask;
2686                 strlcpy(net->ksnn_interfaces[j].ksni_name,
2687                         names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
2688                 j++;
2689         }
2690
2691         lnet_ipif_free_enumeration(names, n);
2692
2693         if (!j)
2694                 CERROR("Can't find any usable interfaces\n");
2695
2696         return j;
2697 }
2698
/*
 * Count how many of @net's interfaces refer to physical devices not yet
 * used by any already-configured socklnd net.  Used to decide whether
 * extra scheduler threads are needed for a new interface.
 *
 * Alias devices ("eth0:1") are compared by their base name: the ':' is
 * temporarily overwritten with NUL for the strcmp and restored
 * afterwards, so the names are mutated in place but only transiently.
 *
 * Returns the number of interfaces in @net whose base device name does
 * not appear in any net on ksocknal_data.ksnd_nets.
 */
static int
ksocknal_search_new_ipif(struct ksock_net *net)
{
	int new_ipif = 0;
	int i;

	for (i = 0; i < net->ksnn_ninterfaces; i++) {
		char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
		char *colon = strchr(ifnam, ':');
		int found  = 0;
		struct ksock_net *tmp;
		int j;

		if (colon) /* ignore alias device */
			*colon = 0;

		/* scan every configured net for the same base device name */
		list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
			for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
				char *ifnam2 =
					&tmp->ksnn_interfaces[j].ksni_name[0];
				char *colon2 = strchr(ifnam2, ':');

				if (colon2)
					*colon2 = 0;

				found = !strcmp(ifnam, ifnam2);
				if (colon2) /* restore the alias suffix */
					*colon2 = ':';
			}
			if (found)
				break;
		}

		new_ipif += !found;
		if (colon) /* restore the alias suffix */
			*colon = ':';
	}

	return new_ipif;
}
2739
/*
 * Start scheduler threads for one CPT's scheduler info.
 *
 * On the first call for a CPT (ksi_nthreads == 0) the initial thread
 * count is derived from the nscheds tunable or the CPT weight; on later
 * calls (a new interface was added) at most two extra threads are
 * started, capped at ksi_nthreads_max.
 *
 * Returns 0 on success or the error from the first failed thread start;
 * ksi_nthreads is updated by the number of threads actually started
 * either way.
 */
static int
ksocknal_start_schedulers(struct ksock_sched_info *info)
{
	int nthrs;
	int rc = 0;
	int i;

	if (!info->ksi_nthreads) {
		if (*ksocknal_tunables.ksnd_nscheds > 0) {
			nthrs = info->ksi_nthreads_max;
		} else {
			/* no tunable: scale with CPT weight, leaving
			 * roughly half the CPUs for upper layers
			 */
			nthrs = cfs_cpt_weight(lnet_cpt_table(),
					       info->ksi_cpt);
			nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
			nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
		}
		nthrs = min(nthrs, info->ksi_nthreads_max);
	} else {
		LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
		/* increase two threads if there is new interface */
		nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
	}

	for (i = 0; i < nthrs; i++) {
		long id;
		char name[20];
		struct ksock_sched *sched;

		/* encode (cpt, per-cpt index) into the thread id */
		id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
		sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
		snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
			 info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));

		rc = ksocknal_thread_start(ksocknal_scheduler,
					   (void *)id, name);
		if (!rc)
			continue;

		CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
		       info->ksi_cpt, info->ksi_nthreads + i, rc);
		break;
	}

	/* i threads were started even if we broke out early on error */
	info->ksi_nthreads += i;
	return rc;
}
2786
2787 static int
2788 ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
2789 {
2790         int newif = ksocknal_search_new_ipif(net);
2791         int rc;
2792         int i;
2793
2794         LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
2795
2796         for (i = 0; i < ncpts; i++) {
2797                 struct ksock_sched_info *info;
2798                 int cpt = !cpts ? i : cpts[i];
2799
2800                 LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
2801                 info = ksocknal_data.ksnd_sched_info[cpt];
2802
2803                 if (!newif && info->ksi_nthreads > 0)
2804                         continue;
2805
2806                 rc = ksocknal_start_schedulers(info);
2807                 if (rc)
2808                         return rc;
2809         }
2810         return 0;
2811 }
2812
/*
 * LND startup handler for one socklnd network interface (NI).
 *
 * Brings up module-global state on first use, allocates the per-net
 * structure, binds it to either the configured interface list or the
 * first auto-discovered interface, starts scheduler threads, and derives
 * the NI's NID from the primary interface address.
 *
 * Returns 0 on success, a negative errno from base startup, or
 * -ENETDOWN on any later failure (all partial state is torn down).
 */
int
ksocknal_startup(struct lnet_ni *ni)
{
	struct ksock_net *net;
	int rc;
	int i;

	LASSERT(ni->ni_lnd == &the_ksocklnd);

	/* first NI brings up the module-global state */
	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
		rc = ksocknal_base_startup();
		if (rc)
			return rc;
	}

	LIBCFS_ALLOC(net, sizeof(*net));
	if (!net)
		goto fail_0;

	spin_lock_init(&net->ksnn_lock);
	net->ksnn_incarnation = ksocknal_new_incarnation();
	ni->ni_data = net;
	/* publish tunable-derived credits/timeouts on the NI */
	ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
	ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
	ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
	ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;

	if (!ni->ni_interfaces[0]) {
		/* no interfaces configured: auto-discover, but use only
		 * the first one found
		 */
		rc = ksocknal_enumerate_interfaces(net);
		if (rc <= 0)
			goto fail_1;

		net->ksnn_ninterfaces = 1;
	} else {
		/* use exactly the configured interfaces; all must be up */
		for (i = 0; i < LNET_MAX_INTERFACES; i++) {
			int up;

			if (!ni->ni_interfaces[i])
				break;

			rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
					     &net->ksnn_interfaces[i].ksni_ipaddr,
					     &net->ksnn_interfaces[i].ksni_netmask);

			if (rc) {
				CERROR("Can't get interface %s info: %d\n",
				       ni->ni_interfaces[i], rc);
				goto fail_1;
			}

			if (!up) {
				CERROR("Interface %s is down\n",
				       ni->ni_interfaces[i]);
				goto fail_1;
			}

			strlcpy(net->ksnn_interfaces[i].ksni_name,
				ni->ni_interfaces[i],
				sizeof(net->ksnn_interfaces[i].ksni_name));
		}
		net->ksnn_ninterfaces = i;
	}

	/* call it before add it to ksocknal_data.ksnd_nets */
	rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
	if (rc)
		goto fail_1;

	/* NID = net number + address of the primary interface */
	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
				net->ksnn_interfaces[0].ksni_ipaddr);
	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);

	ksocknal_data.ksnd_nnets++;

	return 0;

 fail_1:
	LIBCFS_FREE(net, sizeof(*net));
 fail_0:
	/* if no other net is up, undo the base startup done above */
	if (!ksocknal_data.ksnd_nnets)
		ksocknal_base_shutdown();

	return -ENETDOWN;
}
2897
/* Module exit: unregister the LND; LNet tears down any remaining NIs. */
static void __exit ksocklnd_exit(void)
{
	lnet_unregister_lnd(&the_ksocklnd);
}
2902
2903 static int __init ksocklnd_init(void)
2904 {
2905         int rc;
2906
2907         /* check ksnr_connected/connecting field large enough */
2908         BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
2909         BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
2910
2911         /* initialize the_ksocklnd */
2912         the_ksocklnd.lnd_type     = SOCKLND;
2913         the_ksocklnd.lnd_startup  = ksocknal_startup;
2914         the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
2915         the_ksocklnd.lnd_ctl      = ksocknal_ctl;
2916         the_ksocklnd.lnd_send     = ksocknal_send;
2917         the_ksocklnd.lnd_recv     = ksocknal_recv;
2918         the_ksocklnd.lnd_notify   = ksocknal_notify;
2919         the_ksocklnd.lnd_query    = ksocknal_query;
2920         the_ksocklnd.lnd_accept   = ksocknal_accept;
2921
2922         rc = ksocknal_tunables_init();
2923         if (rc)
2924                 return rc;
2925
2926         lnet_register_lnd(&the_ksocklnd);
2927
2928         return 0;
2929 }
2930
/* Module metadata */
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("TCP Socket LNet Network Driver");
MODULE_VERSION("2.7.0");
MODULE_LICENSE("GPL");

/* module entry/exit points */
module_init(ksocklnd_init);
module_exit(ksocklnd_exit);