]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/smc/af_smc.c
x86/cpu: Drop wp_works_ok member of struct cpuinfo_x86
[karo-tx-linux.git] / net / smc / af_smc.c
1 /*
2  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
3  *
4  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
5  *  applies to SOCK_STREAM sockets only
6  *  offers an alternative communication option for TCP-protocol sockets
7  *  applicable with RoCE-cards only
8  *
9  *  Initial restrictions:
10  *    - non-blocking connect postponed
11  *    - IPv6 support postponed
12  *    - support for alternate links postponed
13  *    - partial support for non-blocking sockets only
14  *    - support for urgent data postponed
15  *
16  *  Copyright IBM Corp. 2016
17  *
18  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
19  *              based on prototype from Frank Blaschka
20  */
21
22 #define KMSG_COMPONENT "smc"
23 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24
25 #include <linux/module.h>
26 #include <linux/socket.h>
27 #include <linux/inetdevice.h>
28 #include <linux/workqueue.h>
29 #include <linux/in.h>
30 #include <linux/sched/signal.h>
31
32 #include <net/sock.h>
33 #include <net/tcp.h>
34 #include <net/smc.h>
35
36 #include "smc.h"
37 #include "smc_clc.h"
38 #include "smc_llc.h"
39 #include "smc_cdc.h"
40 #include "smc_core.h"
41 #include "smc_ib.h"
42 #include "smc_pnet.h"
43 #include "smc_tx.h"
44 #include "smc_rx.h"
45 #include "smc_close.h"
46
47 static DEFINE_MUTEX(smc_create_lgr_pending);    /* serialize link group
48                                                  * creation
49                                                  */
50
51 struct smc_lgr_list smc_lgr_list = {            /* established link groups */
52         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
53         .list = LIST_HEAD_INIT(smc_lgr_list.list),
54 };
55
56 static void smc_tcp_listen_work(struct work_struct *);
57
58 static void smc_set_keepalive(struct sock *sk, int val)
59 {
60         struct smc_sock *smc = smc_sk(sk);
61
62         smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
63 }
64
65 static struct smc_hashinfo smc_v4_hashinfo = {
66         .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
67 };
68
69 int smc_hash_sk(struct sock *sk)
70 {
71         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
72         struct hlist_head *head;
73
74         head = &h->ht;
75
76         write_lock_bh(&h->lock);
77         sk_add_node(sk, head);
78         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
79         write_unlock_bh(&h->lock);
80
81         return 0;
82 }
83 EXPORT_SYMBOL_GPL(smc_hash_sk);
84
85 void smc_unhash_sk(struct sock *sk)
86 {
87         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
88
89         write_lock_bh(&h->lock);
90         if (sk_del_node_init(sk))
91                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
92         write_unlock_bh(&h->lock);
93 }
94 EXPORT_SYMBOL_GPL(smc_unhash_sk);
95
96 struct proto smc_proto = {
97         .name           = "SMC",
98         .owner          = THIS_MODULE,
99         .keepalive      = smc_set_keepalive,
100         .hash           = smc_hash_sk,
101         .unhash         = smc_unhash_sk,
102         .obj_size       = sizeof(struct smc_sock),
103         .h.smc_hash     = &smc_v4_hashinfo,
104         .slab_flags     = SLAB_DESTROY_BY_RCU,
105 };
106 EXPORT_SYMBOL_GPL(smc_proto);
107
108 static int smc_release(struct socket *sock)
109 {
110         struct sock *sk = sock->sk;
111         struct smc_sock *smc;
112         int rc = 0;
113
114         if (!sk)
115                 goto out;
116
117         smc = smc_sk(sk);
118         sock_hold(sk);
119         if (sk->sk_state == SMC_LISTEN)
120                 /* smc_close_non_accepted() is called and acquires
121                  * sock lock for child sockets again
122                  */
123                 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
124         else
125                 lock_sock(sk);
126
127         if (smc->use_fallback) {
128                 sk->sk_state = SMC_CLOSED;
129                 sk->sk_state_change(sk);
130         } else {
131                 rc = smc_close_active(smc);
132                 sock_set_flag(sk, SOCK_DEAD);
133                 sk->sk_shutdown |= SHUTDOWN_MASK;
134         }
135         if (smc->clcsock) {
136                 sock_release(smc->clcsock);
137                 smc->clcsock = NULL;
138         }
139
140         /* detach socket */
141         sock_orphan(sk);
142         sock->sk = NULL;
143         if (smc->use_fallback) {
144                 schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
145         } else if (sk->sk_state == SMC_CLOSED) {
146                 smc_conn_free(&smc->conn);
147                 schedule_delayed_work(&smc->sock_put_work,
148                                       SMC_CLOSE_SOCK_PUT_DELAY);
149         }
150         sk->sk_prot->unhash(sk);
151         release_sock(sk);
152
153         sock_put(sk);
154 out:
155         return rc;
156 }
157
158 static void smc_destruct(struct sock *sk)
159 {
160         if (sk->sk_state != SMC_CLOSED)
161                 return;
162         if (!sock_flag(sk, SOCK_DEAD))
163                 return;
164
165         sk_refcnt_debug_dec(sk);
166 }
167
168 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
169 {
170         struct smc_sock *smc;
171         struct sock *sk;
172
173         sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
174         if (!sk)
175                 return NULL;
176
177         sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
178         sk->sk_state = SMC_INIT;
179         sk->sk_destruct = smc_destruct;
180         sk->sk_protocol = SMCPROTO_SMC;
181         smc = smc_sk(sk);
182         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
183         INIT_LIST_HEAD(&smc->accept_q);
184         spin_lock_init(&smc->accept_q_lock);
185         INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
186         sk->sk_prot->hash(sk);
187         sk_refcnt_debug_inc(sk);
188
189         return sk;
190 }
191
192 static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
193                     int addr_len)
194 {
195         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
196         struct sock *sk = sock->sk;
197         struct smc_sock *smc;
198         int rc;
199
200         smc = smc_sk(sk);
201
202         /* replicate tests from inet_bind(), to be safe wrt. future changes */
203         rc = -EINVAL;
204         if (addr_len < sizeof(struct sockaddr_in))
205                 goto out;
206
207         rc = -EAFNOSUPPORT;
208         /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
209         if ((addr->sin_family != AF_INET) &&
210             ((addr->sin_family != AF_UNSPEC) ||
211              (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
212                 goto out;
213
214         lock_sock(sk);
215
216         /* Check if socket is already active */
217         rc = -EINVAL;
218         if (sk->sk_state != SMC_INIT)
219                 goto out_rel;
220
221         smc->clcsock->sk->sk_reuse = sk->sk_reuse;
222         rc = kernel_bind(smc->clcsock, uaddr, addr_len);
223
224 out_rel:
225         release_sock(sk);
226 out:
227         return rc;
228 }
229
230 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
231                                    unsigned long mask)
232 {
233         /* options we don't get control via setsockopt for */
234         nsk->sk_type = osk->sk_type;
235         nsk->sk_sndbuf = osk->sk_sndbuf;
236         nsk->sk_rcvbuf = osk->sk_rcvbuf;
237         nsk->sk_sndtimeo = osk->sk_sndtimeo;
238         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
239         nsk->sk_mark = osk->sk_mark;
240         nsk->sk_priority = osk->sk_priority;
241         nsk->sk_rcvlowat = osk->sk_rcvlowat;
242         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
243         nsk->sk_err = osk->sk_err;
244
245         nsk->sk_flags &= ~mask;
246         nsk->sk_flags |= osk->sk_flags & mask;
247 }
248
249 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
250                              (1UL << SOCK_KEEPOPEN) | \
251                              (1UL << SOCK_LINGER) | \
252                              (1UL << SOCK_BROADCAST) | \
253                              (1UL << SOCK_TIMESTAMP) | \
254                              (1UL << SOCK_DBG) | \
255                              (1UL << SOCK_RCVTSTAMP) | \
256                              (1UL << SOCK_RCVTSTAMPNS) | \
257                              (1UL << SOCK_LOCALROUTE) | \
258                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
259                              (1UL << SOCK_RXQ_OVFL) | \
260                              (1UL << SOCK_WIFI_STATUS) | \
261                              (1UL << SOCK_NOFCS) | \
262                              (1UL << SOCK_FILTER_LOCKED))
263 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
264  * clc socket (since smc is not called for these options from net/core)
265  */
266 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
267 {
268         smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
269 }
270
271 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
272                              (1UL << SOCK_KEEPOPEN) | \
273                              (1UL << SOCK_LINGER) | \
274                              (1UL << SOCK_DBG))
275 /* copy only settings and flags relevant for smc from clc to smc socket */
276 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
277 {
278         smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
279 }
280
281 /* determine subnet and mask of internal TCP socket */
282 int smc_netinfo_by_tcpsk(struct socket *clcsock,
283                          __be32 *subnet, u8 *prefix_len)
284 {
285         struct dst_entry *dst = sk_dst_get(clcsock->sk);
286         struct sockaddr_in addr;
287         int rc = -ENOENT;
288         int len;
289
290         if (!dst) {
291                 rc = -ENOTCONN;
292                 goto out;
293         }
294         if (!dst->dev) {
295                 rc = -ENODEV;
296                 goto out_rel;
297         }
298
299         /* get address to which the internal TCP socket is bound */
300         kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
301         /* analyze IPv4 specific data of net_device belonging to TCP socket */
302         for_ifa(dst->dev->ip_ptr) {
303                 if (ifa->ifa_address != addr.sin_addr.s_addr)
304                         continue;
305                 *prefix_len = inet_mask_len(ifa->ifa_mask);
306                 *subnet = ifa->ifa_address & ifa->ifa_mask;
307                 rc = 0;
308                 break;
309         } endfor_ifa(dst->dev->ip_ptr);
310
311 out_rel:
312         dst_release(dst);
313 out:
314         return rc;
315 }
316
317 static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
318 {
319         struct smc_link_group *lgr = smc->conn.lgr;
320         struct smc_link *link;
321         int rest;
322         int rc;
323
324         link = &lgr->lnk[SMC_SINGLE_LINK];
325         /* receive CONFIRM LINK request from server over RoCE fabric */
326         rest = wait_for_completion_interruptible_timeout(
327                 &link->llc_confirm,
328                 SMC_LLC_WAIT_FIRST_TIME);
329         if (rest <= 0) {
330                 struct smc_clc_msg_decline dclc;
331
332                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
333                                       SMC_CLC_DECLINE);
334                 return rc;
335         }
336
337         rc = smc_ib_modify_qp_rts(link);
338         if (rc)
339                 return SMC_CLC_DECL_INTERR;
340
341         smc_wr_remember_qp_attr(link);
342         /* send CONFIRM LINK response over RoCE fabric */
343         rc = smc_llc_send_confirm_link(link,
344                                        link->smcibdev->mac[link->ibport - 1],
345                                        gid, SMC_LLC_RESP);
346         if (rc < 0)
347                 return SMC_CLC_DECL_TCL;
348
349         return rc;
350 }
351
352 static void smc_conn_save_peer_info(struct smc_sock *smc,
353                                     struct smc_clc_msg_accept_confirm *clc)
354 {
355         smc->conn.peer_conn_idx = clc->conn_idx;
356         smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
357         smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
358         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
359 }
360
361 static void smc_link_save_peer_info(struct smc_link *link,
362                                     struct smc_clc_msg_accept_confirm *clc)
363 {
364         link->peer_qpn = ntoh24(clc->qpn);
365         memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
366         memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
367         link->peer_psn = ntoh24(clc->psn);
368         link->peer_mtu = clc->qp_mtu;
369 }
370
371 /* setup for RDMA connection of client */
372 static int smc_connect_rdma(struct smc_sock *smc)
373 {
374         struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
375         struct smc_clc_msg_accept_confirm aclc;
376         int local_contact = SMC_FIRST_CONTACT;
377         struct smc_ib_device *smcibdev;
378         struct smc_link *link;
379         u8 srv_first_contact;
380         int reason_code = 0;
381         int rc = 0;
382         u8 ibport;
383
384         /* IPSec connections opt out of SMC-R optimizations */
385         if (using_ipsec(smc)) {
386                 reason_code = SMC_CLC_DECL_IPSEC;
387                 goto decline_rdma;
388         }
389
390         /* PNET table look up: search active ib_device and port
391          * within same PNETID that also contains the ethernet device
392          * used for the internal TCP socket
393          */
394         smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
395         if (!smcibdev) {
396                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
397                 goto decline_rdma;
398         }
399
400         /* do inband token exchange */
401         reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
402         if (reason_code < 0) {
403                 rc = reason_code;
404                 goto out_err;
405         }
406         if (reason_code > 0) /* configuration error */
407                 goto decline_rdma;
408         /* receive SMC Accept CLC message */
409         reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
410                                        SMC_CLC_ACCEPT);
411         if (reason_code < 0) {
412                 rc = reason_code;
413                 goto out_err;
414         }
415         if (reason_code > 0)
416                 goto decline_rdma;
417
418         srv_first_contact = aclc.hdr.flag;
419         mutex_lock(&smc_create_lgr_pending);
420         local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
421                                         ibport, &aclc.lcl, srv_first_contact);
422         if (local_contact < 0) {
423                 rc = local_contact;
424                 if (rc == -ENOMEM)
425                         reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
426                 else if (rc == -ENOLINK)
427                         reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
428                 goto decline_rdma_unlock;
429         }
430         link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
431
432         smc_conn_save_peer_info(smc, &aclc);
433
434         rc = smc_sndbuf_create(smc);
435         if (rc) {
436                 reason_code = SMC_CLC_DECL_MEM;
437                 goto decline_rdma_unlock;
438         }
439         rc = smc_rmb_create(smc);
440         if (rc) {
441                 reason_code = SMC_CLC_DECL_MEM;
442                 goto decline_rdma_unlock;
443         }
444
445         if (local_contact == SMC_FIRST_CONTACT)
446                 smc_link_save_peer_info(link, &aclc);
447
448         rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
449         if (rc) {
450                 reason_code = SMC_CLC_DECL_INTERR;
451                 goto decline_rdma_unlock;
452         }
453
454         if (local_contact == SMC_FIRST_CONTACT) {
455                 rc = smc_ib_ready_link(link);
456                 if (rc) {
457                         reason_code = SMC_CLC_DECL_INTERR;
458                         goto decline_rdma_unlock;
459                 }
460         }
461
462         rc = smc_clc_send_confirm(smc);
463         if (rc)
464                 goto out_err_unlock;
465
466         if (local_contact == SMC_FIRST_CONTACT) {
467                 /* QP confirmation over RoCE fabric */
468                 reason_code = smc_clnt_conf_first_link(
469                         smc, &smcibdev->gid[ibport - 1]);
470                 if (reason_code < 0) {
471                         rc = reason_code;
472                         goto out_err_unlock;
473                 }
474                 if (reason_code > 0)
475                         goto decline_rdma_unlock;
476         }
477
478         mutex_unlock(&smc_create_lgr_pending);
479         smc_tx_init(smc);
480         smc_rx_init(smc);
481
482 out_connected:
483         smc_copy_sock_settings_to_clc(smc);
484         if (smc->sk.sk_state == SMC_INIT)
485                 smc->sk.sk_state = SMC_ACTIVE;
486
487         return rc ? rc : local_contact;
488
489 decline_rdma_unlock:
490         mutex_unlock(&smc_create_lgr_pending);
491         smc_conn_free(&smc->conn);
492 decline_rdma:
493         /* RDMA setup failed, switch back to TCP */
494         smc->use_fallback = true;
495         if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
496                 rc = smc_clc_send_decline(smc, reason_code, 0);
497                 if (rc < sizeof(struct smc_clc_msg_decline))
498                         goto out_err;
499         }
500         goto out_connected;
501
502 out_err_unlock:
503         mutex_unlock(&smc_create_lgr_pending);
504         smc_conn_free(&smc->conn);
505 out_err:
506         return rc;
507 }
508
509 static int smc_connect(struct socket *sock, struct sockaddr *addr,
510                        int alen, int flags)
511 {
512         struct sock *sk = sock->sk;
513         struct smc_sock *smc;
514         int rc = -EINVAL;
515
516         smc = smc_sk(sk);
517
518         /* separate smc parameter checking to be safe */
519         if (alen < sizeof(addr->sa_family))
520                 goto out_err;
521         if (addr->sa_family != AF_INET)
522                 goto out_err;
523         smc->addr = addr;       /* needed for nonblocking connect */
524
525         lock_sock(sk);
526         switch (sk->sk_state) {
527         default:
528                 goto out;
529         case SMC_ACTIVE:
530                 rc = -EISCONN;
531                 goto out;
532         case SMC_INIT:
533                 rc = 0;
534                 break;
535         }
536
537         smc_copy_sock_settings_to_clc(smc);
538         rc = kernel_connect(smc->clcsock, addr, alen, flags);
539         if (rc)
540                 goto out;
541
542         /* setup RDMA connection */
543         rc = smc_connect_rdma(smc);
544         if (rc < 0)
545                 goto out;
546         else
547                 rc = 0; /* success cases including fallback */
548
549 out:
550         release_sock(sk);
551 out_err:
552         return rc;
553 }
554
555 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
556 {
557         struct sock *sk = &lsmc->sk;
558         struct socket *new_clcsock;
559         struct sock *new_sk;
560         int rc;
561
562         release_sock(&lsmc->sk);
563         new_sk = smc_sock_alloc(sock_net(sk), NULL);
564         if (!new_sk) {
565                 rc = -ENOMEM;
566                 lsmc->sk.sk_err = ENOMEM;
567                 *new_smc = NULL;
568                 lock_sock(&lsmc->sk);
569                 goto out;
570         }
571         *new_smc = smc_sk(new_sk);
572
573         rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
574         lock_sock(&lsmc->sk);
575         if  (rc < 0) {
576                 lsmc->sk.sk_err = -rc;
577                 new_sk->sk_state = SMC_CLOSED;
578                 sock_set_flag(new_sk, SOCK_DEAD);
579                 sk->sk_prot->unhash(new_sk);
580                 sock_put(new_sk);
581                 *new_smc = NULL;
582                 goto out;
583         }
584         if (lsmc->sk.sk_state == SMC_CLOSED) {
585                 if (new_clcsock)
586                         sock_release(new_clcsock);
587                 new_sk->sk_state = SMC_CLOSED;
588                 sock_set_flag(new_sk, SOCK_DEAD);
589                 sk->sk_prot->unhash(new_sk);
590                 sock_put(new_sk);
591                 *new_smc = NULL;
592                 goto out;
593         }
594
595         (*new_smc)->clcsock = new_clcsock;
596 out:
597         return rc;
598 }
599
600 /* add a just created sock to the accept queue of the listen sock as
601  * candidate for a following socket accept call from user space
602  */
603 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
604 {
605         struct smc_sock *par = smc_sk(parent);
606
607         sock_hold(sk);
608         spin_lock(&par->accept_q_lock);
609         list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
610         spin_unlock(&par->accept_q_lock);
611         sk_acceptq_added(parent);
612 }
613
614 /* remove a socket from the accept queue of its parental listening socket */
615 static void smc_accept_unlink(struct sock *sk)
616 {
617         struct smc_sock *par = smc_sk(sk)->listen_smc;
618
619         spin_lock(&par->accept_q_lock);
620         list_del_init(&smc_sk(sk)->accept_q);
621         spin_unlock(&par->accept_q_lock);
622         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
623         sock_put(sk);
624 }
625
626 /* remove a sock from the accept queue to bind it to a new socket created
627  * for a socket accept call from user space
628  */
629 struct sock *smc_accept_dequeue(struct sock *parent,
630                                 struct socket *new_sock)
631 {
632         struct smc_sock *isk, *n;
633         struct sock *new_sk;
634
635         list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
636                 new_sk = (struct sock *)isk;
637
638                 smc_accept_unlink(new_sk);
639                 if (new_sk->sk_state == SMC_CLOSED) {
640                         /* tbd in follow-on patch: close this sock */
641                         continue;
642                 }
643                 if (new_sock)
644                         sock_graft(new_sk, new_sock);
645                 return new_sk;
646         }
647         return NULL;
648 }
649
650 /* clean up for a created but never accepted sock */
651 void smc_close_non_accepted(struct sock *sk)
652 {
653         struct smc_sock *smc = smc_sk(sk);
654
655         sock_hold(sk);
656         lock_sock(sk);
657         if (!sk->sk_lingertime)
658                 /* wait for peer closing */
659                 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
660         if (!smc->use_fallback)
661                 smc_close_active(smc);
662         if (smc->clcsock) {
663                 struct socket *tcp;
664
665                 tcp = smc->clcsock;
666                 smc->clcsock = NULL;
667                 sock_release(tcp);
668         }
669         sock_set_flag(sk, SOCK_DEAD);
670         sk->sk_shutdown |= SHUTDOWN_MASK;
671         if (smc->use_fallback) {
672                 schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
673         } else {
674                 smc_conn_free(&smc->conn);
675                 schedule_delayed_work(&smc->sock_put_work,
676                                       SMC_CLOSE_SOCK_PUT_DELAY);
677         }
678         release_sock(sk);
679         sock_put(sk);
680 }
681
682 static int smc_serv_conf_first_link(struct smc_sock *smc)
683 {
684         struct smc_link_group *lgr = smc->conn.lgr;
685         struct smc_link *link;
686         int rest;
687         int rc;
688
689         link = &lgr->lnk[SMC_SINGLE_LINK];
690         /* send CONFIRM LINK request to client over the RoCE fabric */
691         rc = smc_llc_send_confirm_link(link,
692                                        link->smcibdev->mac[link->ibport - 1],
693                                        &link->smcibdev->gid[link->ibport - 1],
694                                        SMC_LLC_REQ);
695         if (rc < 0)
696                 return SMC_CLC_DECL_TCL;
697
698         /* receive CONFIRM LINK response from client over the RoCE fabric */
699         rest = wait_for_completion_interruptible_timeout(
700                 &link->llc_confirm_resp,
701                 SMC_LLC_WAIT_FIRST_TIME);
702         if (rest <= 0) {
703                 struct smc_clc_msg_decline dclc;
704
705                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
706                                       SMC_CLC_DECLINE);
707         }
708
709         return rc;
710 }
711
712 /* setup for RDMA connection of server */
713 static void smc_listen_work(struct work_struct *work)
714 {
715         struct smc_sock *new_smc = container_of(work, struct smc_sock,
716                                                 smc_listen_work);
717         struct socket *newclcsock = new_smc->clcsock;
718         struct smc_sock *lsmc = new_smc->listen_smc;
719         struct smc_clc_msg_accept_confirm cclc;
720         int local_contact = SMC_REUSE_CONTACT;
721         struct sock *newsmcsk = &new_smc->sk;
722         struct smc_clc_msg_proposal pclc;
723         struct smc_ib_device *smcibdev;
724         struct sockaddr_in peeraddr;
725         struct smc_link *link;
726         int reason_code = 0;
727         int rc = 0, len;
728         __be32 subnet;
729         u8 prefix_len;
730         u8 ibport;
731
732         /* do inband token exchange -
733          *wait for and receive SMC Proposal CLC message
734          */
735         reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
736                                        SMC_CLC_PROPOSAL);
737         if (reason_code < 0)
738                 goto out_err;
739         if (reason_code > 0)
740                 goto decline_rdma;
741
742         /* IPSec connections opt out of SMC-R optimizations */
743         if (using_ipsec(new_smc)) {
744                 reason_code = SMC_CLC_DECL_IPSEC;
745                 goto decline_rdma;
746         }
747
748         /* PNET table look up: search active ib_device and port
749          * within same PNETID that also contains the ethernet device
750          * used for the internal TCP socket
751          */
752         smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
753         if (!smcibdev) {
754                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
755                 goto decline_rdma;
756         }
757
758         /* determine subnet and mask from internal TCP socket */
759         rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
760         if (rc) {
761                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
762                 goto decline_rdma;
763         }
764         if ((pclc.outgoing_subnet != subnet) ||
765             (pclc.prefix_len != prefix_len)) {
766                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
767                 goto decline_rdma;
768         }
769
770         /* get address of the peer connected to the internal TCP socket */
771         kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
772
773         /* allocate connection / link group */
774         mutex_lock(&smc_create_lgr_pending);
775         local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
776                                         smcibdev, ibport, &pclc.lcl, 0);
777         if (local_contact == SMC_REUSE_CONTACT)
778                 /* lock no longer needed, free it due to following
779                  * smc_clc_wait_msg() call
780                  */
781                 mutex_unlock(&smc_create_lgr_pending);
782         if (local_contact < 0) {
783                 rc = local_contact;
784                 if (rc == -ENOMEM)
785                         reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
786                 else if (rc == -ENOLINK)
787                         reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
788                 goto decline_rdma;
789         }
790         link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
791
792         rc = smc_sndbuf_create(new_smc);
793         if (rc) {
794                 reason_code = SMC_CLC_DECL_MEM;
795                 goto decline_rdma;
796         }
797         rc = smc_rmb_create(new_smc);
798         if (rc) {
799                 reason_code = SMC_CLC_DECL_MEM;
800                 goto decline_rdma;
801         }
802
803         rc = smc_clc_send_accept(new_smc, local_contact);
804         if (rc)
805                 goto out_err;
806
807         /* receive SMC Confirm CLC message */
808         reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
809                                        SMC_CLC_CONFIRM);
810         if (reason_code < 0)
811                 goto out_err;
812         if (reason_code > 0)
813                 goto decline_rdma;
814         smc_conn_save_peer_info(new_smc, &cclc);
815         if (local_contact == SMC_FIRST_CONTACT)
816                 smc_link_save_peer_info(link, &cclc);
817
818         rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
819         if (rc) {
820                 reason_code = SMC_CLC_DECL_INTERR;
821                 goto decline_rdma;
822         }
823
824         if (local_contact == SMC_FIRST_CONTACT) {
825                 rc = smc_ib_ready_link(link);
826                 if (rc) {
827                         reason_code = SMC_CLC_DECL_INTERR;
828                         goto decline_rdma;
829                 }
830                 /* QP confirmation over RoCE fabric */
831                 reason_code = smc_serv_conf_first_link(new_smc);
832                 if (reason_code < 0) {
833                         /* peer is not aware of a problem */
834                         rc = reason_code;
835                         goto out_err;
836                 }
837                 if (reason_code > 0)
838                         goto decline_rdma;
839         }
840
841         smc_tx_init(new_smc);
842         smc_rx_init(new_smc);
843
844 out_connected:
845         sk_refcnt_debug_inc(newsmcsk);
846         if (newsmcsk->sk_state == SMC_INIT)
847                 newsmcsk->sk_state = SMC_ACTIVE;
848 enqueue:
849         if (local_contact == SMC_FIRST_CONTACT)
850                 mutex_unlock(&smc_create_lgr_pending);
851         lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
852         if (lsmc->sk.sk_state == SMC_LISTEN) {
853                 smc_accept_enqueue(&lsmc->sk, newsmcsk);
854         } else { /* no longer listening */
855                 smc_close_non_accepted(newsmcsk);
856         }
857         release_sock(&lsmc->sk);
858
859         /* Wake up accept */
860         lsmc->sk.sk_data_ready(&lsmc->sk);
861         sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
862         return;
863
864 decline_rdma:
865         /* RDMA setup failed, switch back to TCP */
866         smc_conn_free(&new_smc->conn);
867         new_smc->use_fallback = true;
868         if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
869                 rc = smc_clc_send_decline(new_smc, reason_code, 0);
870                 if (rc < sizeof(struct smc_clc_msg_decline))
871                         goto out_err;
872         }
873         goto out_connected;
874
875 out_err:
876         newsmcsk->sk_state = SMC_CLOSED;
877         smc_conn_free(&new_smc->conn);
878         goto enqueue; /* queue new sock with sk_err set */
879 }
880
881 static void smc_tcp_listen_work(struct work_struct *work)
882 {
883         struct smc_sock *lsmc = container_of(work, struct smc_sock,
884                                              tcp_listen_work);
885         struct smc_sock *new_smc;
886         int rc = 0;
887
888         lock_sock(&lsmc->sk);
889         while (lsmc->sk.sk_state == SMC_LISTEN) {
890                 rc = smc_clcsock_accept(lsmc, &new_smc);
891                 if (rc)
892                         goto out;
893                 if (!new_smc)
894                         continue;
895
896                 new_smc->listen_smc = lsmc;
897                 new_smc->use_fallback = false; /* assume rdma capability first*/
898                 sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
899                 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
900                 smc_copy_sock_settings_to_smc(new_smc);
901                 schedule_work(&new_smc->smc_listen_work);
902         }
903
904 out:
905         release_sock(&lsmc->sk);
906         lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
907 }
908
909 static int smc_listen(struct socket *sock, int backlog)
910 {
911         struct sock *sk = sock->sk;
912         struct smc_sock *smc;
913         int rc;
914
915         smc = smc_sk(sk);
916         lock_sock(sk);
917
918         rc = -EINVAL;
919         if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
920                 goto out;
921
922         rc = 0;
923         if (sk->sk_state == SMC_LISTEN) {
924                 sk->sk_max_ack_backlog = backlog;
925                 goto out;
926         }
927         /* some socket options are handled in core, so we could not apply
928          * them to the clc socket -- copy smc socket options to clc socket
929          */
930         smc_copy_sock_settings_to_clc(smc);
931
932         rc = kernel_listen(smc->clcsock, backlog);
933         if (rc)
934                 goto out;
935         sk->sk_max_ack_backlog = backlog;
936         sk->sk_ack_backlog = 0;
937         sk->sk_state = SMC_LISTEN;
938         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
939         schedule_work(&smc->tcp_listen_work);
940
941 out:
942         release_sock(sk);
943         return rc;
944 }
945
946 static int smc_accept(struct socket *sock, struct socket *new_sock,
947                       int flags)
948 {
949         struct sock *sk = sock->sk, *nsk;
950         DECLARE_WAITQUEUE(wait, current);
951         struct smc_sock *lsmc;
952         long timeo;
953         int rc = 0;
954
955         lsmc = smc_sk(sk);
956         lock_sock(sk);
957
958         if (lsmc->sk.sk_state != SMC_LISTEN) {
959                 rc = -EINVAL;
960                 goto out;
961         }
962
963         /* Wait for an incoming connection */
964         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
965         add_wait_queue_exclusive(sk_sleep(sk), &wait);
966         while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
967                 set_current_state(TASK_INTERRUPTIBLE);
968                 if (!timeo) {
969                         rc = -EAGAIN;
970                         break;
971                 }
972                 release_sock(sk);
973                 timeo = schedule_timeout(timeo);
974                 /* wakeup by sk_data_ready in smc_listen_work() */
975                 sched_annotate_sleep();
976                 lock_sock(sk);
977                 if (signal_pending(current)) {
978                         rc = sock_intr_errno(timeo);
979                         break;
980                 }
981         }
982         set_current_state(TASK_RUNNING);
983         remove_wait_queue(sk_sleep(sk), &wait);
984
985         if (!rc)
986                 rc = sock_error(nsk);
987
988 out:
989         release_sock(sk);
990         return rc;
991 }
992
993 static int smc_getname(struct socket *sock, struct sockaddr *addr,
994                        int *len, int peer)
995 {
996         struct smc_sock *smc;
997
998         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
999             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1000                 return -ENOTCONN;
1001
1002         smc = smc_sk(sock->sk);
1003
1004         return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
1005 }
1006
1007 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1008 {
1009         struct sock *sk = sock->sk;
1010         struct smc_sock *smc;
1011         int rc = -EPIPE;
1012
1013         smc = smc_sk(sk);
1014         lock_sock(sk);
1015         if ((sk->sk_state != SMC_ACTIVE) &&
1016             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1017             (sk->sk_state != SMC_INIT))
1018                 goto out;
1019         if (smc->use_fallback)
1020                 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1021         else
1022                 rc = smc_tx_sendmsg(smc, msg, len);
1023 out:
1024         release_sock(sk);
1025         return rc;
1026 }
1027
1028 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1029                        int flags)
1030 {
1031         struct sock *sk = sock->sk;
1032         struct smc_sock *smc;
1033         int rc = -ENOTCONN;
1034
1035         smc = smc_sk(sk);
1036         lock_sock(sk);
1037         if ((sk->sk_state == SMC_INIT) ||
1038             (sk->sk_state == SMC_LISTEN) ||
1039             (sk->sk_state == SMC_CLOSED))
1040                 goto out;
1041
1042         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1043                 rc = 0;
1044                 goto out;
1045         }
1046
1047         if (smc->use_fallback)
1048                 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1049         else
1050                 rc = smc_rx_recvmsg(smc, msg, len, flags);
1051
1052 out:
1053         release_sock(sk);
1054         return rc;
1055 }
1056
1057 static unsigned int smc_accept_poll(struct sock *parent)
1058 {
1059         struct smc_sock *isk;
1060         struct sock *sk;
1061
1062         lock_sock(parent);
1063         list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
1064                 sk = (struct sock *)isk;
1065
1066                 if (sk->sk_state == SMC_ACTIVE) {
1067                         release_sock(parent);
1068                         return POLLIN | POLLRDNORM;
1069                 }
1070         }
1071         release_sock(parent);
1072
1073         return 0;
1074 }
1075
1076 static unsigned int smc_poll(struct file *file, struct socket *sock,
1077                              poll_table *wait)
1078 {
1079         struct sock *sk = sock->sk;
1080         unsigned int mask = 0;
1081         struct smc_sock *smc;
1082         int rc;
1083
1084         smc = smc_sk(sock->sk);
1085         if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
1086                 /* delegate to CLC child sock */
1087                 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1088                 /* if non-blocking connect finished ... */
1089                 lock_sock(sk);
1090                 if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
1091                         sk->sk_err = smc->clcsock->sk->sk_err;
1092                         if (sk->sk_err) {
1093                                 mask |= POLLERR;
1094                         } else {
1095                                 rc = smc_connect_rdma(smc);
1096                                 if (rc < 0)
1097                                         mask |= POLLERR;
1098                                 else
1099                                         /* success cases including fallback */
1100                                         mask |= POLLOUT | POLLWRNORM;
1101                         }
1102                 }
1103                 release_sock(sk);
1104         } else {
1105                 sock_poll_wait(file, sk_sleep(sk), wait);
1106                 if (sk->sk_state == SMC_LISTEN)
1107                         /* woken up by sk_data_ready in smc_listen_work() */
1108                         mask |= smc_accept_poll(sk);
1109                 if (sk->sk_err)
1110                         mask |= POLLERR;
1111                 if (atomic_read(&smc->conn.sndbuf_space) ||
1112                     (sk->sk_shutdown & SEND_SHUTDOWN)) {
1113                         mask |= POLLOUT | POLLWRNORM;
1114                 } else {
1115                         sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1116                         set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1117                 }
1118                 if (atomic_read(&smc->conn.bytes_to_rcv))
1119                         mask |= POLLIN | POLLRDNORM;
1120                 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
1121                     (sk->sk_state == SMC_CLOSED))
1122                         mask |= POLLHUP;
1123                 if (sk->sk_shutdown & RCV_SHUTDOWN)
1124                         mask |= POLLIN | POLLRDNORM | POLLRDHUP;
1125                 if (sk->sk_state == SMC_APPCLOSEWAIT1)
1126                         mask |= POLLIN;
1127
1128         }
1129
1130         return mask;
1131 }
1132
1133 static int smc_shutdown(struct socket *sock, int how)
1134 {
1135         struct sock *sk = sock->sk;
1136         struct smc_sock *smc;
1137         int rc = -EINVAL;
1138         int rc1 = 0;
1139
1140         smc = smc_sk(sk);
1141
1142         if ((how < SHUT_RD) || (how > SHUT_RDWR))
1143                 return rc;
1144
1145         lock_sock(sk);
1146
1147         rc = -ENOTCONN;
1148         if ((sk->sk_state != SMC_LISTEN) &&
1149             (sk->sk_state != SMC_ACTIVE) &&
1150             (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1151             (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1152             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1153             (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1154             (sk->sk_state != SMC_APPFINCLOSEWAIT))
1155                 goto out;
1156         if (smc->use_fallback) {
1157                 rc = kernel_sock_shutdown(smc->clcsock, how);
1158                 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1159                 if (sk->sk_shutdown == SHUTDOWN_MASK)
1160                         sk->sk_state = SMC_CLOSED;
1161                 goto out;
1162         }
1163         switch (how) {
1164         case SHUT_RDWR:         /* shutdown in both directions */
1165                 rc = smc_close_active(smc);
1166                 break;
1167         case SHUT_WR:
1168                 rc = smc_close_shutdown_write(smc);
1169                 break;
1170         case SHUT_RD:
1171                 if (sk->sk_state == SMC_LISTEN)
1172                         rc = smc_close_active(smc);
1173                 else
1174                         rc = 0;
1175                         /* nothing more to do because peer is not involved */
1176                 break;
1177         }
1178         rc1 = kernel_sock_shutdown(smc->clcsock, how);
1179         /* map sock_shutdown_cmd constants to sk_shutdown value range */
1180         sk->sk_shutdown |= how + 1;
1181
1182 out:
1183         release_sock(sk);
1184         return rc ? rc : rc1;
1185 }
1186
1187 static int smc_setsockopt(struct socket *sock, int level, int optname,
1188                           char __user *optval, unsigned int optlen)
1189 {
1190         struct sock *sk = sock->sk;
1191         struct smc_sock *smc;
1192
1193         smc = smc_sk(sk);
1194
1195         /* generic setsockopts reaching us here always apply to the
1196          * CLC socket
1197          */
1198         return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1199                                              optval, optlen);
1200 }
1201
1202 static int smc_getsockopt(struct socket *sock, int level, int optname,
1203                           char __user *optval, int __user *optlen)
1204 {
1205         struct smc_sock *smc;
1206
1207         smc = smc_sk(sock->sk);
1208         /* socket options apply to the CLC socket */
1209         return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1210                                              optval, optlen);
1211 }
1212
1213 static int smc_ioctl(struct socket *sock, unsigned int cmd,
1214                      unsigned long arg)
1215 {
1216         struct smc_sock *smc;
1217
1218         smc = smc_sk(sock->sk);
1219         if (smc->use_fallback)
1220                 return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1221         else
1222                 return sock_no_ioctl(sock, cmd, arg);
1223 }
1224
1225 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1226                             int offset, size_t size, int flags)
1227 {
1228         struct sock *sk = sock->sk;
1229         struct smc_sock *smc;
1230         int rc = -EPIPE;
1231
1232         smc = smc_sk(sk);
1233         lock_sock(sk);
1234         if (sk->sk_state != SMC_ACTIVE)
1235                 goto out;
1236         if (smc->use_fallback)
1237                 rc = kernel_sendpage(smc->clcsock, page, offset,
1238                                      size, flags);
1239         else
1240                 rc = sock_no_sendpage(sock, page, offset, size, flags);
1241
1242 out:
1243         release_sock(sk);
1244         return rc;
1245 }
1246
1247 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1248                                struct pipe_inode_info *pipe, size_t len,
1249                                     unsigned int flags)
1250 {
1251         struct sock *sk = sock->sk;
1252         struct smc_sock *smc;
1253         int rc = -ENOTCONN;
1254
1255         smc = smc_sk(sk);
1256         lock_sock(sk);
1257         if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
1258                 goto out;
1259         if (smc->use_fallback) {
1260                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1261                                                     pipe, len, flags);
1262         } else {
1263                 rc = -EOPNOTSUPP;
1264         }
1265 out:
1266         release_sock(sk);
1267         return rc;
1268 }
1269
1270 /* must look like tcp */
1271 static const struct proto_ops smc_sock_ops = {
1272         .family         = PF_SMC,
1273         .owner          = THIS_MODULE,
1274         .release        = smc_release,
1275         .bind           = smc_bind,
1276         .connect        = smc_connect,
1277         .socketpair     = sock_no_socketpair,
1278         .accept         = smc_accept,
1279         .getname        = smc_getname,
1280         .poll           = smc_poll,
1281         .ioctl          = smc_ioctl,
1282         .listen         = smc_listen,
1283         .shutdown       = smc_shutdown,
1284         .setsockopt     = smc_setsockopt,
1285         .getsockopt     = smc_getsockopt,
1286         .sendmsg        = smc_sendmsg,
1287         .recvmsg        = smc_recvmsg,
1288         .mmap           = sock_no_mmap,
1289         .sendpage       = smc_sendpage,
1290         .splice_read    = smc_splice_read,
1291 };
1292
1293 static int smc_create(struct net *net, struct socket *sock, int protocol,
1294                       int kern)
1295 {
1296         struct smc_sock *smc;
1297         struct sock *sk;
1298         int rc;
1299
1300         rc = -ESOCKTNOSUPPORT;
1301         if (sock->type != SOCK_STREAM)
1302                 goto out;
1303
1304         rc = -EPROTONOSUPPORT;
1305         if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
1306                 goto out;
1307
1308         rc = -ENOBUFS;
1309         sock->ops = &smc_sock_ops;
1310         sk = smc_sock_alloc(net, sock);
1311         if (!sk)
1312                 goto out;
1313
1314         /* create internal TCP socket for CLC handshake and fallback */
1315         smc = smc_sk(sk);
1316         smc->use_fallback = false; /* assume rdma capability first */
1317         rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
1318                               IPPROTO_TCP, &smc->clcsock);
1319         if (rc)
1320                 sk_common_release(sk);
1321         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1322         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
1323
1324 out:
1325         return rc;
1326 }
1327
1328 static const struct net_proto_family smc_sock_family_ops = {
1329         .family = PF_SMC,
1330         .owner  = THIS_MODULE,
1331         .create = smc_create,
1332 };
1333
1334 static int __init smc_init(void)
1335 {
1336         int rc;
1337
1338         rc = smc_pnet_init();
1339         if (rc)
1340                 return rc;
1341
1342         rc = smc_llc_init();
1343         if (rc) {
1344                 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
1345                 goto out_pnet;
1346         }
1347
1348         rc = smc_cdc_init();
1349         if (rc) {
1350                 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
1351                 goto out_pnet;
1352         }
1353
1354         rc = proto_register(&smc_proto, 1);
1355         if (rc) {
1356                 pr_err("%s: proto_register fails with %d\n", __func__, rc);
1357                 goto out_pnet;
1358         }
1359
1360         rc = sock_register(&smc_sock_family_ops);
1361         if (rc) {
1362                 pr_err("%s: sock_register fails with %d\n", __func__, rc);
1363                 goto out_proto;
1364         }
1365         INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
1366
1367         rc = smc_ib_register_client();
1368         if (rc) {
1369                 pr_err("%s: ib_register fails with %d\n", __func__, rc);
1370                 goto out_sock;
1371         }
1372
1373         return 0;
1374
1375 out_sock:
1376         sock_unregister(PF_SMC);
1377 out_proto:
1378         proto_unregister(&smc_proto);
1379 out_pnet:
1380         smc_pnet_exit();
1381         return rc;
1382 }
1383
1384 static void __exit smc_exit(void)
1385 {
1386         struct smc_link_group *lgr, *lg;
1387         LIST_HEAD(lgr_freeing_list);
1388
1389         spin_lock_bh(&smc_lgr_list.lock);
1390         if (!list_empty(&smc_lgr_list.list))
1391                 list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
1392         spin_unlock_bh(&smc_lgr_list.lock);
1393         list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
1394                 list_del_init(&lgr->list);
1395                 smc_lgr_free(lgr); /* free link group */
1396         }
1397         smc_ib_unregister_client();
1398         sock_unregister(PF_SMC);
1399         proto_unregister(&smc_proto);
1400         smc_pnet_exit();
1401 }
1402
1403 module_init(smc_init);
1404 module_exit(smc_exit);
1405
1406 MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
1407 MODULE_DESCRIPTION("smc socket address family");
1408 MODULE_LICENSE("GPL");
1409 MODULE_ALIAS_NETPROTO(PF_SMC);