GNU Linux-libre 4.19.264-gnu1
[releases.git] / drivers / crypto / chelsio / chtls / chtls_io.c
1 /*
2  * Copyright (c) 2018 Chelsio Communications, Inc.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * Written by: Atul Gupta (atul.gupta@chelsio.com)
9  */
10
11 #include <linux/module.h>
12 #include <linux/list.h>
13 #include <linux/workqueue.h>
14 #include <linux/skbuff.h>
15 #include <linux/timer.h>
16 #include <linux/notifier.h>
17 #include <linux/inetdevice.h>
18 #include <linux/ip.h>
19 #include <linux/tcp.h>
20 #include <linux/sched/signal.h>
21 #include <net/tcp.h>
22 #include <net/busy_poll.h>
23 #include <crypto/aes.h>
24
25 #include "chtls.h"
26 #include "chtls_cm.h"
27
28 static bool is_tls_tx(struct chtls_sock *csk)
29 {
30         return csk->tlshws.txkey >= 0;
31 }
32
33 static bool is_tls_rx(struct chtls_sock *csk)
34 {
35         return csk->tlshws.rxkey >= 0;
36 }
37
38 static int data_sgl_len(const struct sk_buff *skb)
39 {
40         unsigned int cnt;
41
42         cnt = skb_shinfo(skb)->nr_frags;
43         return sgl_len(cnt) * 8;
44 }
45
46 static int nos_ivs(struct sock *sk, unsigned int size)
47 {
48         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
49
50         return DIV_ROUND_UP(size, csk->tlshws.mfs);
51 }
52
53 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
54 {
55         int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
56         int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
57
58         if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
59             MAX_IMM_OFLD_TX_DATA_WR_LEN) {
60                 ULP_SKB_CB(skb)->ulp.tls.iv = 1;
61                 return 1;
62         }
63         ULP_SKB_CB(skb)->ulp.tls.iv = 0;
64         return 0;
65 }
66
67 static int max_ivs_size(struct sock *sk, int size)
68 {
69         return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
70 }
71
72 static int ivs_size(struct sock *sk, const struct sk_buff *skb)
73 {
74         return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
75                  CIPHER_BLOCK_SIZE) : 0;
76 }
77
78 static int flowc_wr_credits(int nparams, int *flowclenp)
79 {
80         int flowclen16, flowclen;
81
82         flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
83         flowclen16 = DIV_ROUND_UP(flowclen, 16);
84         flowclen = flowclen16 * 16;
85
86         if (flowclenp)
87                 *flowclenp = flowclen;
88
89         return flowclen16;
90 }
91
92 static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
93                                            struct fw_flowc_wr *flowc,
94                                            int flowclen)
95 {
96         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
97         struct sk_buff *skb;
98
99         skb = alloc_skb(flowclen, GFP_ATOMIC);
100         if (!skb)
101                 return NULL;
102
103         memcpy(__skb_put(skb, flowclen), flowc, flowclen);
104         skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
105
106         return skb;
107 }
108
109 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
110                          int flowclen)
111 {
112         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
113         struct tcp_sock *tp = tcp_sk(sk);
114         struct sk_buff *skb;
115         int flowclen16;
116         int ret;
117
118         flowclen16 = flowclen / 16;
119
120         if (csk_flag(sk, CSK_TX_DATA_SENT)) {
121                 skb = create_flowc_wr_skb(sk, flowc, flowclen);
122                 if (!skb)
123                         return -ENOMEM;
124
125                 skb_entail(sk, skb,
126                            ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
127                 return 0;
128         }
129
130         ret = cxgb4_immdata_send(csk->egress_dev,
131                                  csk->txq_idx,
132                                  flowc, flowclen);
133         if (!ret)
134                 return flowclen16;
135         skb = create_flowc_wr_skb(sk, flowc, flowclen);
136         if (!skb)
137                 return -ENOMEM;
138         send_or_defer(sk, tp, skb, 0);
139         return flowclen16;
140 }
141
142 static u8 tcp_state_to_flowc_state(u8 state)
143 {
144         switch (state) {
145         case TCP_ESTABLISHED:
146                 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
147         case TCP_CLOSE_WAIT:
148                 return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
149         case TCP_FIN_WAIT1:
150                 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
151         case TCP_CLOSING:
152                 return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
153         case TCP_LAST_ACK:
154                 return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
155         case TCP_FIN_WAIT2:
156                 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
157         }
158
159         return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
160 }
161
162 int send_tx_flowc_wr(struct sock *sk, int compl,
163                      u32 snd_nxt, u32 rcv_nxt)
164 {
165         struct flowc_packed {
166                 struct fw_flowc_wr fc;
167                 struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
168         } __packed sflowc;
169         int nparams, paramidx, flowclen16, flowclen;
170         struct fw_flowc_wr *flowc;
171         struct chtls_sock *csk;
172         struct tcp_sock *tp;
173
174         csk = rcu_dereference_sk_user_data(sk);
175         tp = tcp_sk(sk);
176         memset(&sflowc, 0, sizeof(sflowc));
177         flowc = &sflowc.fc;
178
179 #define FLOWC_PARAM(__m, __v) \
180         do { \
181                 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
182                 flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
183                 paramidx++; \
184         } while (0)
185
186         paramidx = 0;
187
188         FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
189         FLOWC_PARAM(CH, csk->tx_chan);
190         FLOWC_PARAM(PORT, csk->tx_chan);
191         FLOWC_PARAM(IQID, csk->rss_qid);
192         FLOWC_PARAM(SNDNXT, tp->snd_nxt);
193         FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
194         FLOWC_PARAM(SNDBUF, csk->sndbuf);
195         FLOWC_PARAM(MSS, tp->mss_cache);
196         FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));
197
198         if (SND_WSCALE(tp))
199                 FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));
200
201         if (csk->ulp_mode == ULP_MODE_TLS)
202                 FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);
203
204         if (csk->tlshws.fcplenmax)
205                 FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);
206
207         nparams = paramidx;
208 #undef FLOWC_PARAM
209
210         flowclen16 = flowc_wr_credits(nparams, &flowclen);
211         flowc->op_to_nparams =
212                 cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
213                             FW_WR_COMPL_V(compl) |
214                             FW_FLOWC_WR_NPARAMS_V(nparams));
215         flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
216                                           FW_WR_FLOWID_V(csk->tid));
217
218         return send_flowc_wr(sk, flowc, flowclen);
219 }
220
221 /* Copy IVs to WR */
222 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
223
224 {
225         struct chtls_sock *csk;
226         unsigned char *iv_loc;
227         struct chtls_hws *hws;
228         unsigned char *ivs;
229         u16 number_of_ivs;
230         struct page *page;
231         int err = 0;
232
233         csk = rcu_dereference_sk_user_data(sk);
234         hws = &csk->tlshws;
235         number_of_ivs = nos_ivs(sk, skb->len);
236
237         if (number_of_ivs > MAX_IVS_PAGE) {
238                 pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
239                 return -ENOMEM;
240         }
241
242         /* generate the  IVs */
243         ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
244         if (!ivs)
245                 return -ENOMEM;
246         get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
247
248         if (skb_ulp_tls_iv_imm(skb)) {
249                 /* send the IVs as immediate data in the WR */
250                 iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
251                                                 CIPHER_BLOCK_SIZE);
252                 if (iv_loc)
253                         memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
254
255                 hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
256         } else {
257                 /* Send the IVs as sgls */
258                 /* Already accounted IV DSGL for credits */
259                 skb_shinfo(skb)->nr_frags--;
260                 page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
261                 if (!page) {
262                         pr_info("%s : Page allocation for IVs failed\n",
263                                 __func__);
264                         err = -ENOMEM;
265                         goto out;
266                 }
267                 memcpy(page_address(page), ivs, number_of_ivs *
268                        CIPHER_BLOCK_SIZE);
269                 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
270                                    number_of_ivs * CIPHER_BLOCK_SIZE);
271                 hws->ivsize = 0;
272         }
273 out:
274         kfree(ivs);
275         return err;
276 }
277
278 /* Copy Key to WR */
279 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
280 {
281         struct ulptx_sc_memrd *sc_memrd;
282         struct chtls_sock *csk;
283         struct chtls_dev *cdev;
284         struct ulptx_idata *sc;
285         struct chtls_hws *hws;
286         u32 immdlen;
287         int kaddr;
288
289         csk = rcu_dereference_sk_user_data(sk);
290         hws = &csk->tlshws;
291         cdev = csk->cdev;
292
293         immdlen = sizeof(*sc) + sizeof(*sc_memrd);
294         kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
295         sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
296         if (sc) {
297                 sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
298                 sc->len = htonl(0);
299                 sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
300                 sc_memrd->cmd_to_len =
301                                 htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
302                                 ULP_TX_SC_MORE_V(1) |
303                                 ULPTX_LEN16_V(hws->keylen >> 4));
304                 sc_memrd->addr = htonl(kaddr);
305         }
306 }
307
308 static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
309 {
310         return hws->tx_seq_no++;
311 }
312
313 static bool is_sg_request(const struct sk_buff *skb)
314 {
315         return skb->peeked ||
316                 (skb->len > MAX_IMM_ULPTX_WR_LEN);
317 }
318
319 /*
320  * Returns true if an sk_buff carries urgent data.
321  */
322 static bool skb_urgent(struct sk_buff *skb)
323 {
324         return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
325 }
326
327 /* TLS content type for CPL SFO */
328 static unsigned char tls_content_type(unsigned char content_type)
329 {
330         switch (content_type) {
331         case TLS_HDR_TYPE_CCS:
332                 return CPL_TX_TLS_SFO_TYPE_CCS;
333         case TLS_HDR_TYPE_ALERT:
334                 return CPL_TX_TLS_SFO_TYPE_ALERT;
335         case TLS_HDR_TYPE_HANDSHAKE:
336                 return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
337         case TLS_HDR_TYPE_HEARTBEAT:
338                 return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
339         }
340         return CPL_TX_TLS_SFO_TYPE_DATA;
341 }
342
343 static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
344                            int dlen, int tls_immd, u32 credits,
345                            int expn, int pdus)
346 {
347         struct fw_tlstx_data_wr *req_wr;
348         struct cpl_tx_tls_sfo *req_cpl;
349         unsigned int wr_ulp_mode_force;
350         struct tls_scmd *updated_scmd;
351         unsigned char data_type;
352         struct chtls_sock *csk;
353         struct net_device *dev;
354         struct chtls_hws *hws;
355         struct tls_scmd *scmd;
356         struct adapter *adap;
357         unsigned char *req;
358         int immd_len;
359         int iv_imm;
360         int len;
361
362         csk = rcu_dereference_sk_user_data(sk);
363         iv_imm = skb_ulp_tls_iv_imm(skb);
364         dev = csk->egress_dev;
365         adap = netdev2adap(dev);
366         hws = &csk->tlshws;
367         scmd = &hws->scmd;
368         len = dlen + expn;
369
370         dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
371         atomic_inc(&adap->chcr_stats.tls_pdu_tx);
372
373         updated_scmd = scmd;
374         updated_scmd->seqno_numivs &= 0xffffff80;
375         updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
376         hws->scmd = *updated_scmd;
377
378         req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
379         req_cpl = (struct cpl_tx_tls_sfo *)req;
380         req = (unsigned char *)__skb_push(skb, (sizeof(struct
381                                 fw_tlstx_data_wr)));
382
383         req_wr = (struct fw_tlstx_data_wr *)req;
384         immd_len = (tls_immd ? dlen : 0);
385         req_wr->op_to_immdlen =
386                 htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
387                 FW_TLSTX_DATA_WR_COMPL_V(1) |
388                 FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
389         req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
390                                      FW_TLSTX_DATA_WR_LEN16_V(credits));
391         wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);
392
393         if (is_sg_request(skb))
394                 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
395                         ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
396                         FW_OFLD_TX_DATA_WR_SHOVE_F);
397
398         req_wr->lsodisable_to_flags =
399                         htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
400                               FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) |
401                               T6_TX_FORCE_F | wr_ulp_mode_force |
402                               TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
403                                          skb_queue_empty(&csk->txq)));
404
405         req_wr->ctxloc_to_exp =
406                         htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
407                               FW_TLSTX_DATA_WR_EXP_V(expn) |
408                               FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
409                               FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
410                               FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));
411
412         /* Fill in the length */
413         req_wr->plen = htonl(len);
414         req_wr->mfs = htons(hws->mfs);
415         req_wr->adjustedplen_pkd =
416                 htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
417         req_wr->expinplenmax_pkd =
418                 htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
419         req_wr->pdusinplenmax_pkd =
420                 FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
421         req_wr->r10 = 0;
422
423         data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
424         req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
425                                        CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
426                                        CPL_TX_TLS_SFO_CPL_LEN_V(2) |
427                                        CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
428         req_cpl->pld_len = htonl(len - expn);
429
430         req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
431                 ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
432                 TLS_HDR_TYPE_HEARTBEAT : 0) |
433                 CPL_TX_TLS_SFO_PROTOVER_V(0));
434
435         /* create the s-command */
436         req_cpl->r1_lo = 0;
437         req_cpl->seqno_numivs  = cpu_to_be32(hws->scmd.seqno_numivs);
438         req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
439         req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
440 }
441
442 /*
443  * Calculate the TLS data expansion size
444  */
445 static int chtls_expansion_size(struct sock *sk, int data_len,
446                                 int fullpdu,
447                                 unsigned short *pducnt)
448 {
449         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
450         struct chtls_hws *hws = &csk->tlshws;
451         struct tls_scmd *scmd = &hws->scmd;
452         int fragsize = hws->mfs;
453         int expnsize = 0;
454         int fragleft;
455         int fragcnt;
456         int expppdu;
457
458         if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
459             SCMD_CIPH_MODE_AES_GCM) {
460                 expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
461                           TLS_HEADER_LENGTH;
462
463                 if (fullpdu) {
464                         *pducnt = data_len / (expppdu + fragsize);
465                         if (*pducnt > 32)
466                                 *pducnt = 32;
467                         else if (!*pducnt)
468                                 *pducnt = 1;
469                         expnsize = (*pducnt) * expppdu;
470                         return expnsize;
471                 }
472                 fragcnt = (data_len / fragsize);
473                 expnsize =  fragcnt * expppdu;
474                 fragleft = data_len % fragsize;
475                 if (fragleft > 0)
476                         expnsize += expppdu;
477         }
478         return expnsize;
479 }
480
481 /* WR with IV, KEY and CPL SFO added */
482 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
483                                int tls_tx_imm, int tls_len, u32 credits)
484 {
485         unsigned short pdus_per_ulp = 0;
486         struct chtls_sock *csk;
487         struct chtls_hws *hws;
488         int expn_sz;
489         int pdus;
490
491         csk = rcu_dereference_sk_user_data(sk);
492         hws = &csk->tlshws;
493         pdus = DIV_ROUND_UP(tls_len, hws->mfs);
494         expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
495         if (!hws->compute) {
496                 hws->expansion = chtls_expansion_size(sk,
497                                                       hws->fcplenmax,
498                                                       1, &pdus_per_ulp);
499                 hws->pdus = pdus_per_ulp;
500                 hws->adjustlen = hws->pdus *
501                         ((hws->expansion / hws->pdus) + hws->mfs);
502                 hws->compute = 1;
503         }
504         if (tls_copy_ivs(sk, skb))
505                 return;
506         tls_copy_tx_key(sk, skb);
507         tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
508         hws->tx_seq_no += (pdus - 1);
509 }
510
511 static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
512                             unsigned int immdlen, int len,
513                             u32 credits, u32 compl)
514 {
515         struct fw_ofld_tx_data_wr *req;
516         unsigned int wr_ulp_mode_force;
517         struct chtls_sock *csk;
518         unsigned int opcode;
519
520         csk = rcu_dereference_sk_user_data(sk);
521         opcode = FW_OFLD_TX_DATA_WR;
522
523         req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
524         req->op_to_immdlen = htonl(WR_OP_V(opcode) |
525                                 FW_WR_COMPL_V(compl) |
526                                 FW_WR_IMMDLEN_V(immdlen));
527         req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
528                                 FW_WR_LEN16_V(credits));
529
530         wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
531         if (is_sg_request(skb))
532                 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
533                         ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
534                                 FW_OFLD_TX_DATA_WR_SHOVE_F);
535
536         req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
537                         FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) |
538                         FW_OFLD_TX_DATA_WR_SHOVE_V((!csk_flag
539                                         (sk, CSK_TX_MORE_DATA)) &&
540                                          skb_queue_empty(&csk->txq)));
541         req->plen = htonl(len);
542 }
543
544 static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
545                          bool size)
546 {
547         int wr_size;
548
549         wr_size = TLS_WR_CPL_LEN;
550         wr_size += KEY_ON_MEM_SZ;
551         wr_size += ivs_size(csk->sk, skb);
552
553         if (size)
554                 return wr_size;
555
556         /* frags counted for IV dsgl */
557         if (!skb_ulp_tls_iv_imm(skb))
558                 skb_shinfo(skb)->nr_frags++;
559
560         return wr_size;
561 }
562
563 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
564 {
565         int length = skb->len;
566
567         if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
568                 return false;
569
570         if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
571                 /* Check TLS header len for Immediate */
572                 if (csk->ulp_mode == ULP_MODE_TLS &&
573                     skb_ulp_tls_inline(skb))
574                         length += chtls_wr_size(csk, skb, true);
575                 else
576                         length += sizeof(struct fw_ofld_tx_data_wr);
577
578                 return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
579         }
580         return true;
581 }
582
583 static unsigned int calc_tx_flits(const struct sk_buff *skb,
584                                   unsigned int immdlen)
585 {
586         unsigned int flits, cnt;
587
588         flits = immdlen / 8;   /* headers */
589         cnt = skb_shinfo(skb)->nr_frags;
590         if (skb_tail_pointer(skb) != skb_transport_header(skb))
591                 cnt++;
592         return flits + sgl_len(cnt);
593 }
594
/* ARP failure handler: frees the skb; @handle is not used. */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	kfree_skb(skb);
}
599
600 int chtls_push_frames(struct chtls_sock *csk, int comp)
601 {
602         struct chtls_hws *hws = &csk->tlshws;
603         struct tcp_sock *tp;
604         struct sk_buff *skb;
605         int total_size = 0;
606         struct sock *sk;
607         int wr_size;
608
609         wr_size = sizeof(struct fw_ofld_tx_data_wr);
610         sk = csk->sk;
611         tp = tcp_sk(sk);
612
613         if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
614                 return 0;
615
616         if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
617                 return 0;
618
619         while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
620                (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
621                 skb_queue_len(&csk->txq) > 1)) {
622                 unsigned int credit_len = skb->len;
623                 unsigned int credits_needed;
624                 unsigned int completion = 0;
625                 int tls_len = skb->len;/* TLS data len before IV/key */
626                 unsigned int immdlen;
627                 int len = skb->len;    /* length [ulp bytes] inserted by hw */
628                 int flowclen16 = 0;
629                 int tls_tx_imm = 0;
630
631                 immdlen = skb->len;
632                 if (!is_ofld_imm(csk, skb)) {
633                         immdlen = skb_transport_offset(skb);
634                         if (skb_ulp_tls_inline(skb))
635                                 wr_size = chtls_wr_size(csk, skb, false);
636                         credit_len = 8 * calc_tx_flits(skb, immdlen);
637                 } else {
638                         if (skb_ulp_tls_inline(skb)) {
639                                 wr_size = chtls_wr_size(csk, skb, false);
640                                 tls_tx_imm = 1;
641                         }
642                 }
643                 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
644                         credit_len += wr_size;
645                 credits_needed = DIV_ROUND_UP(credit_len, 16);
646                 if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
647                         flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
648                                                       tp->rcv_nxt);
649                         if (flowclen16 <= 0)
650                                 break;
651                         csk->wr_credits -= flowclen16;
652                         csk->wr_unacked += flowclen16;
653                         csk->wr_nondata += flowclen16;
654                         csk_set_flag(csk, CSK_TX_DATA_SENT);
655                 }
656
657                 if (csk->wr_credits < credits_needed) {
658                         if (skb_ulp_tls_inline(skb) &&
659                             !skb_ulp_tls_iv_imm(skb))
660                                 skb_shinfo(skb)->nr_frags--;
661                         break;
662                 }
663
664                 __skb_unlink(skb, &csk->txq);
665                 skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
666                                       CPL_PRIORITY_DATA);
667                 if (hws->ofld)
668                         hws->txqid = (skb->queue_mapping >> 1);
669                 skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
670                 csk->wr_credits -= credits_needed;
671                 csk->wr_unacked += credits_needed;
672                 csk->wr_nondata = 0;
673                 enqueue_wr(csk, skb);
674
675                 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
676                         if ((comp && csk->wr_unacked == credits_needed) ||
677                             (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
678                             csk->wr_unacked >= csk->wr_max_credits / 2) {
679                                 completion = 1;
680                                 csk->wr_unacked = 0;
681                         }
682                         if (skb_ulp_tls_inline(skb))
683                                 make_tlstx_data_wr(sk, skb, tls_tx_imm,
684                                                    tls_len, credits_needed);
685                         else
686                                 make_tx_data_wr(sk, skb, immdlen, len,
687                                                 credits_needed, completion);
688                         tp->snd_nxt += len;
689                         tp->lsndtime = tcp_jiffies32;
690                         if (completion)
691                                 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
692                 } else {
693                         struct cpl_close_con_req *req = cplhdr(skb);
694                         unsigned int cmd  = CPL_OPCODE_G(ntohl
695                                              (OPCODE_TID(req)));
696
697                         if (cmd == CPL_CLOSE_CON_REQ)
698                                 csk_set_flag(csk,
699                                              CSK_CLOSE_CON_REQUESTED);
700
701                         if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
702                             (csk->wr_unacked >= csk->wr_max_credits / 2)) {
703                                 req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
704                                 csk->wr_unacked = 0;
705                         }
706                 }
707                 total_size += skb->truesize;
708                 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
709                         csk_set_flag(csk, CSK_TX_WAIT_IDLE);
710                 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
711                 cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
712         }
713         sk->sk_wmem_queued -= total_size;
714         return total_size;
715 }
716
717 static void mark_urg(struct tcp_sock *tp, int flags,
718                      struct sk_buff *skb)
719 {
720         if (unlikely(flags & MSG_OOB)) {
721                 tp->snd_up = tp->write_seq;
722                 ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
723                                          ULPCB_FLAG_BARRIER |
724                                          ULPCB_FLAG_NO_APPEND |
725                                          ULPCB_FLAG_NEED_HDR;
726         }
727 }
728
729 /*
730  * Returns true if a connection should send more data to TCP engine
731  */
732 static bool should_push(struct sock *sk)
733 {
734         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
735         struct chtls_dev *cdev = csk->cdev;
736         struct tcp_sock *tp = tcp_sk(sk);
737
738         /*
739          * If we've released our offload resources there's nothing to do ...
740          */
741         if (!cdev)
742                 return false;
743
744         /*
745          * If there aren't any work requests in flight, or there isn't enough
746          * data in flight, or Nagle is off then send the current TX_DATA
747          * otherwise hold it and wait to accumulate more data.
748          */
749         return csk->wr_credits == csk->wr_max_credits ||
750                 (tp->nonagle & TCP_NAGLE_OFF);
751 }
752
753 /*
754  * Returns true if a TCP socket is corked.
755  */
756 static bool corked(const struct tcp_sock *tp, int flags)
757 {
758         return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
759 }
760
761 /*
762  * Returns true if a send should try to push new data.
763  */
764 static bool send_should_push(struct sock *sk, int flags)
765 {
766         return should_push(sk) && !corked(tcp_sk(sk), flags);
767 }
768
769 void chtls_tcp_push(struct sock *sk, int flags)
770 {
771         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
772         int qlen = skb_queue_len(&csk->txq);
773
774         if (likely(qlen)) {
775                 struct sk_buff *skb = skb_peek_tail(&csk->txq);
776                 struct tcp_sock *tp = tcp_sk(sk);
777
778                 mark_urg(tp, flags, skb);
779
780                 if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
781                     corked(tp, flags)) {
782                         ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
783                         return;
784                 }
785
786                 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
787                 if (qlen == 1 &&
788                     ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
789                      should_push(sk)))
790                         chtls_push_frames(csk, 1);
791         }
792 }
793
/*
 * Pick the main-body size for a new send sk_buff.
 *
 * Returns 0 when @io_len exceeds SKB_MAX_HEAD(@len), so the body holds only
 * the header and all payload goes into page fragments. Returns the maximum
 * head size when the data will be accumulated rather than pushed, so lots
 * of data can be packed in. Otherwise returns @io_len for a tight fit.
 *
 * Note: MSS < PAGE_SIZE is not compensated for; it does not arise in normal
 * cases and only wastes memory when it does.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int pgbreak = SKB_MAX_HEAD(len);

	if (io_len > pgbreak)
		return 0;

	return send_should_push(sk, flags) ? io_len : pgbreak;
}
822
823 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
824 {
825         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
826         struct tcp_sock *tp = tcp_sk(sk);
827
828         ULP_SKB_CB(skb)->seq = tp->write_seq;
829         ULP_SKB_CB(skb)->flags = flags;
830         __skb_queue_tail(&csk->txq, skb);
831         sk->sk_wmem_queued += skb->truesize;
832
833         if (TCP_PAGE(sk) && TCP_OFF(sk)) {
834                 put_page(TCP_PAGE(sk));
835                 TCP_PAGE(sk) = NULL;
836                 TCP_OFF(sk) = 0;
837         }
838 }
839
840 static struct sk_buff *get_tx_skb(struct sock *sk, int size)
841 {
842         struct sk_buff *skb;
843
844         skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
845         if (likely(skb)) {
846                 skb_reserve(skb, TX_HEADER_LEN);
847                 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
848                 skb_reset_transport_header(skb);
849         }
850         return skb;
851 }
852
853 static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
854 {
855         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
856         struct sk_buff *skb;
857
858         skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
859                         KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
860                         sk->sk_allocation);
861         if (likely(skb)) {
862                 skb_reserve(skb, (TX_TLSHDR_LEN +
863                             KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
864                 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
865                 skb_reset_transport_header(skb);
866                 ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
867                 ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
868         }
869         return skb;
870 }
871
872 static void tx_skb_finalize(struct sk_buff *skb)
873 {
874         struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
875
876         if (!(cb->flags & ULPCB_FLAG_NO_HDR))
877                 cb->flags = ULPCB_FLAG_NEED_HDR;
878         cb->flags |= ULPCB_FLAG_NO_APPEND;
879 }
880
881 static void push_frames_if_head(struct sock *sk)
882 {
883         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
884
885         if (skb_queue_len(&csk->txq) == 1)
886                 chtls_push_frames(csk, 1);
887 }
888
889 static int chtls_skb_copy_to_page_nocache(struct sock *sk,
890                                           struct iov_iter *from,
891                                           struct sk_buff *skb,
892                                           struct page *page,
893                                           int off, int copy)
894 {
895         int err;
896
897         err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
898                                        off, copy, skb->len);
899         if (err)
900                 return err;
901
902         skb->len             += copy;
903         skb->data_len        += copy;
904         skb->truesize        += copy;
905         sk->sk_wmem_queued   += copy;
906         return 0;
907 }
908
909 /* Read TLS header to find content type and data length */
910 static int tls_header_read(struct tls_hdr *thdr, struct iov_iter *from)
911 {
912         if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr))
913                 return -EFAULT;
914         return (__force int)cpu_to_be16(thdr->length);
915 }
916
917 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
918 {
919         return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
920 }
921
922 static int csk_wait_memory(struct chtls_dev *cdev,
923                            struct sock *sk, long *timeo_p)
924 {
925         DEFINE_WAIT_FUNC(wait, woken_wake_function);
926         int sndbuf, err = 0;
927         long current_timeo;
928         long vm_wait = 0;
929         bool noblock;
930
931         current_timeo = *timeo_p;
932         noblock = (*timeo_p ? false : true);
933         sndbuf = cdev->max_host_sndbuf;
934         if (csk_mem_free(cdev, sk)) {
935                 current_timeo = (prandom_u32() % (HZ / 5)) + 2;
936                 vm_wait = (prandom_u32() % (HZ / 5)) + 2;
937         }
938
939         add_wait_queue(sk_sleep(sk), &wait);
940         while (1) {
941                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
942
943                 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
944                         goto do_error;
945                 if (!*timeo_p) {
946                         if (noblock)
947                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
948                         goto do_nonblock;
949                 }
950                 if (signal_pending(current))
951                         goto do_interrupted;
952                 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
953                 if (csk_mem_free(cdev, sk) && !vm_wait)
954                         break;
955
956                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
957                 sk->sk_write_pending++;
958                 sk_wait_event(sk, &current_timeo, sk->sk_err ||
959                               (sk->sk_shutdown & SEND_SHUTDOWN) ||
960                               (csk_mem_free(cdev, sk) && !vm_wait), &wait);
961                 sk->sk_write_pending--;
962
963                 if (vm_wait) {
964                         vm_wait -= current_timeo;
965                         current_timeo = *timeo_p;
966                         if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
967                                 current_timeo -= vm_wait;
968                                 if (current_timeo < 0)
969                                         current_timeo = 0;
970                         }
971                         vm_wait = 0;
972                 }
973                 *timeo_p = current_timeo;
974         }
975 do_rm_wq:
976         remove_wait_queue(sk_sleep(sk), &wait);
977         return err;
978 do_error:
979         err = -EPIPE;
980         goto do_rm_wq;
981 do_nonblock:
982         err = -EAGAIN;
983         goto do_rm_wq;
984 do_interrupted:
985         err = sock_intr_errno(*timeo_p);
986         goto do_rm_wq;
987 }
988
989 int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
990 {
991         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
992         struct chtls_dev *cdev = csk->cdev;
993         struct tcp_sock *tp = tcp_sk(sk);
994         struct sk_buff *skb;
995         int mss, flags, err;
996         int recordsz = 0;
997         int copied = 0;
998         int hdrlen = 0;
999         long timeo;
1000
1001         lock_sock(sk);
1002         flags = msg->msg_flags;
1003         timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1004
1005         if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
1006                 err = sk_stream_wait_connect(sk, &timeo);
1007                 if (err)
1008                         goto out_err;
1009         }
1010
1011         sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1012         err = -EPIPE;
1013         if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1014                 goto out_err;
1015
1016         mss = csk->mss;
1017         csk_set_flag(csk, CSK_TX_MORE_DATA);
1018
1019         while (msg_data_left(msg)) {
1020                 int copy = 0;
1021
1022                 skb = skb_peek_tail(&csk->txq);
1023                 if (skb) {
1024                         copy = mss - skb->len;
1025                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1026                 }
1027                 if (!csk_mem_free(cdev, sk))
1028                         goto wait_for_sndbuf;
1029
1030                 if (is_tls_tx(csk) && !csk->tlshws.txleft) {
1031                         struct tls_hdr hdr;
1032
1033                         recordsz = tls_header_read(&hdr, &msg->msg_iter);
1034                         size -= TLS_HEADER_LENGTH;
1035                         hdrlen += TLS_HEADER_LENGTH;
1036                         csk->tlshws.txleft = recordsz;
1037                         csk->tlshws.type = hdr.type;
1038                         if (skb)
1039                                 ULP_SKB_CB(skb)->ulp.tls.type = hdr.type;
1040                 }
1041
1042                 if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
1043                     copy <= 0) {
1044 new_buf:
1045                         if (skb) {
1046                                 tx_skb_finalize(skb);
1047                                 push_frames_if_head(sk);
1048                         }
1049
1050                         if (is_tls_tx(csk)) {
1051                                 skb = get_record_skb(sk,
1052                                                      select_size(sk,
1053                                                                  recordsz,
1054                                                                  flags,
1055                                                                  TX_TLSHDR_LEN),
1056                                                                  false);
1057                         } else {
1058                                 skb = get_tx_skb(sk,
1059                                                  select_size(sk, size, flags,
1060                                                              TX_HEADER_LEN));
1061                         }
1062                         if (unlikely(!skb))
1063                                 goto wait_for_memory;
1064
1065                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1066                         copy = mss;
1067                 }
1068                 if (copy > size)
1069                         copy = size;
1070
1071                 if (skb_tailroom(skb) > 0) {
1072                         copy = min(copy, skb_tailroom(skb));
1073                         if (is_tls_tx(csk))
1074                                 copy = min_t(int, copy, csk->tlshws.txleft);
1075                         err = skb_add_data_nocache(sk, skb,
1076                                                    &msg->msg_iter, copy);
1077                         if (err)
1078                                 goto do_fault;
1079                 } else {
1080                         int i = skb_shinfo(skb)->nr_frags;
1081                         struct page *page = TCP_PAGE(sk);
1082                         int pg_size = PAGE_SIZE;
1083                         int off = TCP_OFF(sk);
1084                         bool merge;
1085
1086                         if (!page)
1087                                 goto wait_for_memory;
1088
1089                         pg_size <<= compound_order(page);
1090                         if (off < pg_size &&
1091                             skb_can_coalesce(skb, i, page, off)) {
1092                                 merge = 1;
1093                                 goto copy;
1094                         }
1095                         merge = 0;
1096                         if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
1097                             MAX_SKB_FRAGS))
1098                                 goto new_buf;
1099
1100                         if (page && off == pg_size) {
1101                                 put_page(page);
1102                                 TCP_PAGE(sk) = page = NULL;
1103                                 pg_size = PAGE_SIZE;
1104                         }
1105
1106                         if (!page) {
1107                                 gfp_t gfp = sk->sk_allocation;
1108                                 int order = cdev->send_page_order;
1109
1110                                 if (order) {
1111                                         page = alloc_pages(gfp | __GFP_COMP |
1112                                                            __GFP_NOWARN |
1113                                                            __GFP_NORETRY,
1114                                                            order);
1115                                         if (page)
1116                                                 pg_size <<=
1117                                                         compound_order(page);
1118                                 }
1119                                 if (!page) {
1120                                         page = alloc_page(gfp);
1121                                         pg_size = PAGE_SIZE;
1122                                 }
1123                                 if (!page)
1124                                         goto wait_for_memory;
1125                                 off = 0;
1126                         }
1127 copy:
1128                         if (copy > pg_size - off)
1129                                 copy = pg_size - off;
1130                         if (is_tls_tx(csk))
1131                                 copy = min_t(int, copy, csk->tlshws.txleft);
1132
1133                         err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
1134                                                              skb, page,
1135                                                              off, copy);
1136                         if (unlikely(err)) {
1137                                 if (!TCP_PAGE(sk)) {
1138                                         TCP_PAGE(sk) = page;
1139                                         TCP_OFF(sk) = 0;
1140                                 }
1141                                 goto do_fault;
1142                         }
1143                         /* Update the skb. */
1144                         if (merge) {
1145                                 skb_shinfo(skb)->frags[i - 1].size += copy;
1146                         } else {
1147                                 skb_fill_page_desc(skb, i, page, off, copy);
1148                                 if (off + copy < pg_size) {
1149                                         /* space left keep page */
1150                                         get_page(page);
1151                                         TCP_PAGE(sk) = page;
1152                                 } else {
1153                                         TCP_PAGE(sk) = NULL;
1154                                 }
1155                         }
1156                         TCP_OFF(sk) = off + copy;
1157                 }
1158                 if (unlikely(skb->len == mss))
1159                         tx_skb_finalize(skb);
1160                 tp->write_seq += copy;
1161                 copied += copy;
1162                 size -= copy;
1163
1164                 if (is_tls_tx(csk))
1165                         csk->tlshws.txleft -= copy;
1166
1167                 if (corked(tp, flags) &&
1168                     (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
1169                         ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
1170
1171                 if (size == 0)
1172                         goto out;
1173
1174                 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
1175                         push_frames_if_head(sk);
1176                 continue;
1177 wait_for_sndbuf:
1178                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1179 wait_for_memory:
1180                 err = csk_wait_memory(cdev, sk, &timeo);
1181                 if (err)
1182                         goto do_error;
1183         }
1184 out:
1185         csk_reset_flag(csk, CSK_TX_MORE_DATA);
1186         if (copied)
1187                 chtls_tcp_push(sk, flags);
1188 done:
1189         release_sock(sk);
1190         return copied + hdrlen;
1191 do_fault:
1192         if (!skb->len) {
1193                 __skb_unlink(skb, &csk->txq);
1194                 sk->sk_wmem_queued -= skb->truesize;
1195                 __kfree_skb(skb);
1196         }
1197 do_error:
1198         if (copied)
1199                 goto out;
1200 out_err:
1201         if (csk_conn_inline(csk))
1202                 csk_reset_flag(csk, CSK_TX_MORE_DATA);
1203         copied = sk_stream_error(sk, flags, err);
1204         goto done;
1205 }
1206
1207 int chtls_sendpage(struct sock *sk, struct page *page,
1208                    int offset, size_t size, int flags)
1209 {
1210         struct chtls_sock *csk;
1211         struct chtls_dev *cdev;
1212         int mss, err, copied;
1213         struct tcp_sock *tp;
1214         long timeo;
1215
1216         tp = tcp_sk(sk);
1217         copied = 0;
1218         csk = rcu_dereference_sk_user_data(sk);
1219         cdev = csk->cdev;
1220         lock_sock(sk);
1221         timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1222
1223         err = sk_stream_wait_connect(sk, &timeo);
1224         if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
1225             err != 0)
1226                 goto out_err;
1227
1228         mss = csk->mss;
1229         csk_set_flag(csk, CSK_TX_MORE_DATA);
1230
1231         while (size > 0) {
1232                 struct sk_buff *skb = skb_peek_tail(&csk->txq);
1233                 int copy, i;
1234
1235                 if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
1236                     (copy = mss - skb->len) <= 0) {
1237 new_buf:
1238                         if (!csk_mem_free(cdev, sk))
1239                                 goto wait_for_sndbuf;
1240
1241                         if (is_tls_tx(csk)) {
1242                                 skb = get_record_skb(sk,
1243                                                      select_size(sk, size,
1244                                                                  flags,
1245                                                                  TX_TLSHDR_LEN),
1246                                                      true);
1247                         } else {
1248                                 skb = get_tx_skb(sk, 0);
1249                         }
1250                         if (!skb)
1251                                 goto wait_for_memory;
1252                         copy = mss;
1253                 }
1254                 if (copy > size)
1255                         copy = size;
1256
1257                 i = skb_shinfo(skb)->nr_frags;
1258                 if (skb_can_coalesce(skb, i, page, offset)) {
1259                         skb_shinfo(skb)->frags[i - 1].size += copy;
1260                 } else if (i < MAX_SKB_FRAGS) {
1261                         get_page(page);
1262                         skb_fill_page_desc(skb, i, page, offset, copy);
1263                 } else {
1264                         tx_skb_finalize(skb);
1265                         push_frames_if_head(sk);
1266                         goto new_buf;
1267                 }
1268
1269                 skb->len += copy;
1270                 if (skb->len == mss)
1271                         tx_skb_finalize(skb);
1272                 skb->data_len += copy;
1273                 skb->truesize += copy;
1274                 sk->sk_wmem_queued += copy;
1275                 tp->write_seq += copy;
1276                 copied += copy;
1277                 offset += copy;
1278                 size -= copy;
1279
1280                 if (corked(tp, flags) &&
1281                     (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
1282                         ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
1283
1284                 if (!size)
1285                         break;
1286
1287                 if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND))
1288                         push_frames_if_head(sk);
1289                 continue;
1290 wait_for_sndbuf:
1291                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1292 wait_for_memory:
1293                 err = csk_wait_memory(cdev, sk, &timeo);
1294                 if (err)
1295                         goto do_error;
1296         }
1297 out:
1298         csk_reset_flag(csk, CSK_TX_MORE_DATA);
1299         if (copied)
1300                 chtls_tcp_push(sk, flags);
1301 done:
1302         release_sock(sk);
1303         return copied;
1304
1305 do_error:
1306         if (copied)
1307                 goto out;
1308
1309 out_err:
1310         if (csk_conn_inline(csk))
1311                 csk_reset_flag(csk, CSK_TX_MORE_DATA);
1312         copied = sk_stream_error(sk, flags, err);
1313         goto done;
1314 }
1315
1316 static void chtls_select_window(struct sock *sk)
1317 {
1318         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1319         struct tcp_sock *tp = tcp_sk(sk);
1320         unsigned int wnd = tp->rcv_wnd;
1321
1322         wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
1323         wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
1324
1325         if (wnd > MAX_RCV_WND)
1326                 wnd = MAX_RCV_WND;
1327
1328 /*
1329  * Check if we need to grow the receive window in response to an increase in
1330  * the socket's receive buffer size.  Some applications increase the buffer
1331  * size dynamically and rely on the window to grow accordingly.
1332  */
1333
1334         if (wnd > tp->rcv_wnd) {
1335                 tp->rcv_wup -= wnd - tp->rcv_wnd;
1336                 tp->rcv_wnd = wnd;
1337                 /* Mark the receive window as updated */
1338                 csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
1339         }
1340 }
1341
1342 /*
1343  * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
1344  * to return without sending the message in case we cannot allocate
1345  * an sk_buff.  Returns the number of credits sent.
1346  */
1347 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
1348 {
1349         struct cpl_rx_data_ack *req;
1350         struct sk_buff *skb;
1351
1352         skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
1353         if (!skb)
1354                 return 0;
1355         __skb_put(skb, sizeof(*req));
1356         req = (struct cpl_rx_data_ack *)skb->head;
1357
1358         set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
1359         INIT_TP_WR(req, csk->tid);
1360         OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1361                                                     csk->tid));
1362         req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
1363                                        RX_FORCE_ACK_F);
1364         cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
1365         return credits;
1366 }
1367
1368 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
1369                              TCPF_FIN_WAIT1 | \
1370                              TCPF_FIN_WAIT2)
1371
1372 /*
1373  * Called after some received data has been read.  It returns RX credits
1374  * to the HW for the amount of data processed.
1375  */
1376 static void chtls_cleanup_rbuf(struct sock *sk, int copied)
1377 {
1378         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1379         struct tcp_sock *tp;
1380         int must_send;
1381         u32 credits;
1382         u32 thres;
1383
1384         thres = 15 * 1024;
1385
1386         if (!sk_in_state(sk, CREDIT_RETURN_STATE))
1387                 return;
1388
1389         chtls_select_window(sk);
1390         tp = tcp_sk(sk);
1391         credits = tp->copied_seq - tp->rcv_wup;
1392         if (unlikely(!credits))
1393                 return;
1394
1395 /*
1396  * For coalescing to work effectively ensure the receive window has
1397  * at least 16KB left.
1398  */
1399         must_send = credits + 16384 >= tp->rcv_wnd;
1400
1401         if (must_send || credits >= thres)
1402                 tp->rcv_wup += send_rx_credits(csk, credits);
1403 }
1404
1405 static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1406                             int nonblock, int flags, int *addr_len)
1407 {
1408         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1409         struct net_device *dev = csk->egress_dev;
1410         struct chtls_hws *hws = &csk->tlshws;
1411         struct tcp_sock *tp = tcp_sk(sk);
1412         struct adapter *adap;
1413         unsigned long avail;
1414         int buffers_freed;
1415         int copied = 0;
1416         int request;
1417         int target;
1418         long timeo;
1419
1420         adap = netdev2adap(dev);
1421         buffers_freed = 0;
1422
1423         timeo = sock_rcvtimeo(sk, nonblock);
1424         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1425         request = len;
1426
1427         if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1428                 chtls_cleanup_rbuf(sk, copied);
1429
1430         do {
1431                 struct sk_buff *skb;
1432                 u32 offset = 0;
1433
1434                 if (unlikely(tp->urg_data &&
1435                              tp->urg_seq == tp->copied_seq)) {
1436                         if (copied)
1437                                 break;
1438                         if (signal_pending(current)) {
1439                                 copied = timeo ? sock_intr_errno(timeo) :
1440                                         -EAGAIN;
1441                                 break;
1442                         }
1443                 }
1444                 skb = skb_peek(&sk->sk_receive_queue);
1445                 if (skb)
1446                         goto found_ok_skb;
1447                 if (csk->wr_credits &&
1448                     skb_queue_len(&csk->txq) &&
1449                     chtls_push_frames(csk, csk->wr_credits ==
1450                                       csk->wr_max_credits))
1451                         sk->sk_write_space(sk);
1452
1453                 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1454                         break;
1455
1456                 if (copied) {
1457                         if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1458                             (sk->sk_shutdown & RCV_SHUTDOWN) ||
1459                             signal_pending(current))
1460                                 break;
1461
1462                         if (!timeo)
1463                                 break;
1464                 } else {
1465                         if (sock_flag(sk, SOCK_DONE))
1466                                 break;
1467                         if (sk->sk_err) {
1468                                 copied = sock_error(sk);
1469                                 break;
1470                         }
1471                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1472                                 break;
1473                         if (sk->sk_state == TCP_CLOSE) {
1474                                 copied = -ENOTCONN;
1475                                 break;
1476                         }
1477                         if (!timeo) {
1478                                 copied = -EAGAIN;
1479                                 break;
1480                         }
1481                         if (signal_pending(current)) {
1482                                 copied = sock_intr_errno(timeo);
1483                                 break;
1484                         }
1485                 }
1486                 if (READ_ONCE(sk->sk_backlog.tail)) {
1487                         release_sock(sk);
1488                         lock_sock(sk);
1489                         chtls_cleanup_rbuf(sk, copied);
1490                         continue;
1491                 }
1492
1493                 if (copied >= target)
1494                         break;
1495                 chtls_cleanup_rbuf(sk, copied);
1496                 sk_wait_data(sk, &timeo, NULL);
1497                 continue;
1498 found_ok_skb:
1499                 if (!skb->len) {
1500                         skb_dst_set(skb, NULL);
1501                         __skb_unlink(skb, &sk->sk_receive_queue);
1502                         kfree_skb(skb);
1503
1504                         if (!copied && !timeo) {
1505                                 copied = -EAGAIN;
1506                                 break;
1507                         }
1508
1509                         if (copied < target) {
1510                                 release_sock(sk);
1511                                 lock_sock(sk);
1512                                 continue;
1513                         }
1514                         break;
1515                 }
1516                 offset = hws->copied_seq;
1517                 avail = skb->len - offset;
1518                 if (len < avail)
1519                         avail = len;
1520
1521                 if (unlikely(tp->urg_data)) {
1522                         u32 urg_offset = tp->urg_seq - tp->copied_seq;
1523
1524                         if (urg_offset < avail) {
1525                                 if (urg_offset) {
1526                                         avail = urg_offset;
1527                                 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
1528                                         /* First byte is urgent, skip */
1529                                         tp->copied_seq++;
1530                                         offset++;
1531                                         avail--;
1532                                         if (!avail)
1533                                                 goto skip_copy;
1534                                 }
1535                         }
1536                 }
1537                 if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
1538                         if (!copied) {
1539                                 copied = -EFAULT;
1540                                 break;
1541                         }
1542                 }
1543
1544                 copied += avail;
1545                 len -= avail;
1546                 hws->copied_seq += avail;
1547 skip_copy:
1548                 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1549                         tp->urg_data = 0;
1550
1551                 if ((avail + offset) >= skb->len) {
1552                         struct sk_buff *next_skb;
1553                         if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1554                                 tp->copied_seq += skb->len;
1555                                 hws->rcvpld = skb->hdr_len;
1556                         } else {
1557                                 tp->copied_seq += hws->rcvpld;
1558                         }
1559                         chtls_free_skb(sk, skb);
1560                         buffers_freed++;
1561                         hws->copied_seq = 0;
1562                         next_skb = skb_peek(&sk->sk_receive_queue);
1563                         if (copied >= target && !next_skb)
1564                                 break;
1565                         if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
1566                                 break;
1567                 }
1568         } while (len > 0);
1569
1570         if (buffers_freed)
1571                 chtls_cleanup_rbuf(sk, copied);
1572         release_sock(sk);
1573         return copied;
1574 }
1575
1576 /*
1577  * Peek at data in a socket's receive buffer.
1578  */
1579 static int peekmsg(struct sock *sk, struct msghdr *msg,
1580                    size_t len, int nonblock, int flags)
1581 {
1582         struct tcp_sock *tp = tcp_sk(sk);
1583         u32 peek_seq, offset;
1584         struct sk_buff *skb;
1585         int copied = 0;
1586         size_t avail;          /* amount of available data in current skb */
1587         long timeo;
1588
1589         lock_sock(sk);
1590         timeo = sock_rcvtimeo(sk, nonblock);
1591         peek_seq = tp->copied_seq;
1592
1593         do {
1594                 if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1595                         if (copied)
1596                                 break;
1597                         if (signal_pending(current)) {
1598                                 copied = timeo ? sock_intr_errno(timeo) :
1599                                 -EAGAIN;
1600                                 break;
1601                         }
1602                 }
1603
1604                 skb_queue_walk(&sk->sk_receive_queue, skb) {
1605                         offset = peek_seq - ULP_SKB_CB(skb)->seq;
1606                         if (offset < skb->len)
1607                                 goto found_ok_skb;
1608                 }
1609
1610                 /* empty receive queue */
1611                 if (copied)
1612                         break;
1613                 if (sock_flag(sk, SOCK_DONE))
1614                         break;
1615                 if (sk->sk_err) {
1616                         copied = sock_error(sk);
1617                         break;
1618                 }
1619                 if (sk->sk_shutdown & RCV_SHUTDOWN)
1620                         break;
1621                 if (sk->sk_state == TCP_CLOSE) {
1622                         copied = -ENOTCONN;
1623                         break;
1624                 }
1625                 if (!timeo) {
1626                         copied = -EAGAIN;
1627                         break;
1628                 }
1629                 if (signal_pending(current)) {
1630                         copied = sock_intr_errno(timeo);
1631                         break;
1632                 }
1633
1634                 if (READ_ONCE(sk->sk_backlog.tail)) {
1635                         /* Do not sleep, just process backlog. */
1636                         release_sock(sk);
1637                         lock_sock(sk);
1638                 } else {
1639                         sk_wait_data(sk, &timeo, NULL);
1640                 }
1641
1642                 if (unlikely(peek_seq != tp->copied_seq)) {
1643                         if (net_ratelimit())
1644                                 pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1645                                         current->comm, current->pid);
1646                         peek_seq = tp->copied_seq;
1647                 }
1648                 continue;
1649
1650 found_ok_skb:
1651                 avail = skb->len - offset;
1652                 if (len < avail)
1653                         avail = len;
1654                 /*
1655                  * Do we have urgent data here?  We need to skip over the
1656                  * urgent byte.
1657                  */
1658                 if (unlikely(tp->urg_data)) {
1659                         u32 urg_offset = tp->urg_seq - peek_seq;
1660
1661                         if (urg_offset < avail) {
1662                                 /*
1663                                  * The amount of data we are preparing to copy
1664                                  * contains urgent data.
1665                                  */
1666                                 if (!urg_offset) { /* First byte is urgent */
1667                                         if (!sock_flag(sk, SOCK_URGINLINE)) {
1668                                                 peek_seq++;
1669                                                 offset++;
1670                                                 avail--;
1671                                         }
1672                                         if (!avail)
1673                                                 continue;
1674                                 } else {
1675                                         /* stop short of the urgent data */
1676                                         avail = urg_offset;
1677                                 }
1678                         }
1679                 }
1680
1681                 /*
1682                  * If MSG_TRUNC is specified the data is discarded.
1683                  */
1684                 if (likely(!(flags & MSG_TRUNC)))
1685                         if (skb_copy_datagram_msg(skb, offset, msg, len)) {
1686                                 if (!copied) {
1687                                         copied = -EFAULT;
1688                                         break;
1689                                 }
1690                         }
1691                 peek_seq += avail;
1692                 copied += avail;
1693                 len -= avail;
1694         } while (len > 0);
1695
1696         release_sock(sk);
1697         return copied;
1698 }
1699
1700 int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1701                   int nonblock, int flags, int *addr_len)
1702 {
1703         struct tcp_sock *tp = tcp_sk(sk);
1704         struct chtls_sock *csk;
1705         struct chtls_hws *hws;
1706         unsigned long avail;    /* amount of available data in current skb */
1707         int buffers_freed;
1708         int copied = 0;
1709         int request;
1710         long timeo;
1711         int target;             /* Read at least this many bytes */
1712
1713         buffers_freed = 0;
1714
1715         if (unlikely(flags & MSG_OOB))
1716                 return tcp_prot.recvmsg(sk, msg, len, nonblock, flags,
1717                                         addr_len);
1718
1719         if (unlikely(flags & MSG_PEEK))
1720                 return peekmsg(sk, msg, len, nonblock, flags);
1721
1722         if (sk_can_busy_loop(sk) &&
1723             skb_queue_empty_lockless(&sk->sk_receive_queue) &&
1724             sk->sk_state == TCP_ESTABLISHED)
1725                 sk_busy_loop(sk, nonblock);
1726
1727         lock_sock(sk);
1728         csk = rcu_dereference_sk_user_data(sk);
1729         hws = &csk->tlshws;
1730
1731         if (is_tls_rx(csk))
1732                 return chtls_pt_recvmsg(sk, msg, len, nonblock,
1733                                         flags, addr_len);
1734
1735         timeo = sock_rcvtimeo(sk, nonblock);
1736         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1737         request = len;
1738
1739         if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1740                 chtls_cleanup_rbuf(sk, copied);
1741
1742         do {
1743                 struct sk_buff *skb;
1744                 u32 offset;
1745
1746                 if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
1747                         if (copied)
1748                                 break;
1749                         if (signal_pending(current)) {
1750                                 copied = timeo ? sock_intr_errno(timeo) :
1751                                         -EAGAIN;
1752                                 break;
1753                         }
1754                 }
1755
1756                 skb = skb_peek(&sk->sk_receive_queue);
1757                 if (skb)
1758                         goto found_ok_skb;
1759
1760                 if (csk->wr_credits &&
1761                     skb_queue_len(&csk->txq) &&
1762                     chtls_push_frames(csk, csk->wr_credits ==
1763                                       csk->wr_max_credits))
1764                         sk->sk_write_space(sk);
1765
1766                 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1767                         break;
1768
1769                 if (copied) {
1770                         if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1771                             (sk->sk_shutdown & RCV_SHUTDOWN) ||
1772                             signal_pending(current))
1773                                 break;
1774                 } else {
1775                         if (sock_flag(sk, SOCK_DONE))
1776                                 break;
1777                         if (sk->sk_err) {
1778                                 copied = sock_error(sk);
1779                                 break;
1780                         }
1781                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1782                                 break;
1783                         if (sk->sk_state == TCP_CLOSE) {
1784                                 copied = -ENOTCONN;
1785                                 break;
1786                         }
1787                         if (!timeo) {
1788                                 copied = -EAGAIN;
1789                                 break;
1790                         }
1791                         if (signal_pending(current)) {
1792                                 copied = sock_intr_errno(timeo);
1793                                 break;
1794                         }
1795                 }
1796
1797                 if (READ_ONCE(sk->sk_backlog.tail)) {
1798                         release_sock(sk);
1799                         lock_sock(sk);
1800                         chtls_cleanup_rbuf(sk, copied);
1801                         continue;
1802                 }
1803
1804                 if (copied >= target)
1805                         break;
1806                 chtls_cleanup_rbuf(sk, copied);
1807                 sk_wait_data(sk, &timeo, NULL);
1808                 continue;
1809
1810 found_ok_skb:
1811                 if (!skb->len) {
1812                         chtls_kfree_skb(sk, skb);
1813                         if (!copied && !timeo) {
1814                                 copied = -EAGAIN;
1815                                 break;
1816                         }
1817
1818                         if (copied < target)
1819                                 continue;
1820
1821                         break;
1822                 }
1823
1824                 offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
1825                 avail = skb->len - offset;
1826                 if (len < avail)
1827                         avail = len;
1828
1829                 if (unlikely(tp->urg_data)) {
1830                         u32 urg_offset = tp->urg_seq - tp->copied_seq;
1831
1832                         if (urg_offset < avail) {
1833                                 if (urg_offset) {
1834                                         avail = urg_offset;
1835                                 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
1836                                         tp->copied_seq++;
1837                                         offset++;
1838                                         avail--;
1839                                         if (!avail)
1840                                                 goto skip_copy;
1841                                 }
1842                         }
1843                 }
1844
1845                 if (likely(!(flags & MSG_TRUNC))) {
1846                         if (skb_copy_datagram_msg(skb, offset,
1847                                                   msg, avail)) {
1848                                 if (!copied) {
1849                                         copied = -EFAULT;
1850                                         break;
1851                                 }
1852                         }
1853                 }
1854
1855                 tp->copied_seq += avail;
1856                 copied += avail;
1857                 len -= avail;
1858
1859 skip_copy:
1860                 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1861                         tp->urg_data = 0;
1862
1863                 if (avail + offset >= skb->len) {
1864                         if (likely(skb))
1865                                 chtls_free_skb(sk, skb);
1866                         buffers_freed++;
1867
1868                         if  (copied >= target &&
1869                              !skb_peek(&sk->sk_receive_queue))
1870                                 break;
1871                 }
1872         } while (len > 0);
1873
1874         if (buffers_freed)
1875                 chtls_cleanup_rbuf(sk, copied);
1876
1877         release_sock(sk);
1878         return copied;
1879 }