GNU Linux-libre 4.19.286-gnu1
[releases.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/inet_common.h>
28 #include <net/sock.h>
29 #include <net/xfrm.h>
30
31 #include <asm/ioctls.h>
32 #include <linux/spinlock.h>
33 #include <linux/timer.h>
34 #include <linux/delay.h>
35 #include <linux/poll.h>
36
37 #include "ccid.h"
38 #include "dccp.h"
39 #include "feat.h"
40
41 #define CREATE_TRACE_POINTS
42 #include "trace.h"
43
/* Per-CPU SNMP (MIB) counters for DCCP, updated via DCCP_INC_STATS() etc. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of orphaned DCCP sockets (closed by the user, not yet destroyed). */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Global hash tables for bound/established DCCP sockets. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
56
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Map a DCCP socket state to a human-readable name for debug output.
 *
 * Fix: the original only rejected values >= DCCP_MAX_STATES, so a negative
 * @state would have indexed the lookup table out of bounds; reject it too.
 */
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state < 0 || state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif
80
/*
 * Transition socket @sk to @state, keeping the MIB counters that track
 * currently-established connections and resets consistent, and unhashing
 * the socket once it reaches DCCP_CLOSED.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* Release the local port unless bind() pinned it explicitly */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);
120
/*
 * Complete a passive close once the peer's Close/CloseReq has been consumed
 * by the application (RFC 4340, 8.3).  A no-op in any other socket state.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}
138
/*
 * Final teardown on the protocol side: mark the socket CLOSED, stop all
 * transmit timers, and either wake up a user still holding the socket or,
 * if it is already dead, destroy it immediately.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);	/* wake anyone blocked on this socket */
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);
153
154 const char *dccp_packet_name(const int type)
155 {
156         static const char *const dccp_packet_names[] = {
157                 [DCCP_PKT_REQUEST]  = "REQUEST",
158                 [DCCP_PKT_RESPONSE] = "RESPONSE",
159                 [DCCP_PKT_DATA]     = "DATA",
160                 [DCCP_PKT_ACK]      = "ACK",
161                 [DCCP_PKT_DATAACK]  = "DATAACK",
162                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
163                 [DCCP_PKT_CLOSE]    = "CLOSE",
164                 [DCCP_PKT_RESET]    = "RESET",
165                 [DCCP_PKT_SYNC]     = "SYNC",
166                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
167         };
168
169         if (type >= DCCP_NR_PKT_TYPES)
170                 return "INVALID";
171         else
172                 return dccp_packet_names[type];
173 }
174
175 EXPORT_SYMBOL_GPL(dccp_packet_name);
176
177 void dccp_destruct_common(struct sock *sk)
178 {
179         struct dccp_sock *dp = dccp_sk(sk);
180
181         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
182         dp->dccps_hc_tx_ccid = NULL;
183 }
184 EXPORT_SYMBOL_GPL(dccp_destruct_common);
185
/* sk->sk_destruct callback: DCCP-private teardown, then the generic inet part. */
static void dccp_sk_destruct(struct sock *sk)
{
	dccp_destruct_common(sk);
	inet_sock_destruct(sk);
}
191
/*
 * Initialise a freshly created DCCP socket.
 *
 * @ctl_sock_initialized: zero only while the control socket itself is being
 * created; in that case feature negotiation is skipped.
 * Returns 0, or a negative errno from dccp_feat_init().
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative initial MPS */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
219
/*
 * Release everything still attached to the socket before it is freed:
 * queued skbs, the bound port, the service list, the ack vector, the RX
 * CCID and pending feature-negotiation state.  (The TX CCID is released
 * later, from dccp_sk_destruct().)
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
249
250 static inline int dccp_listen_start(struct sock *sk, int backlog)
251 {
252         struct dccp_sock *dp = dccp_sk(sk);
253
254         dp->dccps_role = DCCP_ROLE_LISTEN;
255         /* do not start to listen if feature negotiation setup fails */
256         if (dccp_feat_finalise_settings(dp))
257                 return -EPROTO;
258         return inet_csk_listen_start(sk, backlog);
259 }
260
261 static inline int dccp_need_reset(int state)
262 {
263         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
264                state != DCCP_REQUESTING;
265 }
266
/*
 * Abort the current connection and return the socket to a clean,
 * unconnected state.  Mirrors the ABORT function of RFC 793, sec. 3.8,
 * adapted to DCCP (Reset with Code 2, "Aborted", instead of a RST).
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	/* Drop the RX CCID; it is set up again on the next connection */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	/* Forget the source address unless bind() pinned it */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
320
321 /*
322  *      Wait for a DCCP event.
323  *
324  *      Note that we don't need to lock the socket, as the upper poll layers
325  *      take care of normal races (between the test and the event) and we don't
326  *      go look at any of the socket buffers directly.
327  */
__poll_t dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	__poll_t mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sock, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = EPOLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected? (i.e. not still in the REQUESTING/RESPOND handshake) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
377
/*
 * ioctl handler.  Only SIOCINQ (size of the next readable packet) is
 * implemented; listening sockets yield -ENOTCONN and unknown commands
 * -ENOIOCTLCMD.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
413
/*
 * Install the socket's service code plus an optional list of additional
 * codes.  @service is the first __be32 of optval (already fetched by the
 * caller); any remaining words in optval form the extra list.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* one word was consumed as @service, the rest is the list */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* replace any previously installed list */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
448
/*
 * Register a minimum checksum-coverage preference for one half-connection.
 * @cscov: requested minimum coverage (0..15); @rx: true for the RX side.
 * Returns 0 or a negative errno.
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		/*
		 * NOTE(review): the loop above has advanced cscov to 16, so
		 * the value cached here is always 16, not the requested
		 * minimum.  Verify against the MIN_CSUM_COVER activation
		 * handler in feat.c whether this is intended — TODO confirm.
		 */
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
484
485 static int dccp_setsockopt_ccid(struct sock *sk, int type,
486                                 char __user *optval, unsigned int optlen)
487 {
488         u8 *val;
489         int rc = 0;
490
491         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
492                 return -EINVAL;
493
494         val = memdup_user(optval, optlen);
495         if (IS_ERR(val))
496                 return PTR_ERR(val);
497
498         lock_sock(sk);
499         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
500                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
501
502         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
503                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
504         release_sock(sk);
505
506         kfree(val);
507         return rc;
508 }
509
/*
 * SOL_DCCP setsockopt handler.  Deprecated and CCID options are dispatched
 * before the generic "fetch one int" step; every remaining option expects
 * at least sizeof(int) bytes of option data.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* the service option reinterprets the fetched int as a __be32 code */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
575
576 int dccp_setsockopt(struct sock *sk, int level, int optname,
577                     char __user *optval, unsigned int optlen)
578 {
579         if (level != SOL_DCCP)
580                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
581                                                              optname, optval,
582                                                              optlen);
583         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
584 }
585
586 EXPORT_SYMBOL_GPL(dccp_setsockopt);
587
#ifdef CONFIG_COMPAT
/* 32-bit compat setsockopt: same split as dccp_setsockopt(). */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
600
601 static int dccp_getsockopt_service(struct sock *sk, int len,
602                                    __be32 __user *optval,
603                                    int __user *optlen)
604 {
605         const struct dccp_sock *dp = dccp_sk(sk);
606         const struct dccp_service_list *sl;
607         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
608
609         lock_sock(sk);
610         if ((sl = dp->dccps_service_list) != NULL) {
611                 slen = sl->dccpsl_nr * sizeof(u32);
612                 total_len += slen;
613         }
614
615         err = -EINVAL;
616         if (total_len > len)
617                 goto out;
618
619         err = 0;
620         if (put_user(total_len, optlen) ||
621             put_user(dp->dccps_service, optval) ||
622             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
623                 err = -EFAULT;
624 out:
625         release_sock(sk);
626         return err;
627 }
628
/*
 * SOL_DCCP getsockopt handler.  Options in the ranges 128..191 and 192..255
 * are forwarded to the RX and TX CCID modules respectively; everything else
 * returns a single int value.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		/* CCID-specific options, receive half-connection */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		/* CCID-specific options, transmit half-connection */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
696
697 int dccp_getsockopt(struct sock *sk, int level, int optname,
698                     char __user *optval, int __user *optlen)
699 {
700         if (level != SOL_DCCP)
701                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
702                                                              optname, optval,
703                                                              optlen);
704         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
705 }
706
707 EXPORT_SYMBOL_GPL(dccp_getsockopt);
708
#ifdef CONFIG_COMPAT
/* 32-bit compat getsockopt: same split as dccp_getsockopt(). */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
721
/*
 * Parse the SOL_DCCP control messages of a sendmsg() call and apply them to
 * @skb.  Currently only DCCP_SCM_PRIORITY is understood.  Returns 0, or
 * -EINVAL for malformed or unsupported cmsgs.
 */
static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subsystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		/* the selected qpolicy must accept this parameter type */
		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
761
/*
 * Queue one datagram (at most the current MPS) for transmission.
 * Returns @len on success or a negative errno.
 */
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process (which
	 * depends on sk_write_pending being set) keeps working.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* drop the socket lock around the (possibly sleeping) allocation */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	/* re-check queue and state: both may have changed while unlocked */
	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_discard;
	}

	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits to release further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
832
/*
 * Receive one datagram.  DCCP preserves packet boundaries: at most one
 * queued packet is consumed per call, and a packet longer than @len is
 * truncated (MSG_TRUNC set in msg_flags).  Returns the number of bytes
 * copied (0 on Close/CloseReq/Reset or shutdown) or a negative errno.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* other packet types are dropped from the queue */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* nothing queued yet — sleep until data arrives or timeout */
		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
940
941 int inet_dccp_listen(struct socket *sock, int backlog)
942 {
943         struct sock *sk = sock->sk;
944         unsigned char old_state;
945         int err;
946
947         lock_sock(sk);
948
949         err = -EINVAL;
950         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
951                 goto out;
952
953         old_state = sk->sk_state;
954         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
955                 goto out;
956
957         /* Really, if the socket is already in listen state
958          * we can only allow the backlog to be adjusted.
959          */
960         if (old_state != DCCP_LISTEN) {
961                 /*
962                  * FIXME: here it probably should be sk->sk_prot->listen_start
963                  * see tcp_listen_start
964                  */
965                 err = dccp_listen_start(sk, backlog);
966                 if (err)
967                         goto out;
968         }
969         sk->sk_max_ack_backlog = backlog;
970         err = 0;
971
972 out:
973         release_sock(sk);
974         return err;
975 }
976
977 EXPORT_SYMBOL_GPL(inet_dccp_listen);
978
/*
 * Drive the socket towards DCCP_CLOSED on behalf of dccp_close(), sending
 * a Close/CloseReq where the current state requires one.  Called with the
 * socket locked.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* The peer initiated the close; just complete our side. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		/*
		 * A server that does not hold TIMEWAIT state itself asks the
		 * client to do so (CloseReq); otherwise we wait in CLOSING.
		 */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
1005
/**
 * dccp_close  -  protocol side of close(2) for a DCCP socket
 * @sk:      socket being closed
 * @timeout: linger time for flushing queued data
 *
 * Flushes the receive queue, terminates the connection (aborting with a
 * Reset if unread data had to be discarded), orphans the socket and, if
 * it has already reached DCCP_CLOSED, destroys it.  Otherwise the socket
 * lives on until the protocol state machine finishes the close.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
1104
/*
 * Protocol hook for shutdown(2).  Apart from the debug message this is a
 * no-op: @how is ignored and no packet is sent.  NOTE(review): this looks
 * like a long-standing unimplemented stub rather than deliberate design -
 * confirm before relying on shutdown() semantics for DCCP sockets.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1111
1112 static inline int __init dccp_mib_init(void)
1113 {
1114         dccp_statistics = alloc_percpu(struct dccp_mib);
1115         if (!dccp_statistics)
1116                 return -ENOMEM;
1117         return 0;
1118 }
1119
/* Release the per-CPU MIB counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}
1124
/* Optional override for the established-hash size, set at module load. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime-writable knob (0644) gating dccp_pr_debug() output. */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1136
/*
 * Module initialisation: set up the orphan counter, the bind-bucket slab
 * cache, the established (ehash) and bind (bhash) hash tables sized from
 * available RAM (or the thash_entries parameter), the MIB counters, ack
 * vectors, sysctls and built-in CCIDs.  On any failure, everything set up
 * so far is torn down in reverse order via the goto-cleanup chain.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	/* Smallest page order whose allocation covers the goal. */
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Try progressively smaller orders until the allocation succeeds. */
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		/* Round the bucket count down to a power of two for ehash_mask. */
		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* Size the bind hash like the ehash, but cap it at 64K buckets. */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	/* Leave no dangling pointers behind for a later retry/unload. */
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}
1259
/*
 * Module teardown: release everything dccp_init() set up, in roughly
 * reverse order (CCIDs, MIBs, hash tables, locks, slab cache, ack
 * vectors, sysctls, orphan counter).
 */
static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	/* Table sizes are recomputed from bhash_size/ehash_mask since the
	 * allocation orders from dccp_init() were not saved.
	 */
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}
1276
/* Module entry/exit registration and metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");