GNU Linux-libre 4.14.290-gnu1
[releases.git] / net / ipv4 / sysctl_net_ipv4.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
4  *
5  * Begun April 1, 1996, Mike Shaver.
6  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
7  */
8
9 #include <linux/mm.h>
10 #include <linux/module.h>
11 #include <linux/sysctl.h>
12 #include <linux/igmp.h>
13 #include <linux/inetdevice.h>
14 #include <linux/seqlock.h>
15 #include <linux/init.h>
16 #include <linux/slab.h>
17 #include <linux/nsproxy.h>
18 #include <linux/swap.h>
19 #include <net/snmp.h>
20 #include <net/icmp.h>
21 #include <net/ip.h>
22 #include <net/route.h>
23 #include <net/tcp.h>
24 #include <net/udp.h>
25 #include <net/cipso_ipv4.h>
26 #include <net/inet_frag.h>
27 #include <net/ping.h>
28 #include <net/protocol.h>
29
30 static int zero;
31 static int one = 1;
32 static int four = 4;
33 static int thousand = 1000;
34 static int gso_max_segs = GSO_MAX_SEGS;
35 static int tcp_retr1_max = 255;
36 static int ip_local_port_range_min[] = { 1, 1 };
37 static int ip_local_port_range_max[] = { 65535, 65535 };
38 static int tcp_adv_win_scale_min = -31;
39 static int tcp_adv_win_scale_max = 31;
40 static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
41 static int tcp_min_snd_mss_max = 65535;
42 static int ip_privileged_port_min;
43 static int ip_privileged_port_max = 65535;
44 static int ip_ttl_min = 1;
45 static int ip_ttl_max = 255;
46 static int tcp_syn_retries_min = 1;
47 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
48 static int ip_ping_group_range_min[] = { 0, 0 };
49 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
50 static int one_day_secs = 24 * 3600;
51
52 /* obsolete */
53 static int sysctl_tcp_low_latency __read_mostly;
54
55 /* Update system visible IP port range */
56 static void set_local_port_range(struct net *net, int range[2])
57 {
58         bool same_parity = !((range[0] ^ range[1]) & 1);
59
60         write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
61         if (same_parity && !net->ipv4.ip_local_ports.warned) {
62                 net->ipv4.ip_local_ports.warned = true;
63                 pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
64         }
65         net->ipv4.ip_local_ports.range[0] = range[0];
66         net->ipv4.ip_local_ports.range[1] = range[1];
67         write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
68 }
69
70 /* Validate changes from /proc interface. */
71 static int ipv4_local_port_range(struct ctl_table *table, int write,
72                                  void __user *buffer,
73                                  size_t *lenp, loff_t *ppos)
74 {
75         struct net *net =
76                 container_of(table->data, struct net, ipv4.ip_local_ports.range);
77         int ret;
78         int range[2];
79         struct ctl_table tmp = {
80                 .data = &range,
81                 .maxlen = sizeof(range),
82                 .mode = table->mode,
83                 .extra1 = &ip_local_port_range_min,
84                 .extra2 = &ip_local_port_range_max,
85         };
86
87         inet_get_local_port_range(net, &range[0], &range[1]);
88
89         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
90
91         if (write && ret == 0) {
92                 /* Ensure that the upper limit is not smaller than the lower,
93                  * and that the lower does not encroach upon the privileged
94                  * port limit.
95                  */
96                 if ((range[1] < range[0]) ||
97                     (range[0] < net->ipv4.sysctl_ip_prot_sock))
98                         ret = -EINVAL;
99                 else
100                         set_local_port_range(net, range);
101         }
102
103         return ret;
104 }
105
106 /* Validate changes from /proc interface. */
107 static int ipv4_privileged_ports(struct ctl_table *table, int write,
108                                 void __user *buffer, size_t *lenp, loff_t *ppos)
109 {
110         struct net *net = container_of(table->data, struct net,
111             ipv4.sysctl_ip_prot_sock);
112         int ret;
113         int pports;
114         int range[2];
115         struct ctl_table tmp = {
116                 .data = &pports,
117                 .maxlen = sizeof(pports),
118                 .mode = table->mode,
119                 .extra1 = &ip_privileged_port_min,
120                 .extra2 = &ip_privileged_port_max,
121         };
122
123         pports = net->ipv4.sysctl_ip_prot_sock;
124
125         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
126
127         if (write && ret == 0) {
128                 inet_get_local_port_range(net, &range[0], &range[1]);
129                 /* Ensure that the local port range doesn't overlap with the
130                  * privileged port range.
131                  */
132                 if (range[0] < pports)
133                         ret = -EINVAL;
134                 else
135                         net->ipv4.sysctl_ip_prot_sock = pports;
136         }
137
138         return ret;
139 }
140
141 static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
142 {
143         kgid_t *data = table->data;
144         struct net *net =
145                 container_of(table->data, struct net, ipv4.ping_group_range.range);
146         unsigned int seq;
147         do {
148                 seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
149
150                 *low = data[0];
151                 *high = data[1];
152         } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
153 }
154
155 /* Update system visible IP port range */
156 static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
157 {
158         kgid_t *data = table->data;
159         struct net *net =
160                 container_of(table->data, struct net, ipv4.ping_group_range.range);
161         write_seqlock(&net->ipv4.ping_group_range.lock);
162         data[0] = low;
163         data[1] = high;
164         write_sequnlock(&net->ipv4.ping_group_range.lock);
165 }
166
167 /* Validate changes from /proc interface. */
168 static int ipv4_ping_group_range(struct ctl_table *table, int write,
169                                  void __user *buffer,
170                                  size_t *lenp, loff_t *ppos)
171 {
172         struct user_namespace *user_ns = current_user_ns();
173         int ret;
174         gid_t urange[2];
175         kgid_t low, high;
176         struct ctl_table tmp = {
177                 .data = &urange,
178                 .maxlen = sizeof(urange),
179                 .mode = table->mode,
180                 .extra1 = &ip_ping_group_range_min,
181                 .extra2 = &ip_ping_group_range_max,
182         };
183
184         inet_get_ping_group_range_table(table, &low, &high);
185         urange[0] = from_kgid_munged(user_ns, low);
186         urange[1] = from_kgid_munged(user_ns, high);
187         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
188
189         if (write && ret == 0) {
190                 low = make_kgid(user_ns, urange[0]);
191                 high = make_kgid(user_ns, urange[1]);
192                 if (!gid_valid(low) || !gid_valid(high))
193                         return -EINVAL;
194                 if (urange[1] < urange[0] || gid_lt(high, low)) {
195                         low = make_kgid(&init_user_ns, 1);
196                         high = make_kgid(&init_user_ns, 0);
197                 }
198                 set_ping_group_range(table, low, high);
199         }
200
201         return ret;
202 }
203
204 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
205                                        void __user *buffer, size_t *lenp, loff_t *ppos)
206 {
207         char val[TCP_CA_NAME_MAX];
208         struct ctl_table tbl = {
209                 .data = val,
210                 .maxlen = TCP_CA_NAME_MAX,
211         };
212         int ret;
213
214         tcp_get_default_congestion_control(val);
215
216         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
217         if (write && ret == 0)
218                 ret = tcp_set_default_congestion_control(val);
219         return ret;
220 }
221
222 static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
223                                                  int write,
224                                                  void __user *buffer, size_t *lenp,
225                                                  loff_t *ppos)
226 {
227         struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
228         int ret;
229
230         tbl.data = kmalloc(tbl.maxlen, GFP_USER);
231         if (!tbl.data)
232                 return -ENOMEM;
233         tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
234         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
235         kfree(tbl.data);
236         return ret;
237 }
238
239 static int proc_allowed_congestion_control(struct ctl_table *ctl,
240                                            int write,
241                                            void __user *buffer, size_t *lenp,
242                                            loff_t *ppos)
243 {
244         struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
245         int ret;
246
247         tbl.data = kmalloc(tbl.maxlen, GFP_USER);
248         if (!tbl.data)
249                 return -ENOMEM;
250
251         tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
252         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
253         if (write && ret == 0)
254                 ret = tcp_set_allowed_congestion_control(tbl.data);
255         kfree(tbl.data);
256         return ret;
257 }
258
259 static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
260                                  void __user *buffer, size_t *lenp,
261                                  loff_t *ppos)
262 {
263         struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
264         struct tcp_fastopen_context *ctxt;
265         u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
266         __le32 key[4];
267         int ret, i;
268
269         tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
270         if (!tbl.data)
271                 return -ENOMEM;
272
273         rcu_read_lock();
274         ctxt = rcu_dereference(tcp_fastopen_ctx);
275         if (ctxt)
276                 memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
277         else
278                 memset(key, 0, sizeof(key));
279         rcu_read_unlock();
280
281         for (i = 0; i < ARRAY_SIZE(key); i++)
282                 user_key[i] = le32_to_cpu(key[i]);
283
284         snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
285                 user_key[0], user_key[1], user_key[2], user_key[3]);
286         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
287
288         if (write && ret == 0) {
289                 if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
290                            user_key + 2, user_key + 3) != 4) {
291                         ret = -EINVAL;
292                         goto bad_key;
293                 }
294                 /* Generate a dummy secret but don't publish it. This
295                  * is needed so we don't regenerate a new key on the
296                  * first invocation of tcp_fastopen_cookie_gen
297                  */
298                 tcp_fastopen_init_key_once(false);
299
300                 for (i = 0; i < ARRAY_SIZE(user_key); i++)
301                         key[i] = cpu_to_le32(user_key[i]);
302
303                 tcp_fastopen_reset_cipher(key, TCP_FASTOPEN_KEY_LENGTH);
304         }
305
306 bad_key:
307         pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
308                  user_key[0], user_key[1], user_key[2], user_key[3],
309                (char *)tbl.data, ret);
310         kfree(tbl.data);
311         return ret;
312 }
313
314 static void proc_configure_early_demux(int enabled, int protocol)
315 {
316         struct net_protocol *ipprot;
317 #if IS_ENABLED(CONFIG_IPV6)
318         struct inet6_protocol *ip6prot;
319 #endif
320
321         rcu_read_lock();
322
323         ipprot = rcu_dereference(inet_protos[protocol]);
324         if (ipprot)
325                 ipprot->early_demux = enabled ? ipprot->early_demux_handler :
326                                                 NULL;
327
328 #if IS_ENABLED(CONFIG_IPV6)
329         ip6prot = rcu_dereference(inet6_protos[protocol]);
330         if (ip6prot)
331                 ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
332                                                  NULL;
333 #endif
334         rcu_read_unlock();
335 }
336
337 static int proc_tcp_early_demux(struct ctl_table *table, int write,
338                                 void __user *buffer, size_t *lenp, loff_t *ppos)
339 {
340         int ret = 0;
341
342         ret = proc_dointvec(table, write, buffer, lenp, ppos);
343
344         if (write && !ret) {
345                 int enabled = init_net.ipv4.sysctl_tcp_early_demux;
346
347                 proc_configure_early_demux(enabled, IPPROTO_TCP);
348         }
349
350         return ret;
351 }
352
353 static int proc_udp_early_demux(struct ctl_table *table, int write,
354                                 void __user *buffer, size_t *lenp, loff_t *ppos)
355 {
356         int ret = 0;
357
358         ret = proc_dointvec(table, write, buffer, lenp, ppos);
359
360         if (write && !ret) {
361                 int enabled = init_net.ipv4.sysctl_udp_early_demux;
362
363                 proc_configure_early_demux(enabled, IPPROTO_UDP);
364         }
365
366         return ret;
367 }
368
369 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
370                                              int write,
371                                              void __user *buffer,
372                                              size_t *lenp, loff_t *ppos)
373 {
374         int ret;
375
376         ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
377         if (write && ret == 0)
378                 tcp_fastopen_active_timeout_reset();
379
380         return ret;
381 }
382
383 static int proc_tcp_available_ulp(struct ctl_table *ctl,
384                                   int write,
385                                   void __user *buffer, size_t *lenp,
386                                   loff_t *ppos)
387 {
388         struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, };
389         int ret;
390
391         tbl.data = kmalloc(tbl.maxlen, GFP_USER);
392         if (!tbl.data)
393                 return -ENOMEM;
394         tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX);
395         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
396         kfree(tbl.data);
397
398         return ret;
399 }
400
401 static struct ctl_table ipv4_table[] = {
402         {
403                 .procname       = "tcp_retrans_collapse",
404                 .data           = &sysctl_tcp_retrans_collapse,
405                 .maxlen         = sizeof(int),
406                 .mode           = 0644,
407                 .proc_handler   = proc_dointvec
408         },
409         {
410                 .procname       = "tcp_max_orphans",
411                 .data           = &sysctl_tcp_max_orphans,
412                 .maxlen         = sizeof(int),
413                 .mode           = 0644,
414                 .proc_handler   = proc_dointvec
415         },
416         {
417                 .procname       = "tcp_fastopen",
418                 .data           = &sysctl_tcp_fastopen,
419                 .maxlen         = sizeof(int),
420                 .mode           = 0644,
421                 .proc_handler   = proc_dointvec,
422         },
423         {
424                 .procname       = "tcp_fastopen_key",
425                 .mode           = 0600,
426                 .maxlen         = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
427                 .proc_handler   = proc_tcp_fastopen_key,
428         },
429         {
430                 .procname       = "tcp_fastopen_blackhole_timeout_sec",
431                 .data           = &sysctl_tcp_fastopen_blackhole_timeout,
432                 .maxlen         = sizeof(int),
433                 .mode           = 0644,
434                 .proc_handler   = proc_tfo_blackhole_detect_timeout,
435                 .extra1         = &zero,
436         },
437         {
438                 .procname       = "tcp_abort_on_overflow",
439                 .data           = &sysctl_tcp_abort_on_overflow,
440                 .maxlen         = sizeof(int),
441                 .mode           = 0644,
442                 .proc_handler   = proc_dointvec
443         },
444         {
445                 .procname       = "tcp_stdurg",
446                 .data           = &sysctl_tcp_stdurg,
447                 .maxlen         = sizeof(int),
448                 .mode           = 0644,
449                 .proc_handler   = proc_dointvec
450         },
451         {
452                 .procname       = "tcp_rfc1337",
453                 .data           = &sysctl_tcp_rfc1337,
454                 .maxlen         = sizeof(int),
455                 .mode           = 0644,
456                 .proc_handler   = proc_dointvec
457         },
458         {
459                 .procname       = "inet_peer_threshold",
460                 .data           = &inet_peer_threshold,
461                 .maxlen         = sizeof(int),
462                 .mode           = 0644,
463                 .proc_handler   = proc_dointvec
464         },
465         {
466                 .procname       = "inet_peer_minttl",
467                 .data           = &inet_peer_minttl,
468                 .maxlen         = sizeof(int),
469                 .mode           = 0644,
470                 .proc_handler   = proc_dointvec_jiffies,
471         },
472         {
473                 .procname       = "inet_peer_maxttl",
474                 .data           = &inet_peer_maxttl,
475                 .maxlen         = sizeof(int),
476                 .mode           = 0644,
477                 .proc_handler   = proc_dointvec_jiffies,
478         },
479         {
480                 .procname       = "tcp_fack",
481                 .data           = &sysctl_tcp_fack,
482                 .maxlen         = sizeof(int),
483                 .mode           = 0644,
484                 .proc_handler   = proc_dointvec
485         },
486         {
487                 .procname       = "tcp_recovery",
488                 .data           = &sysctl_tcp_recovery,
489                 .maxlen         = sizeof(int),
490                 .mode           = 0644,
491                 .proc_handler   = proc_dointvec,
492         },
493         {
494                 .procname       = "tcp_max_reordering",
495                 .data           = &sysctl_tcp_max_reordering,
496                 .maxlen         = sizeof(int),
497                 .mode           = 0644,
498                 .proc_handler   = proc_dointvec
499         },
500         {
501                 .procname       = "tcp_dsack",
502                 .data           = &sysctl_tcp_dsack,
503                 .maxlen         = sizeof(int),
504                 .mode           = 0644,
505                 .proc_handler   = proc_dointvec
506         },
507         {
508                 .procname       = "tcp_mem",
509                 .maxlen         = sizeof(sysctl_tcp_mem),
510                 .data           = &sysctl_tcp_mem,
511                 .mode           = 0644,
512                 .proc_handler   = proc_doulongvec_minmax,
513         },
514         {
515                 .procname       = "tcp_wmem",
516                 .data           = &sysctl_tcp_wmem,
517                 .maxlen         = sizeof(sysctl_tcp_wmem),
518                 .mode           = 0644,
519                 .proc_handler   = proc_dointvec_minmax,
520                 .extra1         = &one,
521         },
522         {
523                 .procname       = "tcp_rmem",
524                 .data           = &sysctl_tcp_rmem,
525                 .maxlen         = sizeof(sysctl_tcp_rmem),
526                 .mode           = 0644,
527                 .proc_handler   = proc_dointvec_minmax,
528                 .extra1         = &one,
529         },
530         {
531                 .procname       = "tcp_app_win",
532                 .data           = &sysctl_tcp_app_win,
533                 .maxlen         = sizeof(int),
534                 .mode           = 0644,
535                 .proc_handler   = proc_dointvec
536         },
537         {
538                 .procname       = "tcp_adv_win_scale",
539                 .data           = &sysctl_tcp_adv_win_scale,
540                 .maxlen         = sizeof(int),
541                 .mode           = 0644,
542                 .proc_handler   = proc_dointvec_minmax,
543                 .extra1         = &tcp_adv_win_scale_min,
544                 .extra2         = &tcp_adv_win_scale_max,
545         },
546         {
547                 .procname       = "tcp_frto",
548                 .data           = &sysctl_tcp_frto,
549                 .maxlen         = sizeof(int),
550                 .mode           = 0644,
551                 .proc_handler   = proc_dointvec
552         },
553         {
554                 .procname       = "tcp_min_rtt_wlen",
555                 .data           = &sysctl_tcp_min_rtt_wlen,
556                 .maxlen         = sizeof(int),
557                 .mode           = 0644,
558                 .proc_handler   = proc_dointvec_minmax,
559                 .extra1         = &zero,
560                 .extra2         = &one_day_secs
561         },
562         {
563                 .procname       = "tcp_low_latency",
564                 .data           = &sysctl_tcp_low_latency,
565                 .maxlen         = sizeof(int),
566                 .mode           = 0644,
567                 .proc_handler   = proc_dointvec
568         },
569         {
570                 .procname       = "tcp_no_metrics_save",
571                 .data           = &sysctl_tcp_nometrics_save,
572                 .maxlen         = sizeof(int),
573                 .mode           = 0644,
574                 .proc_handler   = proc_dointvec,
575         },
576         {
577                 .procname       = "tcp_moderate_rcvbuf",
578                 .data           = &sysctl_tcp_moderate_rcvbuf,
579                 .maxlen         = sizeof(int),
580                 .mode           = 0644,
581                 .proc_handler   = proc_dointvec,
582         },
583         {
584                 .procname       = "tcp_tso_win_divisor",
585                 .data           = &sysctl_tcp_tso_win_divisor,
586                 .maxlen         = sizeof(int),
587                 .mode           = 0644,
588                 .proc_handler   = proc_dointvec,
589         },
590         {
591                 .procname       = "tcp_congestion_control",
592                 .mode           = 0644,
593                 .maxlen         = TCP_CA_NAME_MAX,
594                 .proc_handler   = proc_tcp_congestion_control,
595         },
596         {
597                 .procname       = "tcp_workaround_signed_windows",
598                 .data           = &sysctl_tcp_workaround_signed_windows,
599                 .maxlen         = sizeof(int),
600                 .mode           = 0644,
601                 .proc_handler   = proc_dointvec
602         },
603         {
604                 .procname       = "tcp_limit_output_bytes",
605                 .data           = &sysctl_tcp_limit_output_bytes,
606                 .maxlen         = sizeof(int),
607                 .mode           = 0644,
608                 .proc_handler   = proc_dointvec
609         },
610         {
611                 .procname       = "tcp_challenge_ack_limit",
612                 .data           = &sysctl_tcp_challenge_ack_limit,
613                 .maxlen         = sizeof(int),
614                 .mode           = 0644,
615                 .proc_handler   = proc_dointvec
616         },
617         {
618                 .procname       = "tcp_slow_start_after_idle",
619                 .data           = &sysctl_tcp_slow_start_after_idle,
620                 .maxlen         = sizeof(int),
621                 .mode           = 0644,
622                 .proc_handler   = proc_dointvec
623         },
624 #ifdef CONFIG_NETLABEL
625         {
626                 .procname       = "cipso_cache_enable",
627                 .data           = &cipso_v4_cache_enabled,
628                 .maxlen         = sizeof(int),
629                 .mode           = 0644,
630                 .proc_handler   = proc_dointvec,
631         },
632         {
633                 .procname       = "cipso_cache_bucket_size",
634                 .data           = &cipso_v4_cache_bucketsize,
635                 .maxlen         = sizeof(int),
636                 .mode           = 0644,
637                 .proc_handler   = proc_dointvec,
638         },
639         {
640                 .procname       = "cipso_rbm_optfmt",
641                 .data           = &cipso_v4_rbm_optfmt,
642                 .maxlen         = sizeof(int),
643                 .mode           = 0644,
644                 .proc_handler   = proc_dointvec,
645         },
646         {
647                 .procname       = "cipso_rbm_strictvalid",
648                 .data           = &cipso_v4_rbm_strictvalid,
649                 .maxlen         = sizeof(int),
650                 .mode           = 0644,
651                 .proc_handler   = proc_dointvec,
652         },
653 #endif /* CONFIG_NETLABEL */
654         {
655                 .procname       = "tcp_available_congestion_control",
656                 .maxlen         = TCP_CA_BUF_MAX,
657                 .mode           = 0444,
658                 .proc_handler   = proc_tcp_available_congestion_control,
659         },
660         {
661                 .procname       = "tcp_allowed_congestion_control",
662                 .maxlen         = TCP_CA_BUF_MAX,
663                 .mode           = 0644,
664                 .proc_handler   = proc_allowed_congestion_control,
665         },
666         {
667                 .procname       = "tcp_thin_linear_timeouts",
668                 .data           = &sysctl_tcp_thin_linear_timeouts,
669                 .maxlen         = sizeof(int),
670                 .mode           = 0644,
671                 .proc_handler   = proc_dointvec
672         },
673         {
674                 .procname       = "tcp_early_retrans",
675                 .data           = &sysctl_tcp_early_retrans,
676                 .maxlen         = sizeof(int),
677                 .mode           = 0644,
678                 .proc_handler   = proc_dointvec_minmax,
679                 .extra1         = &zero,
680                 .extra2         = &four,
681         },
682         {
683                 .procname       = "tcp_min_tso_segs",
684                 .data           = &sysctl_tcp_min_tso_segs,
685                 .maxlen         = sizeof(int),
686                 .mode           = 0644,
687                 .proc_handler   = proc_dointvec_minmax,
688                 .extra1         = &one,
689                 .extra2         = &gso_max_segs,
690         },
691         {
692                 .procname       = "tcp_pacing_ss_ratio",
693                 .data           = &sysctl_tcp_pacing_ss_ratio,
694                 .maxlen         = sizeof(int),
695                 .mode           = 0644,
696                 .proc_handler   = proc_dointvec_minmax,
697                 .extra1         = &zero,
698                 .extra2         = &thousand,
699         },
700         {
701                 .procname       = "tcp_pacing_ca_ratio",
702                 .data           = &sysctl_tcp_pacing_ca_ratio,
703                 .maxlen         = sizeof(int),
704                 .mode           = 0644,
705                 .proc_handler   = proc_dointvec_minmax,
706                 .extra1         = &zero,
707                 .extra2         = &thousand,
708         },
709         {
710                 .procname       = "tcp_autocorking",
711                 .data           = &sysctl_tcp_autocorking,
712                 .maxlen         = sizeof(int),
713                 .mode           = 0644,
714                 .proc_handler   = proc_dointvec_minmax,
715                 .extra1         = &zero,
716                 .extra2         = &one,
717         },
718         {
719                 .procname       = "tcp_invalid_ratelimit",
720                 .data           = &sysctl_tcp_invalid_ratelimit,
721                 .maxlen         = sizeof(int),
722                 .mode           = 0644,
723                 .proc_handler   = proc_dointvec_ms_jiffies,
724         },
725         {
726                 .procname       = "tcp_available_ulp",
727                 .maxlen         = TCP_ULP_BUF_MAX,
728                 .mode           = 0444,
729                 .proc_handler   = proc_tcp_available_ulp,
730         },
731         {
732                 .procname       = "icmp_msgs_per_sec",
733                 .data           = &sysctl_icmp_msgs_per_sec,
734                 .maxlen         = sizeof(int),
735                 .mode           = 0644,
736                 .proc_handler   = proc_dointvec_minmax,
737                 .extra1         = &zero,
738         },
739         {
740                 .procname       = "icmp_msgs_burst",
741                 .data           = &sysctl_icmp_msgs_burst,
742                 .maxlen         = sizeof(int),
743                 .mode           = 0644,
744                 .proc_handler   = proc_dointvec_minmax,
745                 .extra1         = &zero,
746         },
747         {
748                 .procname       = "udp_mem",
749                 .data           = &sysctl_udp_mem,
750                 .maxlen         = sizeof(sysctl_udp_mem),
751                 .mode           = 0644,
752                 .proc_handler   = proc_doulongvec_minmax,
753         },
754         {
755                 .procname       = "udp_rmem_min",
756                 .data           = &sysctl_udp_rmem_min,
757                 .maxlen         = sizeof(sysctl_udp_rmem_min),
758                 .mode           = 0644,
759                 .proc_handler   = proc_dointvec_minmax,
760                 .extra1         = &one
761         },
762         {
763                 .procname       = "udp_wmem_min",
764                 .data           = &sysctl_udp_wmem_min,
765                 .maxlen         = sizeof(sysctl_udp_wmem_min),
766                 .mode           = 0644,
767                 .proc_handler   = proc_dointvec_minmax,
768                 .extra1         = &one
769         },
770         { }
771 };
772
773 static struct ctl_table ipv4_net_table[] = {
774         {
775                 .procname       = "icmp_echo_ignore_all",
776                 .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
777                 .maxlen         = sizeof(int),
778                 .mode           = 0644,
779                 .proc_handler   = proc_dointvec
780         },
781         {
782                 .procname       = "icmp_echo_ignore_broadcasts",
783                 .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
784                 .maxlen         = sizeof(int),
785                 .mode           = 0644,
786                 .proc_handler   = proc_dointvec
787         },
788         {
789                 .procname       = "icmp_ignore_bogus_error_responses",
790                 .data           = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
791                 .maxlen         = sizeof(int),
792                 .mode           = 0644,
793                 .proc_handler   = proc_dointvec
794         },
795         {
796                 .procname       = "icmp_errors_use_inbound_ifaddr",
797                 .data           = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
798                 .maxlen         = sizeof(int),
799                 .mode           = 0644,
800                 .proc_handler   = proc_dointvec
801         },
802         {
803                 .procname       = "icmp_ratelimit",
804                 .data           = &init_net.ipv4.sysctl_icmp_ratelimit,
805                 .maxlen         = sizeof(int),
806                 .mode           = 0644,
807                 .proc_handler   = proc_dointvec_ms_jiffies,
808         },
809         {
810                 .procname       = "icmp_ratemask",
811                 .data           = &init_net.ipv4.sysctl_icmp_ratemask,
812                 .maxlen         = sizeof(int),
813                 .mode           = 0644,
814                 .proc_handler   = proc_dointvec
815         },
816         {
817                 .procname       = "ping_group_range",
818                 .data           = &init_net.ipv4.ping_group_range.range,
819                 .maxlen         = sizeof(gid_t)*2,
820                 .mode           = 0644,
821                 .proc_handler   = ipv4_ping_group_range,
822         },
823         {
824                 .procname       = "tcp_ecn",
825                 .data           = &init_net.ipv4.sysctl_tcp_ecn,
826                 .maxlen         = sizeof(int),
827                 .mode           = 0644,
828                 .proc_handler   = proc_dointvec
829         },
830         {
831                 .procname       = "tcp_ecn_fallback",
832                 .data           = &init_net.ipv4.sysctl_tcp_ecn_fallback,
833                 .maxlen         = sizeof(int),
834                 .mode           = 0644,
835                 .proc_handler   = proc_dointvec
836         },
837         {
838                 .procname       = "ip_dynaddr",
839                 .data           = &init_net.ipv4.sysctl_ip_dynaddr,
840                 .maxlen         = sizeof(int),
841                 .mode           = 0644,
842                 .proc_handler   = proc_dointvec
843         },
844         {
845                 .procname       = "ip_early_demux",
846                 .data           = &init_net.ipv4.sysctl_ip_early_demux,
847                 .maxlen         = sizeof(int),
848                 .mode           = 0644,
849                 .proc_handler   = proc_dointvec
850         },
851         {
852                 .procname       = "udp_early_demux",
853                 .data           = &init_net.ipv4.sysctl_udp_early_demux,
854                 .maxlen         = sizeof(int),
855                 .mode           = 0644,
856                 .proc_handler   = proc_udp_early_demux
857         },
858         {
859                 .procname       = "tcp_early_demux",
860                 .data           = &init_net.ipv4.sysctl_tcp_early_demux,
861                 .maxlen         = sizeof(int),
862                 .mode           = 0644,
863                 .proc_handler   = proc_tcp_early_demux
864         },
865         {
866                 .procname       = "ip_default_ttl",
867                 .data           = &init_net.ipv4.sysctl_ip_default_ttl,
868                 .maxlen         = sizeof(int),
869                 .mode           = 0644,
870                 .proc_handler   = proc_dointvec_minmax,
871                 .extra1         = &ip_ttl_min,
872                 .extra2         = &ip_ttl_max,
873         },
874         {
875                 .procname       = "ip_local_port_range",
876                 .maxlen         = sizeof(init_net.ipv4.ip_local_ports.range),
877                 .data           = &init_net.ipv4.ip_local_ports.range,
878                 .mode           = 0644,
879                 .proc_handler   = ipv4_local_port_range,
880         },
881         {
882                 .procname       = "ip_local_reserved_ports",
883                 .data           = &init_net.ipv4.sysctl_local_reserved_ports,
884                 .maxlen         = 65536,
885                 .mode           = 0644,
886                 .proc_handler   = proc_do_large_bitmap,
887         },
888         {
889                 .procname       = "ip_no_pmtu_disc",
890                 .data           = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
891                 .maxlen         = sizeof(int),
892                 .mode           = 0644,
893                 .proc_handler   = proc_dointvec
894         },
895         {
896                 .procname       = "ip_forward_use_pmtu",
897                 .data           = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
898                 .maxlen         = sizeof(int),
899                 .mode           = 0644,
900                 .proc_handler   = proc_dointvec,
901         },
902         {
903                 .procname       = "ip_nonlocal_bind",
904                 .data           = &init_net.ipv4.sysctl_ip_nonlocal_bind,
905                 .maxlen         = sizeof(int),
906                 .mode           = 0644,
907                 .proc_handler   = proc_dointvec
908         },
909         {
910                 .procname       = "fwmark_reflect",
911                 .data           = &init_net.ipv4.sysctl_fwmark_reflect,
912                 .maxlen         = sizeof(int),
913                 .mode           = 0644,
914                 .proc_handler   = proc_dointvec,
915         },
916         {
917                 .procname       = "tcp_fwmark_accept",
918                 .data           = &init_net.ipv4.sysctl_tcp_fwmark_accept,
919                 .maxlen         = sizeof(int),
920                 .mode           = 0644,
921                 .proc_handler   = proc_dointvec,
922         },
923 #ifdef CONFIG_NET_L3_MASTER_DEV
924         {
925                 .procname       = "tcp_l3mdev_accept",
926                 .data           = &init_net.ipv4.sysctl_tcp_l3mdev_accept,
927                 .maxlen         = sizeof(int),
928                 .mode           = 0644,
929                 .proc_handler   = proc_dointvec_minmax,
930                 .extra1         = &zero,
931                 .extra2         = &one,
932         },
933 #endif
934         {
935                 .procname       = "tcp_mtu_probing",
936                 .data           = &init_net.ipv4.sysctl_tcp_mtu_probing,
937                 .maxlen         = sizeof(int),
938                 .mode           = 0644,
939                 .proc_handler   = proc_dointvec,
940         },
941         {
942                 .procname       = "tcp_base_mss",
943                 .data           = &init_net.ipv4.sysctl_tcp_base_mss,
944                 .maxlen         = sizeof(int),
945                 .mode           = 0644,
946                 .proc_handler   = proc_dointvec,
947         },
948         {
949                 .procname       = "tcp_min_snd_mss",
950                 .data           = &init_net.ipv4.sysctl_tcp_min_snd_mss,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec_minmax,
954                 .extra1         = &tcp_min_snd_mss_min,
955                 .extra2         = &tcp_min_snd_mss_max,
956         },
957         {
958                 .procname       = "tcp_probe_threshold",
959                 .data           = &init_net.ipv4.sysctl_tcp_probe_threshold,
960                 .maxlen         = sizeof(int),
961                 .mode           = 0644,
962                 .proc_handler   = proc_dointvec,
963         },
964         {
965                 .procname       = "tcp_probe_interval",
966                 .data           = &init_net.ipv4.sysctl_tcp_probe_interval,
967                 .maxlen         = sizeof(int),
968                 .mode           = 0644,
969                 .proc_handler   = proc_dointvec,
970         },
971         {
972                 .procname       = "igmp_link_local_mcast_reports",
973                 .data           = &init_net.ipv4.sysctl_igmp_llm_reports,
974                 .maxlen         = sizeof(int),
975                 .mode           = 0644,
976                 .proc_handler   = proc_dointvec
977         },
978         {
979                 .procname       = "igmp_max_memberships",
980                 .data           = &init_net.ipv4.sysctl_igmp_max_memberships,
981                 .maxlen         = sizeof(int),
982                 .mode           = 0644,
983                 .proc_handler   = proc_dointvec
984         },
985         {
986                 .procname       = "igmp_max_msf",
987                 .data           = &init_net.ipv4.sysctl_igmp_max_msf,
988                 .maxlen         = sizeof(int),
989                 .mode           = 0644,
990                 .proc_handler   = proc_dointvec
991         },
992 #ifdef CONFIG_IP_MULTICAST
993         {
994                 .procname       = "igmp_qrv",
995                 .data           = &init_net.ipv4.sysctl_igmp_qrv,
996                 .maxlen         = sizeof(int),
997                 .mode           = 0644,
998                 .proc_handler   = proc_dointvec_minmax,
999                 .extra1         = &one
1000         },
1001 #endif
1002         {
1003                 .procname       = "tcp_keepalive_time",
1004                 .data           = &init_net.ipv4.sysctl_tcp_keepalive_time,
1005                 .maxlen         = sizeof(int),
1006                 .mode           = 0644,
1007                 .proc_handler   = proc_dointvec_jiffies,
1008         },
1009         {
1010                 .procname       = "tcp_keepalive_probes",
1011                 .data           = &init_net.ipv4.sysctl_tcp_keepalive_probes,
1012                 .maxlen         = sizeof(int),
1013                 .mode           = 0644,
1014                 .proc_handler   = proc_dointvec
1015         },
1016         {
1017                 .procname       = "tcp_keepalive_intvl",
1018                 .data           = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
1019                 .maxlen         = sizeof(int),
1020                 .mode           = 0644,
1021                 .proc_handler   = proc_dointvec_jiffies,
1022         },
1023         {
1024                 .procname       = "tcp_syn_retries",
1025                 .data           = &init_net.ipv4.sysctl_tcp_syn_retries,
1026                 .maxlen         = sizeof(int),
1027                 .mode           = 0644,
1028                 .proc_handler   = proc_dointvec_minmax,
1029                 .extra1         = &tcp_syn_retries_min,
1030                 .extra2         = &tcp_syn_retries_max
1031         },
1032         {
1033                 .procname       = "tcp_synack_retries",
1034                 .data           = &init_net.ipv4.sysctl_tcp_synack_retries,
1035                 .maxlen         = sizeof(int),
1036                 .mode           = 0644,
1037                 .proc_handler   = proc_dointvec
1038         },
1039 #ifdef CONFIG_SYN_COOKIES
1040         {
1041                 .procname       = "tcp_syncookies",
1042                 .data           = &init_net.ipv4.sysctl_tcp_syncookies,
1043                 .maxlen         = sizeof(int),
1044                 .mode           = 0644,
1045                 .proc_handler   = proc_dointvec
1046         },
1047 #endif
1048         {
1049                 .procname       = "tcp_reordering",
1050                 .data           = &init_net.ipv4.sysctl_tcp_reordering,
1051                 .maxlen         = sizeof(int),
1052                 .mode           = 0644,
1053                 .proc_handler   = proc_dointvec
1054         },
1055         {
1056                 .procname       = "tcp_retries1",
1057                 .data           = &init_net.ipv4.sysctl_tcp_retries1,
1058                 .maxlen         = sizeof(int),
1059                 .mode           = 0644,
1060                 .proc_handler   = proc_dointvec_minmax,
1061                 .extra2         = &tcp_retr1_max
1062         },
1063         {
1064                 .procname       = "tcp_retries2",
1065                 .data           = &init_net.ipv4.sysctl_tcp_retries2,
1066                 .maxlen         = sizeof(int),
1067                 .mode           = 0644,
1068                 .proc_handler   = proc_dointvec
1069         },
1070         {
1071                 .procname       = "tcp_orphan_retries",
1072                 .data           = &init_net.ipv4.sysctl_tcp_orphan_retries,
1073                 .maxlen         = sizeof(int),
1074                 .mode           = 0644,
1075                 .proc_handler   = proc_dointvec
1076         },
1077         {
1078                 .procname       = "tcp_fin_timeout",
1079                 .data           = &init_net.ipv4.sysctl_tcp_fin_timeout,
1080                 .maxlen         = sizeof(int),
1081                 .mode           = 0644,
1082                 .proc_handler   = proc_dointvec_jiffies,
1083         },
1084         {
1085                 .procname       = "tcp_notsent_lowat",
1086                 .data           = &init_net.ipv4.sysctl_tcp_notsent_lowat,
1087                 .maxlen         = sizeof(unsigned int),
1088                 .mode           = 0644,
1089                 .proc_handler   = proc_douintvec,
1090         },
1091         {
1092                 .procname       = "tcp_tw_reuse",
1093                 .data           = &init_net.ipv4.sysctl_tcp_tw_reuse,
1094                 .maxlen         = sizeof(int),
1095                 .mode           = 0644,
1096                 .proc_handler   = proc_dointvec
1097         },
1098         {
1099                 .procname       = "tcp_max_tw_buckets",
1100                 .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
1101                 .maxlen         = sizeof(int),
1102                 .mode           = 0644,
1103                 .proc_handler   = proc_dointvec
1104         },
1105         {
1106                 .procname       = "tcp_max_syn_backlog",
1107                 .data           = &init_net.ipv4.sysctl_max_syn_backlog,
1108                 .maxlen         = sizeof(int),
1109                 .mode           = 0644,
1110                 .proc_handler   = proc_dointvec
1111         },
1112 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1113         {
1114                 .procname       = "fib_multipath_use_neigh",
1115                 .data           = &init_net.ipv4.sysctl_fib_multipath_use_neigh,
1116                 .maxlen         = sizeof(int),
1117                 .mode           = 0644,
1118                 .proc_handler   = proc_dointvec_minmax,
1119                 .extra1         = &zero,
1120                 .extra2         = &one,
1121         },
1122         {
1123                 .procname       = "fib_multipath_hash_policy",
1124                 .data           = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
1125                 .maxlen         = sizeof(int),
1126                 .mode           = 0644,
1127                 .proc_handler   = proc_dointvec_minmax,
1128                 .extra1         = &zero,
1129                 .extra2         = &one,
1130         },
1131 #endif
1132         {
1133                 .procname       = "ip_unprivileged_port_start",
1134                 .maxlen         = sizeof(int),
1135                 .data           = &init_net.ipv4.sysctl_ip_prot_sock,
1136                 .mode           = 0644,
1137                 .proc_handler   = ipv4_privileged_ports,
1138         },
1139 #ifdef CONFIG_NET_L3_MASTER_DEV
1140         {
1141                 .procname       = "udp_l3mdev_accept",
1142                 .data           = &init_net.ipv4.sysctl_udp_l3mdev_accept,
1143                 .maxlen         = sizeof(int),
1144                 .mode           = 0644,
1145                 .proc_handler   = proc_dointvec_minmax,
1146                 .extra1         = &zero,
1147                 .extra2         = &one,
1148         },
1149 #endif
1150         {
1151                 .procname       = "tcp_sack",
1152                 .data           = &init_net.ipv4.sysctl_tcp_sack,
1153                 .maxlen         = sizeof(int),
1154                 .mode           = 0644,
1155                 .proc_handler   = proc_dointvec
1156         },
1157         {
1158                 .procname       = "tcp_window_scaling",
1159                 .data           = &init_net.ipv4.sysctl_tcp_window_scaling,
1160                 .maxlen         = sizeof(int),
1161                 .mode           = 0644,
1162                 .proc_handler   = proc_dointvec
1163         },
1164         {
1165                 .procname       = "tcp_timestamps",
1166                 .data           = &init_net.ipv4.sysctl_tcp_timestamps,
1167                 .maxlen         = sizeof(int),
1168                 .mode           = 0644,
1169                 .proc_handler   = proc_dointvec
1170         },
1171         { }
1172 };
1173
1174 static __net_init int ipv4_sysctl_init_net(struct net *net)
1175 {
1176         struct ctl_table *table;
1177
1178         table = ipv4_net_table;
1179         if (!net_eq(net, &init_net)) {
1180                 int i;
1181
1182                 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
1183                 if (!table)
1184                         goto err_alloc;
1185
1186                 /* Update the variables to point into the current struct net */
1187                 for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
1188                         table[i].data += (void *)net - (void *)&init_net;
1189         }
1190
1191         net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
1192         if (!net->ipv4.ipv4_hdr)
1193                 goto err_reg;
1194
1195         net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1196         if (!net->ipv4.sysctl_local_reserved_ports)
1197                 goto err_ports;
1198
1199         return 0;
1200
1201 err_ports:
1202         unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1203 err_reg:
1204         if (!net_eq(net, &init_net))
1205                 kfree(table);
1206 err_alloc:
1207         return -ENOMEM;
1208 }
1209
1210 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
1211 {
1212         struct ctl_table *table;
1213
1214         kfree(net->ipv4.sysctl_local_reserved_ports);
1215         table = net->ipv4.ipv4_hdr->ctl_table_arg;
1216         unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1217         kfree(table);
1218 }
1219
1220 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
1221         .init = ipv4_sysctl_init_net,
1222         .exit = ipv4_sysctl_exit_net,
1223 };
1224
1225 static __init int sysctl_ipv4_init(void)
1226 {
1227         struct ctl_table_header *hdr;
1228
1229         hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
1230         if (!hdr)
1231                 return -ENOMEM;
1232
1233         if (register_pernet_subsys(&ipv4_sysctl_ops)) {
1234                 unregister_net_sysctl_table(hdr);
1235                 return -ENOMEM;
1236         }
1237
1238         return 0;
1239 }
1240
1241 __initcall(sysctl_ipv4_init);