2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
4 * Begun April 1, 1996, Mike Shaver.
5 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
9 #include <linux/module.h>
10 #include <linux/sysctl.h>
11 #include <linux/igmp.h>
12 #include <linux/inetdevice.h>
13 #include <linux/seqlock.h>
14 #include <linux/init.h>
15 #include <linux/slab.h>
16 #include <linux/nsproxy.h>
17 #include <linux/swap.h>
21 #include <net/route.h>
24 #include <net/cipso_ipv4.h>
25 #include <net/inet_frag.h>
27 #include <net/tcp_memcontrol.h>
/*
 * Limit values used by this file's sysctl entries and handlers. They are
 * handed to the proc_dointvec_minmax()-based entries through the .extra1
 * and .extra2 fields.
 */
/* .extra2 of the "tcp_retries1" entry. */
30 static int tcp_retr1_max = 255;
/* .extra1 / .extra2 of the temporary table in ipv4_local_port_range(). */
31 static int ip_local_port_range_min[] = { 1, 1 };
32 static int ip_local_port_range_max[] = { 65535, 65535 };
/* .extra1 / .extra2 of the "tcp_adv_win_scale" entry. */
33 static int tcp_adv_win_scale_min = -31;
34 static int tcp_adv_win_scale_max = 31;
/* .extra1 / .extra2 of the "ip_default_ttl" entry. */
35 static int ip_ttl_min = 1;
36 static int ip_ttl_max = 255;
/* .extra1 / .extra2 of the temporary table in ipv4_ping_group_range(). */
37 static int ip_ping_group_range_min[] = { 0, 0 };
38 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
40 /* Update system visible IP port range */
/*
 * Copies range[0] and range[1] into sysctl_local_ports.range. Both stores
 * happen between write_seqlock() and write_sequnlock() on
 * sysctl_local_ports.lock, i.e. inside a single write-side section of that
 * seqlock.
 */
41 static void set_local_port_range(int range[2])
43 write_seqlock(&sysctl_local_ports.lock);
44 sysctl_local_ports.range[0] = range[0];
45 sysctl_local_ports.range[1] = range[1];
46 write_sequnlock(&sysctl_local_ports.lock);
49 /* Validate changes from /proc interface. */
/*
 * sysctl handler for the local port range.
 *
 * The current pair is first loaded into the local array `range` with
 * inet_get_local_port_range(). proc_dointvec_minmax() is then run on a
 * temporary table (`tmp`) rather than on `table` itself; the visible
 * initializer fields (maxlen of sizeof(range), bounds taken from
 * ip_local_port_range_min / ip_local_port_range_max) appear to belong to
 * that temporary table.
 *
 * Only for a write for which proc_dointvec_minmax() returned 0 is the pair
 * examined further and handed to set_local_port_range().
 */
50 static int ipv4_local_port_range(ctl_table *table, int write,
52 size_t *lenp, loff_t *ppos)
58 .maxlen = sizeof(range),
60 .extra1 = &ip_local_port_range_min,
61 .extra2 = &ip_local_port_range_max,
/* Start from the currently published values. */
64 inet_get_local_port_range(range, range + 1);
65 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
67 if (write && ret == 0) {
/*
 * An upper bound below the lower bound is singled out here.
 * NOTE(review): the statement executed for that case is not visible in
 * this view -- presumably an error is returned instead of storing the
 * pair; confirm against the full file.
 */
68 if (range[1] < range[0])
71 set_local_port_range(range);
/*
 * Reader for the ping group range that lives behind table->data.
 *
 * The access is bracketed by read_seqbegin()/read_seqretry() on
 * sysctl_local_ports.lock and is repeated for as long as read_seqretry()
 * asks for a retry. Note that this is the same seqlock that
 * set_local_port_range() takes for the local port range.
 *
 * NOTE(review): the loop body is not visible in this view; presumably it
 * copies data[0] and data[1] to *low and *high -- confirm.
 */
78 static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
80 gid_t *data = table->data;
83 seq = read_seqbegin(&sysctl_local_ports.lock);
87 } while (read_seqretry(&sysctl_local_ports.lock, seq));
90 /* Update system visible ping group range */
/*
 * Writer counterpart of inet_get_ping_group_range_table(): operates on the
 * gid_t array behind table->data while holding sysctl_local_ports.lock as
 * a seqlock writer (the lock is shared with the local port range code).
 *
 * NOTE(review): the statements between write_seqlock() and
 * write_sequnlock() are not visible in this view; presumably they store
 * range[0] and range[1] into data[] -- confirm.
 */
91 static void set_ping_group_range(struct ctl_table *table, gid_t range[2])
93 gid_t *data = table->data;
94 write_seqlock(&sysctl_local_ports.lock);
97 write_sequnlock(&sysctl_local_ports.lock);
100 /* Validate changes from /proc interface. */
/*
 * sysctl handler for "ping_group_range".
 *
 * Loads the current pair into the local `range` through
 * inet_get_ping_group_range_table(), runs proc_dointvec_minmax() on a
 * temporary table (`tmp`) bounded by ip_ping_group_range_min /
 * ip_ping_group_range_max, and, for a write that returned 0, publishes the
 * new pair with set_ping_group_range(). Unlike ipv4_local_port_range(), no
 * ordering check between the two values is visible here.
 */
101 static int ipv4_ping_group_range(ctl_table *table, int write,
103 size_t *lenp, loff_t *ppos)
109 .maxlen = sizeof(range),
111 .extra1 = &ip_ping_group_range_min,
112 .extra2 = &ip_ping_group_range_max,
/* Start from the values currently stored behind table->data. */
115 inet_get_ping_group_range_table(table, range, range + 1);
116 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
/* Publish only after a successful write. */
118 if (write && ret == 0)
119 set_ping_group_range(table, range);
/*
 * sysctl handler for "tcp_congestion_control".
 *
 * The current default is fetched into the on-stack buffer `val`
 * (TCP_CA_NAME_MAX bytes) with tcp_get_default_congestion_control();
 * proc_dostring() then operates on a temporary table (`tbl`) with maxlen
 * TCP_CA_NAME_MAX. For a write that proc_dostring() accepted (ret == 0),
 * `val` is passed to tcp_set_default_congestion_control() and its result
 * becomes the return value.
 */
124 static int proc_tcp_congestion_control(ctl_table *ctl, int write,
125 void __user *buffer, size_t *lenp, loff_t *ppos)
127 char val[TCP_CA_NAME_MAX];
130 .maxlen = TCP_CA_NAME_MAX,
134 tcp_get_default_congestion_control(val);
136 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
137 if (write && ret == 0)
138 ret = tcp_set_default_congestion_control(val);
/*
 * sysctl handler for "tcp_available_congestion_control".
 *
 * A scratch buffer of TCP_CA_BUF_MAX bytes is allocated with
 * kmalloc(GFP_USER), filled by tcp_get_available_congestion_control(), and
 * exposed through proc_dostring() on the temporary table `tbl`.
 *
 * NOTE(review): the allocation-failure check and the release of tbl.data
 * are not visible in this view -- confirm both exist in the full file.
 */
142 static int proc_tcp_available_congestion_control(ctl_table *ctl,
144 void __user *buffer, size_t *lenp,
147 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
150 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
153 tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
154 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
/*
 * sysctl handler for "tcp_allowed_congestion_control".
 *
 * A scratch buffer of TCP_CA_BUF_MAX bytes is allocated with
 * kmalloc(GFP_USER) and pre-filled by tcp_get_allowed_congestion_control().
 * proc_dostring() operates on that buffer through the temporary table
 * `tbl`; for a write that returned 0 the buffer contents are passed to
 * tcp_set_allowed_congestion_control(), whose result becomes `ret`.
 *
 * NOTE(review): the allocation-failure check and the release of tbl.data
 * are not visible in this view -- confirm both exist in the full file.
 */
159 static int proc_allowed_congestion_control(ctl_table *ctl,
161 void __user *buffer, size_t *lenp,
164 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
167 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
171 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
172 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
173 if (write && ret == 0)
174 ret = tcp_set_allowed_congestion_control(tbl.data);
/*
 * sysctl handler for "tcp_mem".
 *
 * Operates on net->ipv4.sysctl_tcp_mem, where `net` is the network
 * namespace of the calling task (current->nsproxy->net_ns), not a pointer
 * stored in the table entry. Two paths are visible:
 *
 *  - one points ctl->data at the namespace's array and returns the result
 *    of proc_doulongvec_minmax() on `ctl` directly;
 *  - the other parses into the local vec[3] through a temporary table
 *    (`tmp`) and then copies the three values into the namespace's array.
 *
 * When CONFIG_CGROUP_MEM_RES_CTLR_KMEM is set, the three values are also
 * passed to tcp_prot_mem() for the memory cgroup of the current task.
 */
179 static int ipv4_tcp_mem(ctl_table *ctl, int write,
180 void __user *buffer, size_t *lenp,
184 unsigned long vec[3];
185 struct net *net = current->nsproxy->net_ns;
186 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
187 struct mem_cgroup *memcg;
192 .maxlen = sizeof(vec),
/*
 * NOTE(review): the condition guarding this early return is not visible in
 * this view -- presumably the read (!write) case; confirm.
 */
197 ctl->data = &net->ipv4.sysctl_tcp_mem;
198 return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
/* Parse the user-supplied values into vec[] via the temporary table. */
201 ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
205 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
/*
 * Hand each of the three values, with its index, to tcp_prot_mem() for the
 * current task's memory cgroup.
 * NOTE(review): any check on `ret` or `memcg` before these calls is not
 * visible in this view.
 */
207 memcg = mem_cgroup_from_task(current);
209 tcp_prot_mem(memcg, vec[0], 0);
210 tcp_prot_mem(memcg, vec[1], 1);
211 tcp_prot_mem(memcg, vec[2], 2);
/* Store the parsed values in the per-namespace array. */
215 net->ipv4.sysctl_tcp_mem[0] = vec[0];
216 net->ipv4.sysctl_tcp_mem[1] = vec[1];
217 net->ipv4.sysctl_tcp_mem[2] = vec[2];
/*
 * sysctl entries registered by sysctl_ipv4_init() under net_ipv4_ctl_path.
 *
 * Each entry names a file (.procname), the variable backing it (.data and
 * .maxlen) and the handler that parses/prints it (.proc_handler). Entries
 * using proc_dointvec_minmax() may carry bounds in .extra1 / .extra2.
 * sysctl_ipv4_init() walks the table until an entry without .procname, so
 * the table is expected to end with an empty entry (not visible in this
 * view).
 */
222 static struct ctl_table ipv4_table[] = {
224 .procname = "tcp_timestamps",
225 .data = &sysctl_tcp_timestamps,
226 .maxlen = sizeof(int),
228 .proc_handler = proc_dointvec
231 .procname = "tcp_window_scaling",
232 .data = &sysctl_tcp_window_scaling,
233 .maxlen = sizeof(int),
235 .proc_handler = proc_dointvec
238 .procname = "tcp_sack",
239 .data = &sysctl_tcp_sack,
240 .maxlen = sizeof(int),
242 .proc_handler = proc_dointvec
245 .procname = "tcp_retrans_collapse",
246 .data = &sysctl_tcp_retrans_collapse,
247 .maxlen = sizeof(int),
249 .proc_handler = proc_dointvec
/* Bounded by ip_ttl_min (1) and ip_ttl_max (255). */
252 .procname = "ip_default_ttl",
253 .data = &sysctl_ip_default_ttl,
254 .maxlen = sizeof(int),
256 .proc_handler = proc_dointvec_minmax,
257 .extra1 = &ip_ttl_min,
258 .extra2 = &ip_ttl_max,
261 .procname = "ip_no_pmtu_disc",
262 .data = &ipv4_config.no_pmtu_disc,
263 .maxlen = sizeof(int),
265 .proc_handler = proc_dointvec
268 .procname = "ip_nonlocal_bind",
269 .data = &sysctl_ip_nonlocal_bind,
270 .maxlen = sizeof(int),
272 .proc_handler = proc_dointvec
275 .procname = "tcp_syn_retries",
276 .data = &sysctl_tcp_syn_retries,
277 .maxlen = sizeof(int),
279 .proc_handler = proc_dointvec
282 .procname = "tcp_synack_retries",
283 .data = &sysctl_tcp_synack_retries,
284 .maxlen = sizeof(int),
286 .proc_handler = proc_dointvec
289 .procname = "tcp_max_orphans",
290 .data = &sysctl_tcp_max_orphans,
291 .maxlen = sizeof(int),
293 .proc_handler = proc_dointvec
296 .procname = "tcp_max_tw_buckets",
297 .data = &tcp_death_row.sysctl_max_tw_buckets,
298 .maxlen = sizeof(int),
300 .proc_handler = proc_dointvec
303 .procname = "ip_dynaddr",
304 .data = &sysctl_ip_dynaddr,
305 .maxlen = sizeof(int),
307 .proc_handler = proc_dointvec
310 .procname = "tcp_keepalive_time",
311 .data = &sysctl_tcp_keepalive_time,
312 .maxlen = sizeof(int),
314 .proc_handler = proc_dointvec_jiffies,
317 .procname = "tcp_keepalive_probes",
318 .data = &sysctl_tcp_keepalive_probes,
319 .maxlen = sizeof(int),
321 .proc_handler = proc_dointvec
324 .procname = "tcp_keepalive_intvl",
325 .data = &sysctl_tcp_keepalive_intvl,
326 .maxlen = sizeof(int),
328 .proc_handler = proc_dointvec_jiffies,
/* Only an upper bound (tcp_retr1_max, 255) is visible for this entry. */
331 .procname = "tcp_retries1",
332 .data = &sysctl_tcp_retries1,
333 .maxlen = sizeof(int),
335 .proc_handler = proc_dointvec_minmax,
336 .extra2 = &tcp_retr1_max
339 .procname = "tcp_retries2",
340 .data = &sysctl_tcp_retries2,
341 .maxlen = sizeof(int),
343 .proc_handler = proc_dointvec
346 .procname = "tcp_fin_timeout",
347 .data = &sysctl_tcp_fin_timeout,
348 .maxlen = sizeof(int),
350 .proc_handler = proc_dointvec_jiffies,
352 #ifdef CONFIG_SYN_COOKIES
354 .procname = "tcp_syncookies",
355 .data = &sysctl_tcp_syncookies,
356 .maxlen = sizeof(int),
358 .proc_handler = proc_dointvec
362 .procname = "tcp_tw_recycle",
363 .data = &tcp_death_row.sysctl_tw_recycle,
364 .maxlen = sizeof(int),
366 .proc_handler = proc_dointvec
369 .procname = "tcp_abort_on_overflow",
370 .data = &sysctl_tcp_abort_on_overflow,
371 .maxlen = sizeof(int),
373 .proc_handler = proc_dointvec
376 .procname = "tcp_stdurg",
377 .data = &sysctl_tcp_stdurg,
378 .maxlen = sizeof(int),
380 .proc_handler = proc_dointvec
383 .procname = "tcp_rfc1337",
384 .data = &sysctl_tcp_rfc1337,
385 .maxlen = sizeof(int),
387 .proc_handler = proc_dointvec
390 .procname = "tcp_max_syn_backlog",
391 .data = &sysctl_max_syn_backlog,
392 .maxlen = sizeof(int),
394 .proc_handler = proc_dointvec
/* Custom handler: see ipv4_local_port_range(). */
397 .procname = "ip_local_port_range",
398 .data = &sysctl_local_ports.range,
399 .maxlen = sizeof(sysctl_local_ports.range),
401 .proc_handler = ipv4_local_port_range,
/*
 * .data is left NULL here; sysctl_ipv4_init() looks this entry up by name
 * and points it at sysctl_local_reserved_ports before registration.
 */
404 .procname = "ip_local_reserved_ports",
405 .data = NULL, /* initialized in sysctl_ipv4_init */
408 .proc_handler = proc_do_large_bitmap,
411 .procname = "igmp_max_memberships",
412 .data = &sysctl_igmp_max_memberships,
413 .maxlen = sizeof(int),
415 .proc_handler = proc_dointvec
418 .procname = "igmp_max_msf",
419 .data = &sysctl_igmp_max_msf,
420 .maxlen = sizeof(int),
422 .proc_handler = proc_dointvec
425 .procname = "inet_peer_threshold",
426 .data = &inet_peer_threshold,
427 .maxlen = sizeof(int),
429 .proc_handler = proc_dointvec
432 .procname = "inet_peer_minttl",
433 .data = &inet_peer_minttl,
434 .maxlen = sizeof(int),
436 .proc_handler = proc_dointvec_jiffies,
439 .procname = "inet_peer_maxttl",
440 .data = &inet_peer_maxttl,
441 .maxlen = sizeof(int),
443 .proc_handler = proc_dointvec_jiffies,
446 .procname = "tcp_orphan_retries",
447 .data = &sysctl_tcp_orphan_retries,
448 .maxlen = sizeof(int),
450 .proc_handler = proc_dointvec
453 .procname = "tcp_fack",
454 .data = &sysctl_tcp_fack,
455 .maxlen = sizeof(int),
457 .proc_handler = proc_dointvec
460 .procname = "tcp_reordering",
461 .data = &sysctl_tcp_reordering,
462 .maxlen = sizeof(int),
464 .proc_handler = proc_dointvec
467 .procname = "tcp_ecn",
468 .data = &sysctl_tcp_ecn,
469 .maxlen = sizeof(int),
471 .proc_handler = proc_dointvec
474 .procname = "tcp_dsack",
475 .data = &sysctl_tcp_dsack,
476 .maxlen = sizeof(int),
478 .proc_handler = proc_dointvec
/* maxlen covers the whole sysctl_tcp_wmem object, not a single int. */
481 .procname = "tcp_wmem",
482 .data = &sysctl_tcp_wmem,
483 .maxlen = sizeof(sysctl_tcp_wmem),
485 .proc_handler = proc_dointvec
/* maxlen covers the whole sysctl_tcp_rmem object, not a single int. */
488 .procname = "tcp_rmem",
489 .data = &sysctl_tcp_rmem,
490 .maxlen = sizeof(sysctl_tcp_rmem),
492 .proc_handler = proc_dointvec
495 .procname = "tcp_app_win",
496 .data = &sysctl_tcp_app_win,
497 .maxlen = sizeof(int),
499 .proc_handler = proc_dointvec
/* Bounded by tcp_adv_win_scale_min (-31) and tcp_adv_win_scale_max (31). */
502 .procname = "tcp_adv_win_scale",
503 .data = &sysctl_tcp_adv_win_scale,
504 .maxlen = sizeof(int),
506 .proc_handler = proc_dointvec_minmax,
507 .extra1 = &tcp_adv_win_scale_min,
508 .extra2 = &tcp_adv_win_scale_max,
511 .procname = "tcp_tw_reuse",
512 .data = &sysctl_tcp_tw_reuse,
513 .maxlen = sizeof(int),
515 .proc_handler = proc_dointvec
518 .procname = "tcp_frto",
519 .data = &sysctl_tcp_frto,
520 .maxlen = sizeof(int),
522 .proc_handler = proc_dointvec
525 .procname = "tcp_frto_response",
526 .data = &sysctl_tcp_frto_response,
527 .maxlen = sizeof(int),
529 .proc_handler = proc_dointvec
532 .procname = "tcp_low_latency",
533 .data = &sysctl_tcp_low_latency,
534 .maxlen = sizeof(int),
536 .proc_handler = proc_dointvec
539 .procname = "tcp_no_metrics_save",
540 .data = &sysctl_tcp_nometrics_save,
541 .maxlen = sizeof(int),
543 .proc_handler = proc_dointvec,
546 .procname = "tcp_moderate_rcvbuf",
547 .data = &sysctl_tcp_moderate_rcvbuf,
548 .maxlen = sizeof(int),
550 .proc_handler = proc_dointvec,
553 .procname = "tcp_tso_win_divisor",
554 .data = &sysctl_tcp_tso_win_divisor,
555 .maxlen = sizeof(int),
557 .proc_handler = proc_dointvec,
/* No .data visible: proc_tcp_congestion_control() uses its own buffer. */
560 .procname = "tcp_congestion_control",
562 .maxlen = TCP_CA_NAME_MAX,
563 .proc_handler = proc_tcp_congestion_control,
566 .procname = "tcp_abc",
567 .data = &sysctl_tcp_abc,
568 .maxlen = sizeof(int),
570 .proc_handler = proc_dointvec,
573 .procname = "tcp_mtu_probing",
574 .data = &sysctl_tcp_mtu_probing,
575 .maxlen = sizeof(int),
577 .proc_handler = proc_dointvec,
580 .procname = "tcp_base_mss",
581 .data = &sysctl_tcp_base_mss,
582 .maxlen = sizeof(int),
584 .proc_handler = proc_dointvec,
587 .procname = "tcp_workaround_signed_windows",
588 .data = &sysctl_tcp_workaround_signed_windows,
589 .maxlen = sizeof(int),
591 .proc_handler = proc_dointvec
593 #ifdef CONFIG_NET_DMA
595 .procname = "tcp_dma_copybreak",
596 .data = &sysctl_tcp_dma_copybreak,
597 .maxlen = sizeof(int),
599 .proc_handler = proc_dointvec
603 .procname = "tcp_slow_start_after_idle",
604 .data = &sysctl_tcp_slow_start_after_idle,
605 .maxlen = sizeof(int),
607 .proc_handler = proc_dointvec
609 #ifdef CONFIG_NETLABEL
611 .procname = "cipso_cache_enable",
612 .data = &cipso_v4_cache_enabled,
613 .maxlen = sizeof(int),
615 .proc_handler = proc_dointvec,
618 .procname = "cipso_cache_bucket_size",
619 .data = &cipso_v4_cache_bucketsize,
620 .maxlen = sizeof(int),
622 .proc_handler = proc_dointvec,
625 .procname = "cipso_rbm_optfmt",
626 .data = &cipso_v4_rbm_optfmt,
627 .maxlen = sizeof(int),
629 .proc_handler = proc_dointvec,
632 .procname = "cipso_rbm_strictvalid",
633 .data = &cipso_v4_rbm_strictvalid,
634 .maxlen = sizeof(int),
636 .proc_handler = proc_dointvec,
638 #endif /* CONFIG_NETLABEL */
/* No .data visible: the handler allocates its own buffer per call. */
640 .procname = "tcp_available_congestion_control",
641 .maxlen = TCP_CA_BUF_MAX,
643 .proc_handler = proc_tcp_available_congestion_control,
/* No .data visible: the handler allocates its own buffer per call. */
646 .procname = "tcp_allowed_congestion_control",
647 .maxlen = TCP_CA_BUF_MAX,
649 .proc_handler = proc_allowed_congestion_control,
652 .procname = "tcp_max_ssthresh",
653 .data = &sysctl_tcp_max_ssthresh,
654 .maxlen = sizeof(int),
656 .proc_handler = proc_dointvec,
659 .procname = "tcp_cookie_size",
660 .data = &sysctl_tcp_cookie_size,
661 .maxlen = sizeof(int),
663 .proc_handler = proc_dointvec
666 .procname = "tcp_thin_linear_timeouts",
667 .data = &sysctl_tcp_thin_linear_timeouts,
668 .maxlen = sizeof(int),
670 .proc_handler = proc_dointvec
673 .procname = "tcp_thin_dupack",
674 .data = &sysctl_tcp_thin_dupack,
675 .maxlen = sizeof(int),
677 .proc_handler = proc_dointvec
/* Handled as unsigned longs (proc_doulongvec_minmax), unlike tcp_wmem/rmem. */
680 .procname = "udp_mem",
681 .data = &sysctl_udp_mem,
682 .maxlen = sizeof(sysctl_udp_mem),
684 .proc_handler = proc_doulongvec_minmax,
/* NOTE(review): any .extra1/.extra2 bound for this entry is not visible here. */
687 .procname = "udp_rmem_min",
688 .data = &sysctl_udp_rmem_min,
689 .maxlen = sizeof(sysctl_udp_rmem_min),
691 .proc_handler = proc_dointvec_minmax,
/* NOTE(review): any .extra1/.extra2 bound for this entry is not visible here. */
695 .procname = "udp_wmem_min",
696 .data = &sysctl_udp_wmem_min,
697 .maxlen = sizeof(sysctl_udp_wmem_min),
699 .proc_handler = proc_dointvec_minmax,
/*
 * Per-network-namespace sysctl entries, registered by
 * ipv4_sysctl_init_net().
 *
 * The .data pointers written here refer to fields of init_net. For any
 * other namespace ipv4_sysctl_init_net() works on a kmemdup() copy of this
 * table and references the matching &net->ipv4.* fields, so the order of
 * entries here matters to that function.
 */
705 static struct ctl_table ipv4_net_table[] = {
707 .procname = "icmp_echo_ignore_all",
708 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
709 .maxlen = sizeof(int),
711 .proc_handler = proc_dointvec
714 .procname = "icmp_echo_ignore_broadcasts",
715 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
716 .maxlen = sizeof(int),
718 .proc_handler = proc_dointvec
721 .procname = "icmp_ignore_bogus_error_responses",
722 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
723 .maxlen = sizeof(int),
725 .proc_handler = proc_dointvec
728 .procname = "icmp_errors_use_inbound_ifaddr",
729 .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
730 .maxlen = sizeof(int),
732 .proc_handler = proc_dointvec
735 .procname = "icmp_ratelimit",
736 .data = &init_net.ipv4.sysctl_icmp_ratelimit,
737 .maxlen = sizeof(int),
739 .proc_handler = proc_dointvec_ms_jiffies,
742 .procname = "icmp_ratemask",
743 .data = &init_net.ipv4.sysctl_icmp_ratemask,
744 .maxlen = sizeof(int),
746 .proc_handler = proc_dointvec
749 .procname = "rt_cache_rebuild_count",
750 .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
751 .maxlen = sizeof(int),
753 .proc_handler = proc_dointvec
/* Custom handler: see ipv4_ping_group_range(), which uses table->data. */
756 .procname = "ping_group_range",
757 .data = &init_net.ipv4.sysctl_ping_group_range,
758 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
760 .proc_handler = ipv4_ping_group_range,
/*
 * No .data visible: ipv4_tcp_mem() derives the target array from the
 * calling task's network namespace instead.
 */
763 .procname = "tcp_mem",
764 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
766 .proc_handler = ipv4_tcp_mem,
/*
 * sysctl path made of the components "net" and "ipv4"; both tables in this
 * file are registered under it. Exported (GPL-only) so that code outside
 * this file can reference the same path.
 */
771 struct ctl_path net_ipv4_ctl_path[] = {
772 { .procname = "net", },
773 { .procname = "ipv4", },
776 EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
/*
 * Per-namespace init hook (the .init member of ipv4_sysctl_ops).
 *
 * init_net uses ipv4_net_table as is. Every other namespace gets its own
 * kmemdup() copy of the table; the &net->ipv4.* expressions that follow are
 * the namespace-local counterparts of the init_net fields named in
 * ipv4_net_table. Default values are then stored in the namespace and the
 * table is registered under net_ipv4_ctl_path, with the resulting header
 * kept in net->ipv4.ipv4_hdr.
 *
 * NOTE(review): the left-hand sides of the pointer assignments, the
 * kmemdup() failure check and the error-path statements are not visible in
 * this view -- confirm them against the full file.
 */
778 static __net_init int ipv4_sysctl_init_net(struct net *net)
780 struct ctl_table *table;
782 table = ipv4_net_table;
783 if (!net_eq(net, &init_net)) {
784 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
/* Namespace-local targets, in the same order as ipv4_net_table's entries. */
789 &net->ipv4.sysctl_icmp_echo_ignore_all;
791 &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
793 &net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
795 &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
797 &net->ipv4.sysctl_icmp_ratelimit;
799 &net->ipv4.sysctl_icmp_ratemask;
801 &net->ipv4.sysctl_rt_cache_rebuild_count;
803 &net->ipv4.sysctl_ping_group_range;
808 * Sane defaults - nobody may create ping sockets.
809 * Boot scripts should set this to distro-specific group.
/* Lower bound 1 above upper bound 0: a range that contains no group. */
811 net->ipv4.sysctl_ping_group_range[0] = 1;
812 net->ipv4.sysctl_ping_group_range[1] = 0;
814 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
818 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
819 net_ipv4_ctl_path, table);
/* Registration failure. NOTE(review): the handling is not visible here. */
820 if (net->ipv4.ipv4_hdr == NULL)
/*
 * Only non-init namespaces own a duplicated table.
 * NOTE(review): presumably the copy is freed here on the error path;
 * the statement is not visible in this view.
 */
826 if (!net_eq(net, &init_net))
/*
 * Per-namespace exit hook (the .exit member of ipv4_sysctl_ops).
 *
 * The table pointer is read from the registered header's ctl_table_arg
 * before the header is unregistered, so it is still available afterwards.
 * NOTE(review): what is done with `table` after unregistering (presumably
 * freeing a duplicated table) is not visible in this view.
 */
832 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
834 struct ctl_table *table;
836 table = net->ipv4.ipv4_hdr->ctl_table_arg;
837 unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
/*
 * Per-network-namespace operations for this file: init and exit callbacks.
 * Registered by sysctl_ipv4_init() through register_pernet_subsys().
 */
841 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
842 .init = ipv4_sysctl_init_net,
843 .exit = ipv4_sysctl_exit_net,
/*
 * Boot-time initialisation, run through __initcall().
 *
 * Steps visible here:
 *  1. Walk ipv4_table up to the first entry without a procname and point
 *     the "ip_local_reserved_ports" entry's .data at
 *     sysctl_local_reserved_ports (the table initialises it to NULL).
 *  2. Register ipv4_table under net_ipv4_ctl_path.
 *  3. Register the per-namespace operations; if that fails, the table
 *     registered in step 2 is unregistered again.
 *
 * NOTE(review): the checks on the lookup result and on `hdr`, and the
 * returned values, are not visible in this view.
 */
846 static __init int sysctl_ipv4_init(void)
848 struct ctl_table_header *hdr;
851 for (i = ipv4_table; i->procname; i++) {
852 if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
853 i->data = sysctl_local_reserved_ports;
860 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
/* Undo the table registration if the pernet registration fails. */
864 if (register_pernet_subsys(&ipv4_sysctl_ops)) {
865 unregister_sysctl_table(hdr);
872 __initcall(sysctl_ipv4_init);