]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/core/sysctl_net_core.c
637a42e5d5896041b15ca123652f5422846d1d00
[karo-tx-linux.git] / net / core / sysctl_net_core.c
1 /* -*- linux-c -*-
2  * sysctl_net_core.c: sysctl interface to net core subsystem.
3  *
4  * Begun April 1, 1996, Mike Shaver.
5  * Added /proc/sys/net/core directory entry (empty =) ). [MS]
6  */
7
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/ll_poll.h>
23
/* Constant 1; referenced through .extra1 by the *mem_max / *mem_default entries of net_core_table. */
24 static int one = 1;
25
26 #ifdef CONFIG_RPS
27 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
28                                 void __user *buffer, size_t *lenp, loff_t *ppos)
29 {
30         unsigned int orig_size, size;
31         int ret, i;
32         struct ctl_table tmp = {
33                 .data = &size,
34                 .maxlen = sizeof(size),
35                 .mode = table->mode
36         };
37         struct rps_sock_flow_table *orig_sock_table, *sock_table;
38         static DEFINE_MUTEX(sock_flow_mutex);
39
40         mutex_lock(&sock_flow_mutex);
41
42         orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
43                                         lockdep_is_held(&sock_flow_mutex));
44         size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
45
46         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
47
48         if (write) {
49                 if (size) {
50                         if (size > 1<<30) {
51                                 /* Enforce limit to prevent overflow */
52                                 mutex_unlock(&sock_flow_mutex);
53                                 return -EINVAL;
54                         }
55                         size = roundup_pow_of_two(size);
56                         if (size != orig_size) {
57                                 sock_table =
58                                     vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
59                                 if (!sock_table) {
60                                         mutex_unlock(&sock_flow_mutex);
61                                         return -ENOMEM;
62                                 }
63
64                                 sock_table->mask = size - 1;
65                         } else
66                                 sock_table = orig_sock_table;
67
68                         for (i = 0; i < size; i++)
69                                 sock_table->ents[i] = RPS_NO_CPU;
70                 } else
71                         sock_table = NULL;
72
73                 if (sock_table != orig_sock_table) {
74                         rcu_assign_pointer(rps_sock_flow_table, sock_table);
75                         if (sock_table)
76                                 static_key_slow_inc(&rps_needed);
77                         if (orig_sock_table) {
78                                 static_key_slow_dec(&rps_needed);
79                                 synchronize_rcu();
80                                 vfree(orig_sock_table);
81                         }
82                 }
83         }
84
85         mutex_unlock(&sock_flow_mutex);
86
87         return ret;
88 }
89 #endif /* CONFIG_RPS */
90
91 #ifdef CONFIG_NET_FLOW_LIMIT
92 static DEFINE_MUTEX(flow_limit_update_mutex);
93
94 static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
95                                  void __user *buffer, size_t *lenp,
96                                  loff_t *ppos)
97 {
98         struct sd_flow_limit *cur;
99         struct softnet_data *sd;
100         cpumask_var_t mask;
101         int i, len, ret = 0;
102
103         if (!alloc_cpumask_var(&mask, GFP_KERNEL))
104                 return -ENOMEM;
105
106         if (write) {
107                 ret = cpumask_parse_user(buffer, *lenp, mask);
108                 if (ret)
109                         goto done;
110
111                 mutex_lock(&flow_limit_update_mutex);
112                 len = sizeof(*cur) + netdev_flow_limit_table_len;
113                 for_each_possible_cpu(i) {
114                         sd = &per_cpu(softnet_data, i);
115                         cur = rcu_dereference_protected(sd->flow_limit,
116                                      lockdep_is_held(&flow_limit_update_mutex));
117                         if (cur && !cpumask_test_cpu(i, mask)) {
118                                 RCU_INIT_POINTER(sd->flow_limit, NULL);
119                                 synchronize_rcu();
120                                 kfree(cur);
121                         } else if (!cur && cpumask_test_cpu(i, mask)) {
122                                 cur = kzalloc(len, GFP_KERNEL);
123                                 if (!cur) {
124                                         /* not unwinding previous changes */
125                                         ret = -ENOMEM;
126                                         goto write_unlock;
127                                 }
128                                 cur->num_buckets = netdev_flow_limit_table_len;
129                                 rcu_assign_pointer(sd->flow_limit, cur);
130                         }
131                 }
132 write_unlock:
133                 mutex_unlock(&flow_limit_update_mutex);
134         } else {
135                 if (*ppos || !*lenp) {
136                         *lenp = 0;
137                         goto done;
138                 }
139
140                 cpumask_clear(mask);
141                 rcu_read_lock();
142                 for_each_possible_cpu(i) {
143                         sd = &per_cpu(softnet_data, i);
144                         if (rcu_dereference(sd->flow_limit))
145                                 cpumask_set_cpu(i, mask);
146                 }
147                 rcu_read_unlock();
148
149                 len = cpumask_scnprintf(buffer, *lenp, mask);
150                 *lenp = len + 1;
151                 *ppos += len + 1;
152         }
153
154 done:
155         free_cpumask_var(mask);
156         return ret;
157 }
158
159 static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
160                                        void __user *buffer, size_t *lenp,
161                                        loff_t *ppos)
162 {
163         unsigned int old, *ptr;
164         int ret;
165
166         mutex_lock(&flow_limit_update_mutex);
167
168         ptr = table->data;
169         old = *ptr;
170         ret = proc_dointvec(table, write, buffer, lenp, ppos);
171         if (!ret && write && !is_power_of_2(*ptr)) {
172                 *ptr = old;
173                 ret = -EINVAL;
174         }
175
176         mutex_unlock(&flow_limit_update_mutex);
177         return ret;
178 }
179 #endif /* CONFIG_NET_FLOW_LIMIT */
180
181 static struct ctl_table net_core_table[] = {
182 #ifdef CONFIG_NET
183         {
184                 .procname       = "wmem_max",
185                 .data           = &sysctl_wmem_max,
186                 .maxlen         = sizeof(int),
187                 .mode           = 0644,
188                 .proc_handler   = proc_dointvec_minmax,
189                 .extra1         = &one,
190         },
191         {
192                 .procname       = "rmem_max",
193                 .data           = &sysctl_rmem_max,
194                 .maxlen         = sizeof(int),
195                 .mode           = 0644,
196                 .proc_handler   = proc_dointvec_minmax,
197                 .extra1         = &one,
198         },
199         {
200                 .procname       = "wmem_default",
201                 .data           = &sysctl_wmem_default,
202                 .maxlen         = sizeof(int),
203                 .mode           = 0644,
204                 .proc_handler   = proc_dointvec_minmax,
205                 .extra1         = &one,
206         },
207         {
208                 .procname       = "rmem_default",
209                 .data           = &sysctl_rmem_default,
210                 .maxlen         = sizeof(int),
211                 .mode           = 0644,
212                 .proc_handler   = proc_dointvec_minmax,
213                 .extra1         = &one,
214         },
215         {
216                 .procname       = "dev_weight",
217                 .data           = &weight_p,
218                 .maxlen         = sizeof(int),
219                 .mode           = 0644,
220                 .proc_handler   = proc_dointvec
221         },
222         {
223                 .procname       = "netdev_max_backlog",
224                 .data           = &netdev_max_backlog,
225                 .maxlen         = sizeof(int),
226                 .mode           = 0644,
227                 .proc_handler   = proc_dointvec
228         },
229 #ifdef CONFIG_BPF_JIT
230         {
231                 .procname       = "bpf_jit_enable",
232                 .data           = &bpf_jit_enable,
233                 .maxlen         = sizeof(int),
234                 .mode           = 0644,
235                 .proc_handler   = proc_dointvec
236         },
237 #endif
238         {
239                 .procname       = "netdev_tstamp_prequeue",
240                 .data           = &netdev_tstamp_prequeue,
241                 .maxlen         = sizeof(int),
242                 .mode           = 0644,
243                 .proc_handler   = proc_dointvec
244         },
245         {
246                 .procname       = "message_cost",
247                 .data           = &net_ratelimit_state.interval,
248                 .maxlen         = sizeof(int),
249                 .mode           = 0644,
250                 .proc_handler   = proc_dointvec_jiffies,
251         },
252         {
253                 .procname       = "message_burst",
254                 .data           = &net_ratelimit_state.burst,
255                 .maxlen         = sizeof(int),
256                 .mode           = 0644,
257                 .proc_handler   = proc_dointvec,
258         },
259         {
260                 .procname       = "optmem_max",
261                 .data           = &sysctl_optmem_max,
262                 .maxlen         = sizeof(int),
263                 .mode           = 0644,
264                 .proc_handler   = proc_dointvec
265         },
266 #ifdef CONFIG_RPS
267         {
268                 .procname       = "rps_sock_flow_entries",
269                 .maxlen         = sizeof(int),
270                 .mode           = 0644,
271                 .proc_handler   = rps_sock_flow_sysctl
272         },
273 #endif
274 #ifdef CONFIG_NET_FLOW_LIMIT
275         {
276                 .procname       = "flow_limit_cpu_bitmap",
277                 .mode           = 0644,
278                 .proc_handler   = flow_limit_cpu_sysctl
279         },
280         {
281                 .procname       = "flow_limit_table_len",
282                 .data           = &netdev_flow_limit_table_len,
283                 .maxlen         = sizeof(int),
284                 .mode           = 0644,
285                 .proc_handler   = flow_limit_table_len_sysctl
286         },
287 #endif /* CONFIG_NET_FLOW_LIMIT */
288 #ifdef CONFIG_NET_LL_RX_POLL
289         {
290                 .procname       = "low_latency_poll",
291                 .data           = &sysctl_net_ll_poll,
292                 .maxlen         = sizeof(unsigned long),
293                 .mode           = 0644,
294                 .proc_handler   = proc_doulongvec_minmax
295         },
296 #endif
297 #endif /* CONFIG_NET */
298         {
299                 .procname       = "netdev_budget",
300                 .data           = &netdev_budget,
301                 .maxlen         = sizeof(int),
302                 .mode           = 0644,
303                 .proc_handler   = proc_dointvec
304         },
305         {
306                 .procname       = "warnings",
307                 .data           = &net_msg_warn,
308                 .maxlen         = sizeof(int),
309                 .mode           = 0644,
310                 .proc_handler   = proc_dointvec
311         },
312         { }
313 };
314
315 static struct ctl_table netns_core_table[] = {
316         {
317                 .procname       = "somaxconn",
318                 .data           = &init_net.core.sysctl_somaxconn,
319                 .maxlen         = sizeof(int),
320                 .mode           = 0644,
321                 .proc_handler   = proc_dointvec
322         },
323         { }
324 };
325
326 static __net_init int sysctl_core_net_init(struct net *net)
327 {
328         struct ctl_table *tbl;
329
330         net->core.sysctl_somaxconn = SOMAXCONN;
331
332         tbl = netns_core_table;
333         if (!net_eq(net, &init_net)) {
334                 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
335                 if (tbl == NULL)
336                         goto err_dup;
337
338                 tbl[0].data = &net->core.sysctl_somaxconn;
339
340                 /* Don't export any sysctls to unprivileged users */
341                 if (net->user_ns != &init_user_ns) {
342                         tbl[0].procname = NULL;
343                 }
344         }
345
346         net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
347         if (net->core.sysctl_hdr == NULL)
348                 goto err_reg;
349
350         return 0;
351
352 err_reg:
353         if (tbl != netns_core_table)
354                 kfree(tbl);
355 err_dup:
356         return -ENOMEM;
357 }
358
359 static __net_exit void sysctl_core_net_exit(struct net *net)
360 {
361         struct ctl_table *tbl;
362
363         tbl = net->core.sysctl_hdr->ctl_table_arg;
364         unregister_net_sysctl_table(net->core.sysctl_hdr);
365         BUG_ON(tbl == netns_core_table);
366         kfree(tbl);
367 }
368
/* Per-network-namespace init/exit callbacks; registered by sysctl_core_init(). */
369 static __net_initdata struct pernet_operations sysctl_core_ops = {
370         .init = sysctl_core_net_init,
371         .exit = sysctl_core_net_exit,
372 };
373
374 static __init int sysctl_core_init(void)
375 {
376         register_net_sysctl(&init_net, "net/core", net_core_table);
377         return register_pernet_subsys(&sysctl_core_ops);
378 }
379
380 fs_initcall(sysctl_core_init);