/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly,
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink :	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-29"          ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_MAX"
};
static const char *af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-29"          ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_MAX"
};
#endif

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

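/*
 * Rough arithmetic for illustration (sizeof(struct sk_buff) is config-
 * and arch-dependent, so treat ~200 bytes as an assumption): that makes
 * _SK_MEM_OVERHEAD about 456 bytes, and SK_WMEM_MAX/SK_RMEM_MAX about
 * 456 * 256 ~= 114 KB per socket by default.
 */
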
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
			       "tries to set negative timeout\n",
			       current->comm, current->pid);
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
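
/*
 * Usage sketch (userspace, illustrative only): a receive timeout set via
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 *
 * reaches sock_set_timeout() through sock_setsockopt() below and is
 * stored in jiffies, with the microseconds rounded up:
 * 2*HZ + DIV_ROUND_UP(500000, 1000000/HZ).
 */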

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm,  current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = 0;
	int skb_len;

	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_filter(sk, skb);
	if (err)
		goto out;

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
out:
	return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
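
/*
 * Note on the -ENOMEM above: it signals receive-buffer overrun, not an
 * allocator failure. Callers typically respond by freeing the skb and
 * counting a drop (UDP, for instance, bumps its error MIB counters when
 * this helper rejects a packet).
 */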

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk->sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk->sk_dst_cache;

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk->sk_dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	if (optname == SO_DONTLINGER) {
		lock_sock(sk);
		sock_reset_flag(sk, SOCK_LINGER);
		release_sock(sk);
		return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN)) {
			ret = -EACCES;
		}
		else if (valbool)
			sock_set_flag(sk, SOCK_DBG);
		else
			sock_reset_flag(sk, SOCK_DBG);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		if (valbool)
			sock_set_flag(sk, SOCK_LOCALROUTE);
		else
			sock_reset_flag(sk, SOCK_LOCALROUTE);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this. BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints. */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this. BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints. */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool)  {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

#ifdef CONFIG_NETDEVICES
	case SO_BINDTODEVICE:
	{
		char devname[IFNAMSIZ];

		/* Sorry... */
		if (!capable(CAP_NET_RAW)) {
			ret = -EPERM;
			break;
		}

		/* Bind this socket to a particular device like "eth0",
		 * as specified in the passed interface name. If the
		 * name is "" or the option length is zero the socket
		 * is not bound.
		 */

		if (!valbool) {
			sk->sk_bound_dev_if = 0;
		} else {
			if (optlen > IFNAMSIZ - 1)
				optlen = IFNAMSIZ - 1;
			memset(devname, 0, sizeof(devname));
			if (copy_from_user(devname, optval, optlen)) {
				ret = -EFAULT;
				break;
			}

			/* Remove any cached route for this socket. */
			sk_dst_reset(sk);

			if (devname[0] == '\0') {
				sk->sk_bound_dev_if = 0;
			} else {
				struct net_device *dev = dev_get_by_name(devname);
				if (!dev) {
					ret = -ENODEV;
					break;
				}
				sk->sk_bound_dev_if = dev->ifindex;
				dev_put(dev);
			}
		}
		break;
	}
#endif


	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		rcu_read_lock_bh();
		filter = rcu_dereference(sk->sk_filter);
		if (filter) {
			rcu_assign_pointer(sk->sk_filter, NULL);
			sk_filter_release(sk, filter);
			rcu_read_unlock_bh();
			break;
		}
		rcu_read_unlock_bh();
		ret = -ENONET;
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
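
/*
 * Illustrative userspace round-trip (an assumption about typical use,
 * not code from this file): because SO_RCVBUF is doubled on the way in
 * to cover struct sk_buff overhead, getsockopt() reports the doubled
 * value back:
 *
 *	int val = 65536, out;
 *	socklen_t len = sizeof(out);
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *	// out is now 131072 (subject to the sysctl_rmem_max clamp)
 */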


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
		if (len > sizeof(sk->sk_peercred))
			len = sizeof(sk->sk_peercred);
		if (copy_to_user(optval, &sk->sk_peercred, len))
			return -EFAULT;
		goto lenout;

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
		      struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	struct kmem_cache *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free;
	}
	return sk;

out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		sk_filter_release(sk, filter);
		rcu_assign_pointer(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

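/*
 * sk_clone() copies an existing (typically listening) sock into a fresh
 * one; connection-oriented protocols use it when minting a child socket
 * from a parent, as in TCP's tcp_create_openreq_child() (see the
 * comment on the socks counter inside the function).
 */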
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		atomic_set(&newsk->sk_wmem_alloc, 0);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		rwlock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = newsk->sk_filter;
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still raw copy of parent, so invalidate
			 * destructor and make plain sk_free() */
			newsk->sk_destruct = NULL;
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		newsk->sk_socket = NULL;
		newsk->sk_sleep	 = NULL;

		if (newsk->sk_prot->sockets_allocated)
			atomic_inc(newsk->sk_prot->sockets_allocated);
	}
out:
	return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

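/*
 * Mirror the new route's device capabilities into sk->sk_route_caps so
 * fast paths can test them without dereferencing the dst: a GSO-capable
 * device also gets the software GSO feature bits, while any extra
 * encapsulation header (dst->header_len, e.g. from IPsec) masks GSO off
 * again, presumably because segmentation cannot reproduce such headers.
 * (Descriptive summary of the function below, not original commentary.)
 */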
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	if (sk_can_gso(sk)) {
		if (dst->header_len)
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		else
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

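/*
 * With 4 KB pages (an assumption; page size is arch-dependent), the
 * num_physpages thresholds below mean: shrink the defaults on machines
 * with <= 16 MB of memory, raise the maxima on machines with >= 512 MB.
 */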
void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
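
/*
 * Pairing sketch (illustrative): option data charged to a socket should
 * be released with the same size it was charged with, e.g.
 *
 *	void *opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *	...
 *	sock_kfree_s(sk, opt, optlen);
 *
 * so that sk_omem_alloc balances back to zero (sk_free() prints the
 * "optmem leakage" diagnostic above when it does not).
 */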

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
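
/*
 * sock_alloc_send_skb() below is the common linear-only wrapper: it asks
 * sock_alloc_send_pskb() for `size' bytes of header room and no paged
 * data, so the page-fragment loop above is skipped entirely.
 */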

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);
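
/*
 * Typical caller pattern (a sketch, not code from this file): a blocking
 * recvmsg() implementation loops while it holds the socket lock, e.g.
 *
 *	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo || signal_pending(current))
 *			break;
 *		sk_wait_data(sk, &timeo);
 *	}
 *
 * relying on sk_wait_event() to drop and retake the lock around
 * schedule_timeout().
 */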

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

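/*
 * Default sendpage: no zero-copy support here, so map the page and push
 * the bytes through the protocol's ordinary sendmsg path instead.
 */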
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
1439
1440 /*
1441  *      Default Socket Callbacks
1442  */
1443
1444 static void sock_def_wakeup(struct sock *sk)
1445 {
1446         read_lock(&sk->sk_callback_lock);
1447         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1448                 wake_up_interruptible_all(sk->sk_sleep);
1449         read_unlock(&sk->sk_callback_lock);
1450 }
1451
1452 static void sock_def_error_report(struct sock *sk)
1453 {
1454         read_lock(&sk->sk_callback_lock);
1455         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1456                 wake_up_interruptible(sk->sk_sleep);
1457         sk_wake_async(sk,0,POLL_ERR);
1458         read_unlock(&sk->sk_callback_lock);
1459 }
1460
1461 static void sock_def_readable(struct sock *sk, int len)
1462 {
1463         read_lock(&sk->sk_callback_lock);
1464         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1465                 wake_up_interruptible(sk->sk_sleep);
1466         sk_wake_async(sk,1,POLL_IN);
1467         read_unlock(&sk->sk_callback_lock);
1468 }
1469
1470 static void sock_def_write_space(struct sock *sk)
1471 {
1472         read_lock(&sk->sk_callback_lock);
1473
1474         /* Do not wake up a writer until he can make "significant"
1475          * progress.  --DaveM
1476          */
1477         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1478                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1479                         wake_up_interruptible(sk->sk_sleep);
1480
1481                 /* Should agree with poll, otherwise some programs break */
1482                 if (sock_writeable(sk))
1483                         sk_wake_async(sk, 2, POLL_OUT); /* "how" 2: only if SOCK_ASYNC_NOSPACE was set */
1484         }
1485
1486         read_unlock(&sk->sk_callback_lock);
1487 }
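/*
 * Worked example of the check above: (sk_wmem_alloc << 1) <= sk_sndbuf
 * is equivalent to sk_wmem_alloc <= sk_sndbuf / 2. With an illustrative
 * sk_sndbuf of 32768 bytes, a blocked writer is woken only once committed
 * write memory has drained to 16384 bytes or less, i.e. at least half the
 * send buffer is free, so the writer can queue a useful amount of data
 * before blocking again.
 */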
1488
1489 static void sock_def_destruct(struct sock *sk)
1490 {
1491         kfree(sk->sk_protinfo);
1492 }
1493
1494 void sk_send_sigurg(struct sock *sk)
1495 {
1496         if (sk->sk_socket && sk->sk_socket->file)
1497                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1498                         sk_wake_async(sk, 3, POLL_PRI); /* "how" 3: delivers SIGURG */
1499 }
1500
1501 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
1502                     unsigned long expires)
1503 {
1504         if (!mod_timer(timer, expires))
1505                 sock_hold(sk);
1506 }
1507
1508 EXPORT_SYMBOL(sk_reset_timer);
1509
1510 void sk_stop_timer(struct sock *sk, struct timer_list *timer)
1511 {
1512         if (timer_pending(timer) && del_timer(timer))
1513                 __sock_put(sk);
1514 }
1515
1516 EXPORT_SYMBOL(sk_stop_timer);
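/*
 * Usage sketch (the myproto_* name is hypothetical): sk_reset_timer()
 * takes a reference on the socket whenever it arms a timer that was not
 * already pending, so the timer handler must drop that reference on its
 * way out, and sk_stop_timer() drops it when it cancels a pending timer:
 *
 *      static void myproto_retransmit(unsigned long data)
 *      {
 *              struct sock *sk = (struct sock *)data;
 *
 *              ... retransmit work ...
 *              sock_put(sk);   (pairs with the hold in sk_reset_timer)
 *      }
 *
 *      (re)arm for one second from now:
 *      sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ);
 */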
1517
1518 void sock_init_data(struct socket *sock, struct sock *sk)
1519 {
1520         skb_queue_head_init(&sk->sk_receive_queue);
1521         skb_queue_head_init(&sk->sk_write_queue);
1522         skb_queue_head_init(&sk->sk_error_queue);
1523 #ifdef CONFIG_NET_DMA
1524         skb_queue_head_init(&sk->sk_async_wait_queue);
1525 #endif
1526
1527         sk->sk_send_head        =       NULL;
1528
1529         init_timer(&sk->sk_timer);
1530
1531         sk->sk_allocation       =       GFP_KERNEL;
1532         sk->sk_rcvbuf           =       sysctl_rmem_default;
1533         sk->sk_sndbuf           =       sysctl_wmem_default;
1534         sk->sk_state            =       TCP_CLOSE;
1535         sk->sk_socket           =       sock;
1536
1537         sock_set_flag(sk, SOCK_ZAPPED);
1538
1539         if (sock) {
1540                 sk->sk_type     =       sock->type;
1541                 sk->sk_sleep    =       &sock->wait;
1542                 sock->sk        =       sk;
1543         } else
1544                 sk->sk_sleep    =       NULL;
1545
1546         rwlock_init(&sk->sk_dst_lock);
1547         rwlock_init(&sk->sk_callback_lock);
1548         lockdep_set_class_and_name(&sk->sk_callback_lock,
1549                         af_callback_keys + sk->sk_family,
1550                         af_family_clock_key_strings[sk->sk_family]);
1551
1552         sk->sk_state_change     =       sock_def_wakeup;
1553         sk->sk_data_ready       =       sock_def_readable;
1554         sk->sk_write_space      =       sock_def_write_space;
1555         sk->sk_error_report     =       sock_def_error_report;
1556         sk->sk_destruct         =       sock_def_destruct;
1557
1558         sk->sk_sndmsg_page      =       NULL;
1559         sk->sk_sndmsg_off       =       0;
1560
1561         sk->sk_peercred.pid     =       0;
1562         sk->sk_peercred.uid     =       -1;
1563         sk->sk_peercred.gid     =       -1;
1564         sk->sk_write_pending    =       0;
1565         sk->sk_rcvlowat         =       1;
1566         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1567         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1568
1569         sk->sk_stamp = ktime_set(-1L, -1L);
1570
1571         atomic_set(&sk->sk_refcnt, 1);
1572 }
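/*
 * Sketch of the intended call pattern (names hypothetical): a protocol's
 * create routine lets sock_init_data() install the queues, buffer limits
 * and sock_def_*() callbacks above, then overrides only what it needs:
 *
 *      sock_init_data(sock, sk);
 *      sk->sk_data_ready = myproto_data_ready;   (custom input wakeup)
 *      sk->sk_destruct   = myproto_destruct;     (free private state)
 */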
1573
1574 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1575 {
1576         might_sleep();
1577         spin_lock_bh(&sk->sk_lock.slock);
1578         if (sk->sk_lock.owner)
1579                 __lock_sock(sk);
1580         sk->sk_lock.owner = (void *)1;  /* any non-NULL value marks the lock owned */
1581         spin_unlock(&sk->sk_lock.slock);
1582         /*
1583          * The sk_lock has mutex_lock() semantics here:
1584          */
1585         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1586         local_bh_enable();
1587 }
1588
1589 EXPORT_SYMBOL(lock_sock_nested);
1590
1591 void fastcall release_sock(struct sock *sk)
1592 {
1593         /*
1594          * The sk_lock has mutex_unlock() semantics:
1595          */
1596         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1597
1598         spin_lock_bh(&sk->sk_lock.slock);
1599         if (sk->sk_backlog.tail)
1600                 __release_sock(sk);
1601         sk->sk_lock.owner = NULL;
1602         if (waitqueue_active(&sk->sk_lock.wq))
1603                 wake_up(&sk->sk_lock.wq);
1604         spin_unlock_bh(&sk->sk_lock.slock);
1605 }
1606 EXPORT_SYMBOL(release_sock);
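/*
 * Typical usage (illustrative): process context brackets socket state
 * changes with the socket lock; release_sock() also runs any packets
 * that softirq context parked on sk->sk_backlog while the lock was held:
 *
 *      lock_sock(sk);                  (may sleep)
 *      sk->sk_rcvlowat = val;          (example state change)
 *      release_sock(sk);               (drains backlog, wakes lock waiters)
 */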
1607
1608 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1609 {
1610         struct timeval tv;
1611         if (!sock_flag(sk, SOCK_TIMESTAMP))
1612                 sock_enable_timestamp(sk);
1613         tv = ktime_to_timeval(sk->sk_stamp);
1614         if (tv.tv_sec == -1)
1615                 return -ENOENT;
1616         if (tv.tv_sec == 0) {
1617                 sk->sk_stamp = ktime_get_real();
1618                 tv = ktime_to_timeval(sk->sk_stamp);
1619         }
1620         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1621 }
1622 EXPORT_SYMBOL(sock_get_timestamp);
1623
1624 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1625 {
1626         struct timespec ts;
1627         if (!sock_flag(sk, SOCK_TIMESTAMP))
1628                 sock_enable_timestamp(sk);
1629         ts = ktime_to_timespec(sk->sk_stamp);
1630         if (ts.tv_sec == -1)
1631                 return -ENOENT;
1632         if (ts.tv_sec == 0) {
1633                 sk->sk_stamp = ktime_get_real();
1634                 ts = ktime_to_timespec(sk->sk_stamp);
1635         }
1636         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1637 }
1638 EXPORT_SYMBOL(sock_get_timestampns);
1639
1640 void sock_enable_timestamp(struct sock *sk)
1641 {
1642         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1643                 sock_set_flag(sk, SOCK_TIMESTAMP);
1644                 net_enable_timestamp();
1645         }
1646 }
1647 EXPORT_SYMBOL(sock_enable_timestamp);
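/*
 * Usage sketch: protocols typically answer the SIOCGSTAMP and
 * SIOCGSTAMPNS ioctls with the two helpers above, e.g. from an ioctl
 * handler:
 *
 *      case SIOCGSTAMP:
 *              return sock_get_timestamp(sk, (struct timeval __user *)arg);
 *      case SIOCGSTAMPNS:
 *              return sock_get_timestampns(sk, (struct timespec __user *)arg);
 */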
1648
1649 /*
1650  *      Get a socket option on a socket.
1651  *
1652  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
1653  *      asynchronous errors should be reported by getsockopt. We assume
1654  *      this means only if you specify SO_ERROR (otherwise what's the point of it?).
1655  */
1656 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1657                            char __user *optval, int __user *optlen)
1658 {
1659         struct sock *sk = sock->sk;
1660
1661         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1662 }
1663
1664 EXPORT_SYMBOL(sock_common_getsockopt);
1665
1666 #ifdef CONFIG_COMPAT
1667 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1668                                   char __user *optval, int __user *optlen)
1669 {
1670         struct sock *sk = sock->sk;
1671
1672         if (sk->sk_prot->compat_getsockopt != NULL)
1673                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1674                                                       optval, optlen);
1675         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1676 }
1677 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1678 #endif
1679
1680 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1681                         struct msghdr *msg, size_t size, int flags)
1682 {
1683         struct sock *sk = sock->sk;
1684         int addr_len = 0;
1685         int err;
1686
1687         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1688                                    flags & ~MSG_DONTWAIT, &addr_len);
1689         if (err >= 0)
1690                 msg->msg_namelen = addr_len;
1691         return err;
1692 }
1693
1694 EXPORT_SYMBOL(sock_common_recvmsg);
1695
1696 /*
1697  *      Set socket options on an inet socket.
1698  */
1699 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1700                            char __user *optval, int optlen)
1701 {
1702         struct sock *sk = sock->sk;
1703
1704         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1705 }
1706
1707 EXPORT_SYMBOL(sock_common_setsockopt);
1708
1709 #ifdef CONFIG_COMPAT
1710 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1711                                   char __user *optval, int optlen)
1712 {
1713         struct sock *sk = sock->sk;
1714
1715         if (sk->sk_prot->compat_setsockopt != NULL)
1716                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1717                                                       optval, optlen);
1718         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1719 }
1720 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1721 #endif
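/*
 * Wiring sketch: address families whose struct proto implements
 * setsockopt/getsockopt (as the INET protocols do) simply plug these
 * wrappers into their struct proto_ops:
 *
 *      .setsockopt        = sock_common_setsockopt,
 *      .getsockopt        = sock_common_getsockopt,
 *      .compat_setsockopt = compat_sock_common_setsockopt,
 *      .compat_getsockopt = compat_sock_common_getsockopt,
 */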
1722
1723 void sk_common_release(struct sock *sk)
1724 {
1725         if (sk->sk_prot->destroy)
1726                 sk->sk_prot->destroy(sk);
1727
1728         /*
1729          * Observation: when sk_common_release is called, processes have
1730          * no access to the socket, but the network stack still does.
1731          * Step one, detach it from networking:
1732          *
1733          * A. Remove it from the hash tables.
1734          */
1735
1736         sk->sk_prot->unhash(sk);
1737
1738         /*
1739          * At this point the socket cannot receive new packets, but some may
1740          * still be in flight: another CPU may have done its hash table lookup
1741          * in the receive path before we unhashed the socket. Such packets
1742          * will reach the receive queue and be purged by the socket destructor.
1743          *
1744          * We may also still have packets pending on the receive queue and,
1745          * probably, our own packets waiting in device queues. The destructor
1746          * drains the receive queue, but transmitted packets delay socket
1747          * destruction until the last reference is released.
1748          */
1749
1750         sock_orphan(sk);
1751
1752         xfrm_sk_free_policy(sk);
1753
1754         sk_refcnt_debug_release(sk);
1755         sock_put(sk);
1756 }
1757
1758 EXPORT_SYMBOL(sk_common_release);
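/*
 * Usage note (illustrative, myproto_close hypothetical): datagram-style
 * protocols commonly point their struct proto .close at a thin wrapper
 * that ends in sk_common_release():
 *
 *      static void myproto_close(struct sock *sk, long timeout)
 *      {
 *              sk_common_release(sk);
 *      }
 */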
1759
1760 static DEFINE_RWLOCK(proto_list_lock);
1761 static LIST_HEAD(proto_list);
1762
1763 int proto_register(struct proto *prot, int alloc_slab)
1764 {
1765         char *request_sock_slab_name = NULL;
1766         char *timewait_sock_slab_name;
1767         int rc = -ENOBUFS;
1768
1769         if (alloc_slab) {
1770                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1771                                                SLAB_HWCACHE_ALIGN, NULL);
1772
1773                 if (prot->slab == NULL) {
1774                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1775                                prot->name);
1776                         goto out;
1777                 }
1778
1779                 if (prot->rsk_prot != NULL) {
1780                         static const char mask[] = "request_sock_%s";
1781
1782                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1783                         if (request_sock_slab_name == NULL)
1784                                 goto out_free_sock_slab;
1785
1786                         sprintf(request_sock_slab_name, mask, prot->name);
1787                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1788                                                                  prot->rsk_prot->obj_size, 0,
1789                                                                  SLAB_HWCACHE_ALIGN, NULL);
1790
1791                         if (prot->rsk_prot->slab == NULL) {
1792                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1793                                        prot->name);
1794                                 goto out_free_request_sock_slab_name;
1795                         }
1796                 }
1797
1798                 if (prot->twsk_prot != NULL) {
1799                         static const char mask[] = "tw_sock_%s";
1800
1801                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1802
1803                         if (timewait_sock_slab_name == NULL)
1804                                 goto out_free_request_sock_slab;
1805
1806                         sprintf(timewait_sock_slab_name, mask, prot->name);
1807                         prot->twsk_prot->twsk_slab =
1808                                 kmem_cache_create(timewait_sock_slab_name,
1809                                                   prot->twsk_prot->twsk_obj_size,
1810                                                   0, SLAB_HWCACHE_ALIGN,
1811                                                   NULL);
1812                         if (prot->twsk_prot->twsk_slab == NULL)
1813                                 goto out_free_timewait_sock_slab_name;
1814                 }
1815         }
1816
1817         write_lock(&proto_list_lock);
1818         list_add(&prot->node, &proto_list);
1819         write_unlock(&proto_list_lock);
1820         rc = 0;
1821 out:
1822         return rc;
1823 out_free_timewait_sock_slab_name:
1824         kfree(timewait_sock_slab_name);
1825 out_free_request_sock_slab:
1826         if (prot->rsk_prot && prot->rsk_prot->slab) {
1827                 kmem_cache_destroy(prot->rsk_prot->slab);
1828                 prot->rsk_prot->slab = NULL;
1829         }
1830 out_free_request_sock_slab_name:
1831         kfree(request_sock_slab_name);
1832 out_free_sock_slab:
1833         kmem_cache_destroy(prot->slab);
1834         prot->slab = NULL;
1835         goto out;
1836 }
1837
1838 EXPORT_SYMBOL(proto_register);
1839
1840 void proto_unregister(struct proto *prot)
1841 {
1842         write_lock(&proto_list_lock);
1843         list_del(&prot->node);
1844         write_unlock(&proto_list_lock);
1845
1846         if (prot->slab != NULL) {
1847                 kmem_cache_destroy(prot->slab);
1848                 prot->slab = NULL;
1849         }
1850
1851         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1852                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1853
1854                 kmem_cache_destroy(prot->rsk_prot->slab);
1855                 kfree(name);
1856                 prot->rsk_prot->slab = NULL;
1857         }
1858
1859         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1860                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1861
1862                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1863                 kfree(name);
1864                 prot->twsk_prot->twsk_slab = NULL;
1865         }
1866 }
1867
1868 EXPORT_SYMBOL(proto_unregister);
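/*
 * Registration sketch (hypothetical module): proto_register() with
 * alloc_slab != 0 creates the backing kmem cache from .obj_size, and
 * proto_unregister() tears everything down again on unload:
 *
 *      static struct proto myproto_prot = {
 *              .name           = "MYPROTO",
 *              .owner          = THIS_MODULE,
 *              .obj_size       = sizeof(struct myproto_sock),
 *      };
 *
 *      static int __init myproto_init(void)
 *      {
 *              return proto_register(&myproto_prot, 1);
 *      }
 *
 *      static void __exit myproto_exit(void)
 *      {
 *              proto_unregister(&myproto_prot);
 *      }
 */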
1869
1870 #ifdef CONFIG_PROC_FS
1871 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1872 {
1873         read_lock(&proto_list_lock);
1874         return seq_list_start_head(&proto_list, *pos);
1875 }
1876
1877 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1878 {
1879         return seq_list_next(v, &proto_list, pos);
1880 }
1881
1882 static void proto_seq_stop(struct seq_file *seq, void *v)
1883 {
1884         read_unlock(&proto_list_lock);
1885 }
1886
1887 static char proto_method_implemented(const void *method)
1888 {
1889         return method == NULL ? 'n' : 'y';
1890 }
1891
1892 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1893 {
1894         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1895                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1896                    proto->name,
1897                    proto->obj_size,
1898                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1899                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1900                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1901                    proto->max_header,
1902                    proto->slab == NULL ? "no" : "yes",
1903                    module_name(proto->owner),
1904                    proto_method_implemented(proto->close),
1905                    proto_method_implemented(proto->connect),
1906                    proto_method_implemented(proto->disconnect),
1907                    proto_method_implemented(proto->accept),
1908                    proto_method_implemented(proto->ioctl),
1909                    proto_method_implemented(proto->init),
1910                    proto_method_implemented(proto->destroy),
1911                    proto_method_implemented(proto->shutdown),
1912                    proto_method_implemented(proto->setsockopt),
1913                    proto_method_implemented(proto->getsockopt),
1914                    proto_method_implemented(proto->sendmsg),
1915                    proto_method_implemented(proto->recvmsg),
1916                    proto_method_implemented(proto->sendpage),
1917                    proto_method_implemented(proto->bind),
1918                    proto_method_implemented(proto->backlog_rcv),
1919                    proto_method_implemented(proto->hash),
1920                    proto_method_implemented(proto->unhash),
1921                    proto_method_implemented(proto->get_port),
1922                    proto_method_implemented(proto->enter_memory_pressure));
1923 }
1924
1925 static int proto_seq_show(struct seq_file *seq, void *v)
1926 {
1927         if (v == &proto_list)
1928                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1929                            "protocol",
1930                            "size",
1931                            "sockets",
1932                            "memory",
1933                            "press",
1934                            "maxhdr",
1935                            "slab",
1936                            "module",
1937                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1938         else
1939                 proto_seq_printf(seq, list_entry(v, struct proto, node));
1940         return 0;
1941 }
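/*
 * A read of /proc/net/protocols therefore yields the header row above
 * followed by one proto_seq_printf() line per registered protocol; the
 * trailing nineteen columns are 'y'/'n' flags, one per struct proto
 * method, in the order listed in proto_seq_printf().
 */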
1942
1943 static const struct seq_operations proto_seq_ops = {
1944         .start  = proto_seq_start,
1945         .next   = proto_seq_next,
1946         .stop   = proto_seq_stop,
1947         .show   = proto_seq_show,
1948 };
1949
1950 static int proto_seq_open(struct inode *inode, struct file *file)
1951 {
1952         return seq_open(file, &proto_seq_ops);
1953 }
1954
1955 static const struct file_operations proto_seq_fops = {
1956         .owner          = THIS_MODULE,
1957         .open           = proto_seq_open,
1958         .read           = seq_read,
1959         .llseek         = seq_lseek,
1960         .release        = seq_release,
1961 };
1962
1963 static int __init proto_init(void)
1964 {
1965         /* register /proc/net/protocols */
1966         return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1967 }
1968
1969 subsys_initcall(proto_init);
1970
1971 #endif /* CONFIG_PROC_FS */
1972
1973 EXPORT_SYMBOL(sk_alloc);
1974 EXPORT_SYMBOL(sk_free);
1975 EXPORT_SYMBOL(sk_send_sigurg);
1976 EXPORT_SYMBOL(sock_alloc_send_skb);
1977 EXPORT_SYMBOL(sock_init_data);
1978 EXPORT_SYMBOL(sock_kfree_s);
1979 EXPORT_SYMBOL(sock_kmalloc);
1980 EXPORT_SYMBOL(sock_no_accept);
1981 EXPORT_SYMBOL(sock_no_bind);
1982 EXPORT_SYMBOL(sock_no_connect);
1983 EXPORT_SYMBOL(sock_no_getname);
1984 EXPORT_SYMBOL(sock_no_getsockopt);
1985 EXPORT_SYMBOL(sock_no_ioctl);
1986 EXPORT_SYMBOL(sock_no_listen);
1987 EXPORT_SYMBOL(sock_no_mmap);
1988 EXPORT_SYMBOL(sock_no_poll);
1989 EXPORT_SYMBOL(sock_no_recvmsg);
1990 EXPORT_SYMBOL(sock_no_sendmsg);
1991 EXPORT_SYMBOL(sock_no_sendpage);
1992 EXPORT_SYMBOL(sock_no_setsockopt);
1993 EXPORT_SYMBOL(sock_no_shutdown);
1994 EXPORT_SYMBOL(sock_no_socketpair);
1995 EXPORT_SYMBOL(sock_rfree);
1996 EXPORT_SYMBOL(sock_setsockopt);
1997 EXPORT_SYMBOL(sock_wfree);
1998 EXPORT_SYMBOL(sock_wmalloc);
1999 EXPORT_SYMBOL(sock_i_uid);
2000 EXPORT_SYMBOL(sock_i_ino);
2001 EXPORT_SYMBOL(sysctl_optmem_max);
2002 #ifdef CONFIG_SYSCTL
2003 EXPORT_SYMBOL(sysctl_rmem_max);
2004 EXPORT_SYMBOL(sysctl_wmem_max);
2005 #endif