/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *       Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>

#include <asm/uaccess.h>

#include "util.h"

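/*
 * Per-attach bookkeeping: every shmat() creates a private struct file
 * whose ->private_data points at one of these, wrapping the segment's
 * backing (shmem or hugetlbfs) file.
 */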
struct shm_file_data {
        int id;
        struct ipc_namespace *ns;
        struct file *file;
        const struct vm_operations_struct *vm_ops;
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

#define shm_ids(ns)     ((ns)->ids[IPC_SHM_IDS])

#define shm_unlock(shp)                 \
        ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

void shm_init_ns(struct ipc_namespace *ns)
{
        ns->shm_ctlmax = SHMMAX;
        ns->shm_ctlall = SHMALL;
        ns->shm_ctlmni = SHMMNI;
        ns->shm_rmid_forced = 1;
        ns->shm_tot = 0;

        /*
         * For init_ipc_ns, shm_ids().rw_mutex is statically initialized,
         * as kernel threads should be able to use it in do_exit() before
         * shm_init(), which is called from do_initcalls().
         */
        if (ns == &init_ipc_ns)
                __ipc_init_ids(&shm_ids(ns));
        else
                ipc_init_ids(&shm_ids(ns));
}

/*
 * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
 * Only shm_ids.rw_mutex remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
        struct shmid_kernel *shp;
        shp = container_of(ipcp, struct shmid_kernel, shm_perm);

        if (shp->shm_nattch) {
                shp->shm_perm.mode |= SHM_DEST;
                /* Do not find it any more */
                shp->shm_perm.key = IPC_PRIVATE;
                shm_unlock(shp);
        } else
                shm_destroy(ns, shp);
}

#ifdef CONFIG_IPC_NS
void shm_exit_ns(struct ipc_namespace *ns)
{
        free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
        idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif

static int __init ipc_ns_init(void)
{
        shm_init_ns(&init_ipc_ns);
        return 0;
}

pure_initcall(ipc_ns_init);

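/*
 * The column widths in these headers must match the SIZE_SPEC format
 * used by sysvipc_shm_proc_show() below.
 */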
void __init shm_init(void)
{
        ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
                                "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
                                "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
                                IPC_SHM_IDS, sysvipc_shm_proc_show);
}

/*
 * shm_lock() and shm_lock_check() are called in paths where the
 * rw_mutex is not necessarily held.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
        struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);

        if (IS_ERR(ipcp))
                return (struct shmid_kernel *)ipcp;

        return container_of(ipcp, struct shmid_kernel, shm_perm);
}

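/*
 * Lock a segment through an already-validated pointer; the caller must
 * guarantee the object cannot go away (e.g. by holding rw_mutex).
 * The matching unlock is shm_unlock().
 */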
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
        rcu_read_lock();
        spin_lock(&ipcp->shm_perm.lock);
}

static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
                                                int id)
{
        struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);

        if (IS_ERR(ipcp))
                return (struct shmid_kernel *)ipcp;

        return container_of(ipcp, struct shmid_kernel, shm_perm);
}

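/*
 * Drop the segment from the namespace's id table. Called with
 * shm_ids.rw_mutex (writer) and the segment locked.
 */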
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
        ipc_rmid(&shm_ids(ns), &s->shm_perm);
}


/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;

        shp = shm_lock(sfd->ns, sfd->id);
        BUG_ON(IS_ERR(shp));
        shp->shm_atim = get_seconds();
        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_nattch++;
        shm_unlock(shp);
}

/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
        ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
        shm_rmid(ns, shp);
        shm_unlock(shp);
        if (!is_file_hugepages(shp->shm_file))
                shmem_lock(shp->shm_file, 0, shp->mlock_user);
        else if (shp->mlock_user)
                user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
                                                shp->mlock_user);
        fput(shp->shm_file);
        security_shm_free(shp);
        ipc_rcu_putref(shp);
}

/*
 * shm_may_destroy - identifies whether shm segment should be destroyed now
 *
 * Returns true if and only if there are no active users of the segment and
 * one of the following is true:
 *
 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 *
 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 */
static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
        return (shp->shm_nattch == 0) &&
               (ns->shm_rmid_forced ||
                (shp->shm_perm.mode & SHM_DEST));
}

/*
 * Remove the attach descriptor vma and free the segment if it is marked
 * destroyed. The descriptor has already been removed from the
 * current->mm->mmap list and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;
        struct ipc_namespace *ns = sfd->ns;

        down_write(&shm_ids(ns).rw_mutex);
        /* remove from the list of attaches of the shm segment */
        shp = shm_lock(ns, sfd->id);
        BUG_ON(IS_ERR(shp));
        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_dtim = get_seconds();
        shp->shm_nattch--;
        if (shm_may_destroy(ns, shp))
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
        up_write(&shm_ids(ns).rw_mutex);
}

/* Called with ns->shm_ids(ns).rw_mutex locked */
static int shm_try_destroy_current(int id, void *p, void *data)
{
        struct ipc_namespace *ns = data;
        struct kern_ipc_perm *ipcp = p;
        struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

        if (shp->shm_creator != current)
                return 0;

        /*
         * Mark it as orphaned to destroy the segment when
         * kernel.shm_rmid_forced is changed.
         * It is a noop if the following shm_may_destroy() returns true.
         */
        shp->shm_creator = NULL;

        /*
         * Don't even try to destroy it.  If shm_rmid_forced=0 and IPC_RMID
         * is not set, it shouldn't be deleted here.
         */
        if (!ns->shm_rmid_forced)
                return 0;

        if (shm_may_destroy(ns, shp)) {
                shm_lock_by_ptr(shp);
                shm_destroy(ns, shp);
        }
        return 0;
}

/* Called with ns->shm_ids(ns).rw_mutex locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
        struct ipc_namespace *ns = data;
        struct kern_ipc_perm *ipcp = p;
        struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

        /*
         * We want to destroy segments that have no users and whose
         * creating process has already exited.
         *
         * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
         */
        if (shp->shm_creator != NULL)
                return 0;

        if (shm_may_destroy(ns, shp)) {
                shm_lock_by_ptr(shp);
                shm_destroy(ns, shp);
        }
        return 0;
}

void shm_destroy_orphaned(struct ipc_namespace *ns)
{
        down_write(&shm_ids(ns).rw_mutex);
        if (shm_ids(ns).in_use)
                idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
        up_write(&shm_ids(ns).rw_mutex);
}

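/*
 * Called from do_exit(): mark segments created by the exiting task as
 * orphaned and, if kernel.shm_rmid_forced is set, destroy the ones
 * that are no longer attached anywhere.
 */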
void exit_shm(struct task_struct *task)
{
        struct ipc_namespace *ns = task->nsproxy->ipc_ns;

        if (shm_ids(ns).in_use == 0)
                return;

        /* Destroy unattached segments created by this task */
        down_write(&shm_ids(ns).rw_mutex);
        if (shm_ids(ns).in_use)
                idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
        up_write(&shm_ids(ns).rw_mutex);
}

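/*
 * Page faults are forwarded to the vm_ops of the backing file, which
 * were saved in sfd->vm_ops when the segment was mapped (shm_mmap()).
 */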
static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);

        return sfd->vm_ops->fault(vma, vmf);
}

#ifdef CONFIG_NUMA
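/*
 * NUMA memory policy requests are likewise delegated to the backing
 * file's vm_ops when it provides set_policy/get_policy hooks.
 */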
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        int err = 0;

        if (sfd->vm_ops->set_policy)
                err = sfd->vm_ops->set_policy(vma, new);
        return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
                                        unsigned long addr)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct mempolicy *pol = NULL;

        if (sfd->vm_ops->get_policy)
                pol = sfd->vm_ops->get_policy(vma, addr);
        else if (vma->vm_policy)
                pol = vma->vm_policy;

        return pol;
}
#endif

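/*
 * Map the backing file, then replace the vma's vm_ops: shm_vm_ops wraps
 * the original operations so that open/close can keep shm_nattch and
 * the attach/detach times up to date.
 */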
static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct shm_file_data *sfd = shm_file_data(file);
        int ret;

        ret = sfd->file->f_op->mmap(sfd->file, vma);
        if (ret != 0)
                return ret;
        sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
        BUG_ON(!sfd->vm_ops->fault);
#endif
        vma->vm_ops = &shm_vm_ops;
        shm_open(vma);

        return ret;
}

static int shm_release(struct inode *ino, struct file *file)
{
        struct shm_file_data *sfd = shm_file_data(file);

        put_ipc_ns(sfd->ns);
        shm_file_data(file) = NULL;
        kfree(sfd);
        return 0;
}

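/* Forward fsync to the backing file; fail if it has no fsync method. */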
static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
        struct shm_file_data *sfd = shm_file_data(file);

        if (!sfd->file->f_op->fsync)
                return -EINVAL;
        return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

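/*
 * Delegate placement to the backing file so that its alignment
 * constraints (e.g. hugetlbfs) are respected. Only wired up in the
 * hugepage and !CONFIG_MMU file_operations below.
 */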
static unsigned long shm_get_unmapped_area(struct file *file,
        unsigned long addr, unsigned long len, unsigned long pgoff,
        unsigned long flags)
{
        struct shm_file_data *sfd = shm_file_data(file);

        return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
                                                pgoff, flags);
}

static const struct file_operations shm_file_operations = {
        .mmap           = shm_mmap,
        .fsync          = shm_fsync,
        .release        = shm_release,
#ifndef CONFIG_MMU
        .get_unmapped_area      = shm_get_unmapped_area,
#endif
        .llseek         = noop_llseek,
};

static const struct file_operations shm_file_operations_huge = {
        .mmap           = shm_mmap,
        .fsync          = shm_fsync,
        .release        = shm_release,
        .get_unmapped_area      = shm_get_unmapped_area,
        .llseek         = noop_llseek,
};

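/* A hugepage-backed attach is identified by its file_operations. */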
int is_file_shm_hugepages(struct file *file)
{
        return file->f_op == &shm_file_operations_huge;
}

static const struct vm_operations_struct shm_vm_ops = {
        .open   = shm_open,     /* callback for a new vm-area open */
        .close  = shm_close,    /* callback for when the vm-area is released */
        .fault  = shm_fault,
#if defined(CONFIG_NUMA)
        .set_policy = shm_set_policy,
        .get_policy = shm_get_policy,
#endif
};

/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rw_mutex held as a writer.
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
        key_t key = params->key;
        int shmflg = params->flg;
        size_t size = params->u.size;
        int error;
        struct shmid_kernel *shp;
        int numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        struct file *file;
        char name[13];
        int id;
        vm_flags_t acctflag = 0;

        if (size < SHMMIN || size > ns->shm_ctlmax)
                return -EINVAL;

        if (ns->shm_tot + numpages > ns->shm_ctlall)
                return -ENOSPC;

        shp = ipc_rcu_alloc(sizeof(*shp));
        if (!shp)
                return -ENOMEM;

        shp->shm_perm.key = key;
        shp->shm_perm.mode = (shmflg & S_IRWXUGO);
        shp->mlock_user = NULL;

        shp->shm_perm.security = NULL;
        error = security_shm_alloc(shp);
        if (error) {
                ipc_rcu_putref(shp);
                return error;
        }

        sprintf(name, "SYSV%08x", key);
        if (shmflg & SHM_HUGETLB) {
                /* hugetlb_file_setup applies strict accounting */
                if (shmflg & SHM_NORESERVE)
                        acctflag = VM_NORESERVE;
                file = hugetlb_file_setup(name, size, acctflag,
                                        &shp->mlock_user, HUGETLB_SHMFS_INODE);
        } else {
                /*
                 * Do not allow an unaccounted (VM_NORESERVE) segment when
                 * overcommit mode is OVERCOMMIT_NEVER, even if one is
                 * asked for.
                 */
                if ((shmflg & SHM_NORESERVE) &&
                                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
                        acctflag = VM_NORESERVE;
                file = shmem_file_setup(name, size, acctflag);
        }
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto no_file;

        id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
        if (id < 0) {
                error = id;
                goto no_id;
        }

        shp->shm_cprid = task_tgid_vnr(current);
        shp->shm_lprid = 0;
        shp->shm_atim = shp->shm_dtim = 0;
        shp->shm_ctim = get_seconds();
        shp->shm_segsz = size;
        shp->shm_nattch = 0;
        shp->shm_file = file;
        shp->shm_creator = current;
        /*
         * shmid gets reported as "inode#" in /proc/pid/maps.
         * proc-ps tools use this. Changing this will break them.
         */
        file->f_dentry->d_inode->i_ino = shp->shm_perm.id;

        ns->shm_tot += numpages;
        error = shp->shm_perm.id;
        shm_unlock(shp);
        return error;

no_id:
        if (is_file_hugepages(file) && shp->mlock_user)
                user_shm_unlock(size, shp->mlock_user);
        fput(file);
no_file:
        security_shm_free(shp);
        ipc_rcu_putref(shp);
        return error;
}

/*
 * Called with shm_ids.rw_mutex and ipcp locked.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
        struct shmid_kernel *shp;

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
        return security_shm_associate(shp, shmflg);
}

/*
 * Called with shm_ids.rw_mutex and ipcp locked.
 */
static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
                                struct ipc_params *params)
{
        struct shmid_kernel *shp;

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
        if (shp->shm_segsz < params->u.size)
                return -EINVAL;

        return 0;
}

SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
        struct ipc_namespace *ns;
        struct ipc_ops shm_ops;
        struct ipc_params shm_params;

        ns = current->nsproxy->ipc_ns;

        shm_ops.getnew = newseg;
        shm_ops.associate = shm_security;
        shm_ops.more_checks = shm_more_checks;

        shm_params.key = key;
        shm_params.flg = shmflg;
        shm_params.u.size = size;

        return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}

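/*
 * Translate the in-kernel shmid64_ds to whichever layout userspace
 * asked for: IPC_64 is copied verbatim, IPC_OLD is converted to the
 * legacy shmid_ds.
 */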
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
        switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
            {
                struct shmid_ds out;

                memset(&out, 0, sizeof(out));
                ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
                out.shm_segsz   = in->shm_segsz;
                out.shm_atime   = in->shm_atime;
                out.shm_dtime   = in->shm_dtime;
                out.shm_ctime   = in->shm_ctime;
                out.shm_cpid    = in->shm_cpid;
                out.shm_lpid    = in->shm_lpid;
                out.shm_nattch  = in->shm_nattch;

                return copy_to_user(buf, &out, sizeof(out));
            }
        default:
                return -EINVAL;
        }
}

static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
        switch (version) {
        case IPC_64:
                if (copy_from_user(out, buf, sizeof(*out)))
                        return -EFAULT;
                return 0;
        case IPC_OLD:
            {
                struct shmid_ds tbuf_old;

                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
                        return -EFAULT;

                out->shm_perm.uid       = tbuf_old.shm_perm.uid;
                out->shm_perm.gid       = tbuf_old.shm_perm.gid;
                out->shm_perm.mode      = tbuf_old.shm_perm.mode;

                return 0;
            }
        default:
                return -EINVAL;
        }
}

static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
        switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
            {
                struct shminfo out;

                if (in->shmmax > INT_MAX)
                        out.shmmax = INT_MAX;
                else
                        out.shmmax = (int)in->shmmax;

                out.shmmin      = in->shmmin;
                out.shmmni      = in->shmmni;
                out.shmseg      = in->shmseg;
                out.shmall      = in->shmall;

                return copy_to_user(buf, &out, sizeof(out));
            }
        default:
                return -EINVAL;
        }
}

/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rw_mutex held as a reader.
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
        unsigned long *rss_add, unsigned long *swp_add)
{
        struct inode *inode;

        inode = shp->shm_file->f_path.dentry->d_inode;

        if (is_file_hugepages(shp->shm_file)) {
                struct address_space *mapping = inode->i_mapping;
                struct hstate *h = hstate_file(shp->shm_file);

                *rss_add += pages_per_huge_page(h) * mapping->nrpages;
        } else {
#ifdef CONFIG_SHMEM
                struct shmem_inode_info *info = SHMEM_I(inode);

                spin_lock(&info->lock);
                *rss_add += inode->i_mapping->nrpages;
                *swp_add += info->swapped;
                spin_unlock(&info->lock);
#else
                *rss_add += inode->i_mapping->nrpages;
#endif
        }
}

/*
 * Called with shm_ids.rw_mutex held as a reader.
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
                unsigned long *swp)
{
        int next_id;
        int total, in_use;

        *rss = 0;
        *swp = 0;

        in_use = shm_ids(ns).in_use;

        for (total = 0, next_id = 0; total < in_use; next_id++) {
                struct kern_ipc_perm *ipc;
                struct shmid_kernel *shp;

                ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
                if (ipc == NULL)
                        continue;
                shp = container_of(ipc, struct shmid_kernel, shm_perm);

                shm_add_rss_swap(shp, rss, swp);

                total++;
        }
}

/*
 * This function handles some shmctl commands which require the rw_mutex
 * to be held in write mode.
 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
                       struct shmid_ds __user *buf, int version)
{
        struct kern_ipc_perm *ipcp;
        struct shmid64_ds shmid64;
        struct shmid_kernel *shp;
        int err;

        if (cmd == IPC_SET) {
                if (copy_shmid_from_user(&shmid64, buf, version))
                        return -EFAULT;
        }

        ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
                               &shmid64.shm_perm, 0);
        if (IS_ERR(ipcp))
                return PTR_ERR(ipcp);

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);

        err = security_shm_shmctl(shp, cmd);
        if (err)
                goto out_unlock;
        switch (cmd) {
        case IPC_RMID:
                do_shm_rmid(ns, ipcp);
                goto out_up;
        case IPC_SET:
                ipc_update_perm(&shmid64.shm_perm, ipcp);
                shp->shm_ctim = get_seconds();
                break;
        default:
                err = -EINVAL;
        }
out_unlock:
        shm_unlock(shp);
out_up:
        up_write(&shm_ids(ns).rw_mutex);
        return err;
}

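/*
 * IPC_INFO, SHM_INFO and the stat/lock commands are handled inline,
 * taking rw_mutex as a reader or the object lock as needed; IPC_RMID
 * and IPC_SET are funnelled through shmctl_down(), which takes
 * rw_mutex as a writer.
 */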
SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
        struct shmid_kernel *shp;
        int err, version;
        struct ipc_namespace *ns;

        if (cmd < 0 || shmid < 0) {
                err = -EINVAL;
                goto out;
        }

        version = ipc_parse_version(&cmd);
        ns = current->nsproxy->ipc_ns;

        switch (cmd) { /* replace with proc interface ? */
        case IPC_INFO:
        {
                struct shminfo64 shminfo;

                err = security_shm_shmctl(NULL, cmd);
                if (err)
                        return err;

                memset(&shminfo, 0, sizeof(shminfo));
                shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
                shminfo.shmmax = ns->shm_ctlmax;
                shminfo.shmall = ns->shm_ctlall;

                shminfo.shmmin = SHMMIN;
                if (copy_shminfo_to_user(buf, &shminfo, version))
                        return -EFAULT;

                down_read(&shm_ids(ns).rw_mutex);
                err = ipc_get_maxid(&shm_ids(ns));
                up_read(&shm_ids(ns).rw_mutex);

                if (err < 0)
                        err = 0;
                goto out;
        }
        case SHM_INFO:
        {
                struct shm_info shm_info;

                err = security_shm_shmctl(NULL, cmd);
                if (err)
                        return err;

                memset(&shm_info, 0, sizeof(shm_info));
                down_read(&shm_ids(ns).rw_mutex);
                shm_info.used_ids = shm_ids(ns).in_use;
                shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
                shm_info.shm_tot = ns->shm_tot;
                shm_info.swap_attempts = 0;
                shm_info.swap_successes = 0;
                err = ipc_get_maxid(&shm_ids(ns));
                up_read(&shm_ids(ns).rw_mutex);
                if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
                        err = -EFAULT;
                        goto out;
                }

                err = err < 0 ? 0 : err;
                goto out;
        }
        case SHM_STAT:
        case IPC_STAT:
        {
                struct shmid64_ds tbuf;
                int result;

                if (cmd == SHM_STAT) {
                        shp = shm_lock(ns, shmid);
                        if (IS_ERR(shp)) {
                                err = PTR_ERR(shp);
                                goto out;
                        }
                        result = shp->shm_perm.id;
                } else {
                        shp = shm_lock_check(ns, shmid);
                        if (IS_ERR(shp)) {
                                err = PTR_ERR(shp);
                                goto out;
                        }
                        result = 0;
                }
                err = -EACCES;
                if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
                        goto out_unlock;
                err = security_shm_shmctl(shp, cmd);
                if (err)
                        goto out_unlock;
                memset(&tbuf, 0, sizeof(tbuf));
                kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
                tbuf.shm_segsz  = shp->shm_segsz;
                tbuf.shm_atime  = shp->shm_atim;
                tbuf.shm_dtime  = shp->shm_dtim;
                tbuf.shm_ctime  = shp->shm_ctim;
                tbuf.shm_cpid   = shp->shm_cprid;
                tbuf.shm_lpid   = shp->shm_lprid;
                tbuf.shm_nattch = shp->shm_nattch;
                shm_unlock(shp);
                if (copy_shmid_to_user(buf, &tbuf, version))
                        err = -EFAULT;
                else
                        err = result;
                goto out;
        }
        case SHM_LOCK:
        case SHM_UNLOCK:
        {
                struct file *uninitialized_var(shm_file);

                lru_add_drain_all();  /* drain pagevecs to lru lists */

                shp = shm_lock_check(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
                        goto out;
                }

                audit_ipc_obj(&(shp->shm_perm));

                if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
                        uid_t euid = current_euid();

                        err = -EPERM;
                        if (euid != shp->shm_perm.uid &&
                            euid != shp->shm_perm.cuid)
                                goto out_unlock;
                        if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
                                goto out_unlock;
                }

                err = security_shm_shmctl(shp, cmd);
                if (err)
                        goto out_unlock;

                if (cmd == SHM_LOCK) {
                        struct user_struct *user = current_user();

                        if (!is_file_hugepages(shp->shm_file)) {
                                err = shmem_lock(shp->shm_file, 1, user);
                                if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
                                        shp->shm_perm.mode |= SHM_LOCKED;
                                        shp->mlock_user = user;
                                }
                        }
                } else if (!is_file_hugepages(shp->shm_file)) {
                        shmem_lock(shp->shm_file, 0, shp->mlock_user);
                        shp->shm_perm.mode &= ~SHM_LOCKED;
                        shp->mlock_user = NULL;
                }
                shm_unlock(shp);
                goto out;
        }
        case IPC_RMID:
        case IPC_SET:
                err = shmctl_down(ns, shmid, cmd, buf, version);
                return err;
        default:
                return -EINVAL;
        }

out_unlock:
        shm_unlock(shp);
out:
        return err;
}

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
{
        struct shmid_kernel *shp;
        unsigned long addr;
        unsigned long size;
        struct file *file;
        int err;
        unsigned long flags;
        unsigned long prot;
        int acc_mode;
        unsigned long user_addr;
        struct ipc_namespace *ns;
        struct shm_file_data *sfd;
        struct path path;
        fmode_t f_mode;

        err = -EINVAL;
        if (shmid < 0)
                goto out;
        else if ((addr = (ulong)shmaddr)) {
                if (addr & (SHMLBA - 1)) {
                        if (shmflg & SHM_RND)
                                addr &= ~(SHMLBA - 1);       /* round down */
                        else
#ifndef __ARCH_FORCE_SHMLBA
                                if (addr & ~PAGE_MASK)
#endif
                                        goto out;
                }
                flags = MAP_SHARED | MAP_FIXED;
        } else {
                if ((shmflg & SHM_REMAP))
                        goto out;

                flags = MAP_SHARED;
        }

        if (shmflg & SHM_RDONLY) {
                prot = PROT_READ;
                acc_mode = S_IRUGO;
                f_mode = FMODE_READ;
        } else {
                prot = PROT_READ | PROT_WRITE;
                acc_mode = S_IRUGO | S_IWUGO;
                f_mode = FMODE_READ | FMODE_WRITE;
        }
        if (shmflg & SHM_EXEC) {
                prot |= PROT_EXEC;
                acc_mode |= S_IXUGO;
        }

        /*
         * We cannot rely on the fs check since SYSV IPC does have an
         * additional creator id...
         */
        ns = current->nsproxy->ipc_ns;
        shp = shm_lock_check(ns, shmid);
        if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out;
        }

        err = -EACCES;
        if (ipcperms(ns, &shp->shm_perm, acc_mode))
                goto out_unlock;

        err = security_shm_shmat(shp, shmaddr, shmflg);
        if (err)
                goto out_unlock;

        path = shp->shm_file->f_path;
        path_get(&path);
        shp->shm_nattch++;
        size = i_size_read(path.dentry->d_inode);
        shm_unlock(shp);

        err = -ENOMEM;
        sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
        if (!sfd)
                goto out_put_dentry;

        file = alloc_file(&path, f_mode,
                          is_file_hugepages(shp->shm_file) ?
                                &shm_file_operations_huge :
                                &shm_file_operations);
        if (!file)
                goto out_free;

        file->private_data = sfd;
        file->f_mapping = shp->shm_file->f_mapping;
        sfd->id = shp->shm_perm.id;
        sfd->ns = get_ipc_ns(ns);
        sfd->file = shp->shm_file;
        sfd->vm_ops = NULL;

        down_write(&current->mm->mmap_sem);
        if (addr && !(shmflg & SHM_REMAP)) {
                err = -EINVAL;
                if (find_vma_intersection(current->mm, addr, addr + size))
                        goto invalid;
                /*
                 * If shm segment goes below stack, make sure there is some
                 * space left for the stack to grow (at least 4 pages).
                 */
                if (addr < current->mm->start_stack &&
                    addr > current->mm->start_stack - size - PAGE_SIZE * 5)
                        goto invalid;
        }

        user_addr = do_mmap(file, addr, size, prot, flags, 0);
        *raddr = user_addr;
        err = 0;
        if (IS_ERR_VALUE(user_addr))
                err = (long)user_addr;
invalid:
        up_write(&current->mm->mmap_sem);

        fput(file);

out_nattch:
        down_write(&shm_ids(ns).rw_mutex);
        shp = shm_lock(ns, shmid);
        BUG_ON(IS_ERR(shp));
        shp->shm_nattch--;
        if (shm_may_destroy(ns, shp))
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
        up_write(&shm_ids(ns).rw_mutex);

out:
        return err;

out_unlock:
        shm_unlock(shp);
        goto out;

out_free:
        kfree(sfd);
out_put_dentry:
        path_put(&path);
        goto out_nattch;
}

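/*
 * force_successful_syscall_return() is needed because a legitimate
 * attach address may look like a negative errno to architectures that
 * flag syscall errors by value range.
 */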
SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
        unsigned long ret;
        long err;

        err = do_shmat(shmid, shmaddr, shmflg, &ret);
        if (err)
                return err;
        force_successful_syscall_return();
        return (long)ret;
}

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long addr = (unsigned long)shmaddr;
        int retval = -EINVAL;
#ifdef CONFIG_MMU
        loff_t size = 0;
        struct vm_area_struct *next;
#endif

        if (addr & ~PAGE_MASK)
                return retval;

        down_write(&mm->mmap_sem);

        /*
         * This function tries to be smart and unmap shm segments that
         * were modified by partial mlock or munmap calls:
         * - It first determines the size of the shm segment that should be
         *   unmapped: It searches for a vma that is backed by shm and that
         *   started at address shmaddr. It records its size and then unmaps
         *   it.
         * - Then it unmaps all shm vmas that started at shmaddr and that
         *   are within the initially determined size.
         * Errors from do_munmap are ignored: the function only fails if
         * it's called with invalid parameters or if it's called to unmap
         * a part of a vma. Both calls in this function are for full vmas,
         * the parameters are directly copied from the vma itself and always
         * valid - therefore do_munmap cannot fail. (famous last words?)
         */
        /*
         * If it had been mremap()'d, the starting address would not
         * match the usual checks anyway. So assume all vma's are
         * above the starting address given.
         */
        vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
        while (vma) {
                next = vma->vm_next;

                /*
                 * Check if the starting address would match, i.e. it's
                 * a fragment created by mprotect() and/or munmap(), or
                 * otherwise it starts at this address with no hassles.
                 */
                if ((vma->vm_ops == &shm_vm_ops) &&
                        (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

                        size = vma->vm_file->f_path.dentry->d_inode->i_size;
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
                        /*
                         * We discovered the size of the shm segment, so
                         * break out of here and fall through to the next
                         * loop that uses the size information to stop
                         * searching for matching vma's.
                         */
                        retval = 0;
                        vma = next;
                        break;
                }
                vma = next;
        }

        /*
         * We need look no further than the maximum address a fragment
         * could possibly have landed at. Also cast things to loff_t to
         * prevent overflows and make comparisons vs. equal-width types.
         */
        size = PAGE_ALIGN(size);
        while (vma && (loff_t)(vma->vm_end - addr) <= size) {
                next = vma->vm_next;

                /* finding a matching vma now does not alter retval */
                if ((vma->vm_ops == &shm_vm_ops) &&
                        (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
                vma = next;
        }

#else /* CONFIG_MMU */
        /*
         * Under NOMMU conditions, the exact address to be destroyed must
         * be given.
         */
        retval = -EINVAL;
        if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
                do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
                retval = 0;
        }

#endif

        up_write(&mm->mmap_sem);
        return retval;
}

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
        struct shmid_kernel *shp = it;
        unsigned long rss = 0, swp = 0;

        shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

        return seq_printf(s,
                          "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
                          "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
                          SIZE_SPEC " " SIZE_SPEC "\n",
                          shp->shm_perm.key,
                          shp->shm_perm.id,
                          shp->shm_perm.mode,
                          shp->shm_segsz,
                          shp->shm_cprid,
                          shp->shm_lprid,
                          shp->shm_nattch,
                          shp->shm_perm.uid,
                          shp->shm_perm.gid,
                          shp->shm_perm.cuid,
                          shp->shm_perm.cgid,
                          shp->shm_atim,
                          shp->shm_dtim,
                          shp->shm_ctim,
                          rss * PAGE_SIZE,
                          swp * PAGE_SIZE);
}
#endif