]> git.karo-electronics.de Git - karo-tx-linux.git/blob - mm/process_vm_access.c
The basic idea behind cross memory attach is to allow MPI programs doing intra-node communication to do a single copy of the message rather than a double copy via shared memory.
[karo-tx-linux.git] / mm / process_vm_access.c
1 /*
2  *  linux/mm/process_vm_access.c
3  *
4  *  Copyright (C) 2010-2011 Christopher Yeoh <cyeoh@au1.ibm.com>, IBM Corp.
5  */
6
7 #include <linux/mm.h>
8 #include <linux/uio.h>
9 #include <linux/sched.h>
10 #include <linux/highmem.h>
11 #include <linux/ptrace.h>
12 #include <linux/slab.h>
13 #include <linux/syscalls.h>
14
15 #ifdef CONFIG_COMPAT
16 #include <linux/compat.h>
17 #endif
18
19 /*
20  * process_vm_rw_pages - read/write pages from task specified
21  * @task: task to read/write from
22  * @mm: mm for task
23  * @process_pages: struct pages area that can store at least
24  *  nr_pages_to_copy struct page pointers
25  * @pa: address of page in task to start copying from/to
26  * @start_offset: offset in page to start copying from/to
27  * @len: number of bytes to copy
28  * @lvec: iovec array specifying where to copy to/from
29  * @lvec_cnt: number of elements in iovec array
30  * @lvec_current: index in iovec array we are up to
31  * @lvec_offset: offset in bytes from current iovec iov_base we are up to
32  * @vm_write: 0 means copy from, 1 means copy to
33  * @nr_pages_to_copy: number of pages to copy
34  */
static ssize_t process_vm_rw_pages(struct task_struct *task,
                                   struct mm_struct *mm,
                                   struct page **process_pages,
                                   unsigned long pa,
                                   unsigned long start_offset,
                                   unsigned long len,
                                   const struct iovec *lvec,
                                   unsigned long lvec_cnt,
                                   unsigned long *lvec_current,
                                   size_t *lvec_offset,
                                   int vm_write,
                                   unsigned int nr_pages_to_copy)
{
        int pages_pinned;
        void *target_kaddr;
        int pgs_copied = 0;
        int j;
        int ret;
        ssize_t bytes_to_copy;
        ssize_t bytes_copied = 0;
        /* Default to -EFAULT; overwritten with bytes_copied on success. */
        ssize_t rc = -EFAULT;

        /* Get the pages we're interested in */
        down_read(&mm->mmap_sem);
        pages_pinned = get_user_pages(task, mm, pa,
                                      nr_pages_to_copy,
                                      vm_write, 0, process_pages, NULL);
        up_read(&mm->mmap_sem);

        /*
         * Partial pin counts as failure (rc stays -EFAULT); the pages
         * that were pinned are still released in the cleanup loop below.
         */
        if (pages_pinned != nr_pages_to_copy)
                goto end;

        /* Do the copy for each page */
        for (pgs_copied = 0;
             (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt);
             pgs_copied++) {
                /* Make sure we have a non zero length iovec */
                while (*lvec_current < lvec_cnt
                       && lvec[*lvec_current].iov_len == 0)
                        (*lvec_current)++;
                if (*lvec_current == lvec_cnt)
                        break;

                /*
                 * Will copy smallest of:
                 * - bytes remaining in page
                 * - bytes remaining in destination iovec
                 */
                bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset,
                                      len - bytes_copied);
                bytes_to_copy = min_t(ssize_t, bytes_to_copy,
                                      lvec[*lvec_current].iov_len
                                      - *lvec_offset);

                /* Temporarily map the pinned page into kernel space. */
                target_kaddr = kmap(process_pages[pgs_copied]) + start_offset;

                if (vm_write)
                        ret = copy_from_user(target_kaddr,
                                             lvec[*lvec_current].iov_base
                                             + *lvec_offset,
                                             bytes_to_copy);
                else
                        ret = copy_to_user(lvec[*lvec_current].iov_base
                                           + *lvec_offset,
                                           target_kaddr, bytes_to_copy);
                kunmap(process_pages[pgs_copied]);
                if (ret) {
                        /*
                         * Copy faulted part-way: count this page anyway so
                         * the cleanup loop still marks it dirty (it may
                         * have been partially written).
                         */
                        pgs_copied++;
                        goto end;
                }
                bytes_copied += bytes_to_copy;
                *lvec_offset += bytes_to_copy;
                if (*lvec_offset == lvec[*lvec_current].iov_len) {
                        /*
                         * Need to copy remaining part of page into the
                         * next iovec if there are any bytes left in page
                         */
                        (*lvec_current)++;
                        *lvec_offset = 0;
                        start_offset = (start_offset + bytes_to_copy)
                                % PAGE_SIZE;
                        /*
                         * Bytes of this page remain: cancel the loop's
                         * pgs_copied++ so the next iteration revisits the
                         * same page with the next iovec.
                         */
                        if (start_offset)
                                pgs_copied--;
                } else {
                        start_offset = 0;
                }
        }

        rc = bytes_copied;

end:
        /*
         * Release every pinned page; on a write, mark as dirty only the
         * pages we actually copied into (j < pgs_copied).
         */
        if (vm_write) {
                for (j = 0; j < pages_pinned; j++) {
                        if (j < pgs_copied)
                                set_page_dirty_lock(process_pages[j]);
                        put_page(process_pages[j]);
                }
        } else {
                for (j = 0; j < pages_pinned; j++)
                        put_page(process_pages[j]);
        }

        return rc;
}
139
140 /* Maximum number of pages kmalloc'd to hold struct page's during copy */
141 #define PVM_MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
142
143 /*
144  * process_vm_rw_single_vec - read/write pages from task specified
145  * @addr: start memory address of target process
146  * @len: size of area to copy to/from
147  * @lvec: iovec array specifying where to copy to/from locally
148  * @lvec_cnt: number of elements in iovec array
149  * @lvec_current: index in iovec array we are up to
150  * @lvec_offset: offset in bytes from current iovec iov_base we are up to
151  * @process_pages: struct pages area that can store at least
152  *  nr_pages_to_copy struct page pointers
153  * @mm: mm for task
154  * @task: task to read/write from
155  * @vm_write: 0 means copy from, 1 means copy to
156  */
157 static ssize_t process_vm_rw_single_vec(unsigned long addr,
158                                         unsigned long len,
159                                         const struct iovec *lvec,
160                                         unsigned long lvec_cnt,
161                                         unsigned long *lvec_current,
162                                         size_t *lvec_offset,
163                                         struct page **process_pages,
164                                         struct mm_struct *mm,
165                                         struct task_struct *task,
166                                         int vm_write)
167 {
168         unsigned long pa = addr & PAGE_MASK;
169         unsigned long start_offset = addr - pa;
170         unsigned long nr_pages;
171         ssize_t bytes_copied = 0;
172         ssize_t rc;
173         unsigned long nr_pages_copied = 0;
174         unsigned long nr_pages_to_copy;
175         unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
176                 / sizeof(struct pages *);
177
178
179         /* Work out address and page range required */
180         if (len == 0)
181                 return 0;
182         nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
183
184
185         while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) {
186                 nr_pages_to_copy = min(nr_pages - nr_pages_copied,
187                                        max_pages_per_loop);
188
189                 rc = process_vm_rw_pages(task, mm, process_pages, pa,
190                                          start_offset, len,
191                                          lvec, lvec_cnt,
192                                          lvec_current, lvec_offset,
193                                          vm_write, nr_pages_to_copy);
194                 start_offset = 0;
195
196                 if (rc < 0)
197                         return rc;
198                 else {
199                         bytes_copied += rc;
200                         len -= rc;
201                         nr_pages_copied += nr_pages_to_copy;
202                         pa += nr_pages_to_copy * PAGE_SIZE;
203                 }
204         }
205
206         rc = bytes_copied;
207         return rc;
208 }
209
210 static ssize_t process_vm_rw(pid_t pid, const struct iovec *lvec,
211                              unsigned long liovcnt,
212                              const struct iovec *rvec,
213                              unsigned long riovcnt,
214                              unsigned long flags, int vm_write)
215 {
216         struct task_struct *task;
217         struct page **process_pages = NULL;
218         struct mm_struct *mm;
219         unsigned long i;
220         ssize_t rc;
221         ssize_t bytes_copied;
222         unsigned long nr_pages = 0;
223         unsigned long nr_pages_iov;
224         unsigned long iov_l_curr_idx = 0;
225         size_t iov_l_curr_offset = 0;
226         ssize_t iov_len;
227
228         /*
229          * Work out how many pages of struct pages we're going to need
230          * when eventually calling get_user_pages
231          */
232         for (i = 0; i < riovcnt; i++) {
233                 iov_len = rvec[i].iov_len;
234                 if (iov_len > 0) {
235                         nr_pages_iov = ((unsigned long)rvec[i].iov_base
236                                         + iov_len)
237                                 / PAGE_SIZE - (unsigned long)rvec[i].iov_base
238                                 / PAGE_SIZE + 1;
239                         nr_pages = max(nr_pages, nr_pages_iov);
240                 }
241         }
242
243         if (nr_pages == 0)
244                 return 0;
245
246         /* For reliability don't try to kmalloc more than 2 pages worth */
247         process_pages = kmalloc(min_t(size_t, PVM_MAX_KMALLOC_PAGES,
248                                       sizeof(struct pages *)*nr_pages),
249                                 GFP_KERNEL);
250
251         if (!process_pages)
252                 return -ENOMEM;
253
254         /* Get process information */
255         rcu_read_lock();
256         task = find_task_by_vpid(pid);
257         if (task)
258                 get_task_struct(task);
259         rcu_read_unlock();
260         if (!task) {
261                 rc = -ESRCH;
262                 goto free_proc_pages;
263         }
264
265         task_lock(task);
266         if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
267                 task_unlock(task);
268                 rc = -EPERM;
269                 goto put_task_struct;
270         }
271         mm = task->mm;
272
273         if (!mm || (task->flags & PF_KTHREAD)) {
274                 task_unlock(task);
275                 rc = -EINVAL;
276                 goto put_task_struct;
277         }
278
279         atomic_inc(&mm->mm_users);
280         task_unlock(task);
281
282         rc = 0;
283         for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
284                 bytes_copied = process_vm_rw_single_vec(
285                         (unsigned long)rvec[i].iov_base, rvec[i].iov_len,
286                         lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset,
287                         process_pages, mm, task, vm_write);
288                 if (bytes_copied < 0) {
289                         rc = bytes_copied;
290                         goto put_mm;
291                 } else {
292                         rc += bytes_copied;
293                 }
294         }
295
296 put_mm:
297         mmput(mm);
298
299 put_task_struct:
300         put_task_struct(task);
301
302
303 free_proc_pages:
304         kfree(process_pages);
305         return rc;
306 }
307
308 static ssize_t process_vm_rw_check_iovecs(pid_t pid,
309                                           const struct iovec __user *lvec,
310                                           unsigned long liovcnt,
311                                           const struct iovec __user *rvec,
312                                           unsigned long riovcnt,
313                                           unsigned long flags, int vm_write)
314 {
315         struct iovec iovstack_l[UIO_FASTIOV];
316         struct iovec iovstack_r[UIO_FASTIOV];
317         struct iovec *iov_l = iovstack_l;
318         struct iovec *iov_r = iovstack_r;
319         ssize_t rc;
320
321         if (flags != 0)
322                 return -EINVAL;
323
324         /* Check iovecs */
325         if (vm_write)
326                 rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV,
327                                            iovstack_l, &iov_l, 1);
328         else
329                 rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV,
330                                            iovstack_l, &iov_l, 1);
331         if (rc <= 0)
332                 goto free_iovecs;
333
334         rc = rw_copy_check_uvector(READ, rvec, riovcnt, UIO_FASTIOV,
335                                    iovstack_r, &iov_r, 0);
336         if (rc <= 0)
337                 goto free_iovecs;
338
339         rc = process_vm_rw(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
340                             vm_write);
341
342 free_iovecs:
343         if (iov_r != iovstack_r)
344                 kfree(iov_r);
345         if (iov_l != iovstack_l)
346                 kfree(iov_l);
347
348         return rc;
349 }
350
/* process_vm_readv(2): copy data FROM the target process into local iovecs. */
SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec,
                unsigned long, liovcnt, const struct iovec __user *, rvec,
                unsigned long, riovcnt, unsigned long, flags)
{
        /* vm_write == 0 selects the read direction. */
        return process_vm_rw_check_iovecs(pid, lvec, liovcnt, rvec, riovcnt,
                                          flags, 0);
}
358
/* process_vm_writev(2): copy data from local iovecs INTO the target process. */
SYSCALL_DEFINE6(process_vm_writev, pid_t, pid,
                const struct iovec __user *, lvec,
                unsigned long, liovcnt, const struct iovec __user *, rvec,
                unsigned long, riovcnt, unsigned long, flags)
{
        /* vm_write == 1 selects the write direction. */
        return process_vm_rw_check_iovecs(pid, lvec, liovcnt, rvec, riovcnt,
                                          flags, 1);
}
367
368 #ifdef CONFIG_COMPAT
369
370 asmlinkage ssize_t
371 compat_process_vm_rw_check_iovecs(compat_pid_t pid,
372                                   const struct compat_iovec __user *lvec,
373                                   unsigned long liovcnt,
374                                   const struct compat_iovec __user *rvec,
375                                   unsigned long riovcnt,
376                                   unsigned long flags, int vm_write)
377 {
378         struct iovec iovstack_l[UIO_FASTIOV];
379         struct iovec iovstack_r[UIO_FASTIOV];
380         struct iovec *iov_l = iovstack_l;
381         struct iovec *iov_r = iovstack_r;
382         ssize_t rc = -EFAULT;
383
384         if (flags != 0)
385                 return -EINVAL;
386
387         if (!access_ok(VERIFY_READ, lvec, liovcnt * sizeof(*lvec)))
388                 goto out;
389
390         if (!access_ok(VERIFY_READ, rvec, riovcnt * sizeof(*rvec)))
391                 goto out;
392
393         if (vm_write)
394                 rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt,
395                                                   UIO_FASTIOV, iovstack_l,
396                                                   &iov_l, 1);
397         else
398                 rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt,
399                                                   UIO_FASTIOV, iovstack_l,
400                                                   &iov_l, 1);
401         if (rc <= 0)
402                 goto free_iovecs;
403         rc = compat_rw_copy_check_uvector(READ, rvec, riovcnt,
404                                           UIO_FASTIOV, iovstack_r,
405                                           &iov_r, 0);
406         if (rc <= 0)
407                 goto free_iovecs;
408
409         rc = process_vm_rw(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
410                             vm_write);
411
412 free_iovecs:
413         if (iov_r != iovstack_r)
414                 kfree(iov_r);
415         if (iov_l != iovstack_l)
416                 kfree(iov_l);
417
418 out:
419         return rc;
420 }
421
/* Compat process_vm_readv(2): 32-bit iovecs, read FROM the target process. */
asmlinkage ssize_t
compat_sys_process_vm_readv(compat_pid_t pid,
                            const struct compat_iovec __user *lvec,
                            unsigned long liovcnt,
                            const struct compat_iovec __user *rvec,
                            unsigned long riovcnt,
                            unsigned long flags)
{
        /* vm_write == 0 selects the read direction. */
        return compat_process_vm_rw_check_iovecs(pid, lvec, liovcnt, rvec,
                                                 riovcnt, flags, 0);
}
433
/* Compat process_vm_writev(2): 32-bit iovecs, write INTO the target process. */
asmlinkage ssize_t
compat_sys_process_vm_writev(compat_pid_t pid,
                             const struct compat_iovec __user *lvec,
                             unsigned long liovcnt,
                             const struct compat_iovec __user *rvec,
                             unsigned long riovcnt,
                             unsigned long flags)
{
        /* vm_write == 1 selects the write direction. */
        return compat_process_vm_rw_check_iovecs(pid, lvec, liovcnt, rvec,
                                                 riovcnt, flags, 1);
}
445
446 #endif