author    Christopher Yeoh <cyeoh@au1.ibm.com>
          Wed, 24 Aug 2011 23:46:42 +0000 (09:46 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
          Mon, 12 Sep 2011 05:27:15 +0000 (15:27 +1000)

> You might get some speed benefit by optimising for the small copies
> here.  Define a local on-stack array of N page*'s and point
> process_pages at that if the number of pages is <= N.  Saves a
> malloc/free and is more cache-friendly.  But only if the result is
> measurable!

I have done some benchmarking on this, and it gains about 5-7% on a
microbenchmark with 4KB copies and about 1% on a more realistic (but
modified for smaller copies) hpcc benchmark. The performance gain
disappears into the noise by about 64KB copy sizes, and there is no
measurable overhead for larger copies. So I think it's worth including.
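Outside of the kernel context, the shape of the optimisation is roughly
the following standalone C sketch (SMALL_COUNT and copy_items() are
illustrative names, not from the patch; the real code uses
PVM_MAX_PP_ARRAY_COUNT and an array of struct page pointers):

#include <stdlib.h>
#include <string.h>

#define SMALL_COUNT 16	/* plays the role of PVM_MAX_PP_ARRAY_COUNT */

static int copy_items(const int *src, size_t n)
{
	int stack_buf[SMALL_COUNT];	/* small requests stay on the stack */
	int *buf = stack_buf;

	if (n > SMALL_COUNT) {
		/* large requests fall back to the heap */
		buf = malloc(n * sizeof(*buf));
		if (!buf)
			return -1;
	}

	memcpy(buf, src, n * sizeof(*buf));
	/* ... operate on buf ... */

	if (buf != stack_buf)	/* free only what was heap-allocated */
		free(buf);
	return 0;
}

Small counts never touch the allocator, which is where the gain on 4KB
copies comes from; the buf != stack_buf test mirrors the guarded kfree()
in the patch below.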

Included below is the patch (based on v4). For ease of review, the first
diff is just against the latest version of CMA posted here previously;
the second is the entire CMA patch.

Signed-off-by: Chris Yeoh <cyeoh@au1.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: <linux-man@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 5ac8abc0d09bcbc25ef178e6003012454aefbf67..e920aa3ce104c35b902e8412864c78102fa3177e 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -221,6 +221,10 @@ static int process_vm_rw_single_vec(unsigned long addr,
        return rc;
 }
 
+/* Maximum number of entries for process pages array
+   which lives on stack */
+#define PVM_MAX_PP_ARRAY_COUNT 16
+
 /**
  * process_vm_rw_core - core of reading/writing pages from task specified
  * @pid: PID of process to read/write from/to
@@ -241,7 +245,8 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
                                  unsigned long flags, int vm_write)
 {
        struct task_struct *task;
-       struct page **process_pages = NULL;
+       struct page *pp_stack[PVM_MAX_PP_ARRAY_COUNT];
+       struct page **process_pages = pp_stack;
        struct mm_struct *mm;
        unsigned long i;
        ssize_t rc = 0;
@@ -271,13 +276,16 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
        if (nr_pages == 0)
                return 0;
 
-       /* For reliability don't try to kmalloc more than 2 pages worth */
-       process_pages = kmalloc(min_t(size_t, PVM_MAX_KMALLOC_PAGES,
-                                     sizeof(struct pages *)*nr_pages),
-                               GFP_KERNEL);
+       if (nr_pages > PVM_MAX_PP_ARRAY_COUNT) {
+               /* For reliability don't try to kmalloc more than
+                  2 pages worth */
+               process_pages = kmalloc(min_t(size_t, PVM_MAX_KMALLOC_PAGES,
+                                             sizeof(struct pages *)*nr_pages),
+                                       GFP_KERNEL);
 
-       if (!process_pages)
-               return -ENOMEM;
+               if (!process_pages)
+                       return -ENOMEM;
+       }
 
        /* Get process information */
        rcu_read_lock();
@@ -331,7 +339,8 @@ put_task_struct:
        put_task_struct(task);
 
 free_proc_pages:
-       kfree(process_pages);
+       if (process_pages != pp_stack)
+               kfree(process_pages);
        return rc;
 }