mm: support madvise(MADV_FREE)
/*
 *      linux/mm/madvise.c
 *
 * Copyright (C) 1999  Linus Torvalds
 * Copyright (C) 2002  Christoph Hellwig
 */

#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
#include <linux/page-isolation.h>
#include <linux/hugetlb.h>
#include <linux/falloc.h>
#include <linux/sched.h>
#include <linux/ksm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>

#include <asm/tlb.h>

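/* Per-walk context handed to the page table walker used by MADV_FREE below. */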
struct madvise_free_private {
        struct vm_area_struct *vma;
        struct mmu_gather *tlb;
};

/*
 * Any behaviour which results in changes to the vma->vm_flags needs to
 * take mmap_sem for writing. Others, which simply traverse vmas, need
 * to only take it for reading.
 */
static int madvise_need_mmap_write(int behavior)
{
        switch (behavior) {
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
        case MADV_FREE:
                return 0;
        default:
                /* be safe, default to 1. list exceptions explicitly */
                return 1;
        }
}

/*
 * We can potentially split a vm area into separate
 * areas, each area with its own behavior.
 */
static long madvise_behavior(struct vm_area_struct *vma,
                     struct vm_area_struct **prev,
                     unsigned long start, unsigned long end, int behavior)
{
        struct mm_struct *mm = vma->vm_mm;
        int error = 0;
        pgoff_t pgoff;
        unsigned long new_flags = vma->vm_flags;

        switch (behavior) {
        case MADV_NORMAL:
                new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
                break;
        case MADV_SEQUENTIAL:
                new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
                break;
        case MADV_RANDOM:
                new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
                break;
        case MADV_DONTFORK:
                new_flags |= VM_DONTCOPY;
                break;
        case MADV_DOFORK:
                if (vma->vm_flags & VM_IO) {
                        error = -EINVAL;
                        goto out;
                }
                new_flags &= ~VM_DONTCOPY;
                break;
        case MADV_DONTDUMP:
                new_flags |= VM_DONTDUMP;
                break;
        case MADV_DODUMP:
                if (new_flags & VM_SPECIAL) {
                        error = -EINVAL;
                        goto out;
                }
                new_flags &= ~VM_DONTDUMP;
                break;
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
                error = ksm_madvise(vma, start, end, behavior, &new_flags);
                if (error)
                        goto out;
                break;
        case MADV_HUGEPAGE:
        case MADV_NOHUGEPAGE:
                error = hugepage_madvise(vma, &new_flags, behavior);
                if (error)
                        goto out;
                break;
        }

        if (new_flags == vma->vm_flags) {
                *prev = vma;
                goto out;
        }

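        /*
         * Try to merge the updated flags into an adjacent vma first; only
         * if that fails do we split, so that exactly [start, end) ends up
         * carrying the new flags.
         */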
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
                          vma->vm_file, pgoff, vma_policy(vma),
                          vma->vm_userfaultfd_ctx);
        if (*prev) {
                vma = *prev;
                goto success;
        }

        *prev = vma;

        if (start != vma->vm_start) {
                error = split_vma(mm, vma, start, 1);
                if (error)
                        goto out;
        }

        if (end != vma->vm_end) {
                error = split_vma(mm, vma, end, 0);
                if (error)
                        goto out;
        }

success:
        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         */
        vma->vm_flags = new_flags;

out:
        if (error == -ENOMEM)
                error = -EAGAIN;
        return error;
}

#ifdef CONFIG_SWAP
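/*
 * Walk the ptes of an anonymous range and start asynchronous reads for any
 * swapped-out pages, so that a later fault finds them already in the swap
 * cache.  Used by MADV_WILLNEED when swap is configured.
 */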
static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
        unsigned long end, struct mm_walk *walk)
{
        pte_t *orig_pte;
        struct vm_area_struct *vma = walk->private;
        unsigned long index;

        if (pmd_none_or_trans_huge_or_clear_bad(pmd))
                return 0;

        for (index = start; index != end; index += PAGE_SIZE) {
                pte_t pte;
                swp_entry_t entry;
                struct page *page;
                spinlock_t *ptl;

                orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
                pte = *(orig_pte + ((index - start) / PAGE_SIZE));
                pte_unmap_unlock(orig_pte, ptl);

                if (pte_present(pte) || pte_none(pte))
                        continue;
                entry = pte_to_swp_entry(pte);
                if (unlikely(non_swap_entry(entry)))
                        continue;

                page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
                                                                vma, index);
                if (page)
                        page_cache_release(page);
        }

        return 0;
}

static void force_swapin_readahead(struct vm_area_struct *vma,
                unsigned long start, unsigned long end)
{
        struct mm_walk walk = {
                .mm = vma->vm_mm,
                .pmd_entry = swapin_walk_pmd_entry,
                .private = vma,
        };

        walk_page_range(start, end, &walk);

        lru_add_drain();        /* Push any new pages onto the LRU now */
}

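/*
 * Same idea for shmem/tmpfs mappings: swapped-out pages are recorded as
 * exceptional entries in the mapping's radix tree, so look them up there
 * and start asynchronous reads.
 */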
static void force_shm_swapin_readahead(struct vm_area_struct *vma,
                unsigned long start, unsigned long end,
                struct address_space *mapping)
{
        pgoff_t index;
        struct page *page;
        swp_entry_t swap;

        for (; start < end; start += PAGE_SIZE) {
                index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

                page = find_get_entry(mapping, index);
                if (!radix_tree_exceptional_entry(page)) {
                        if (page)
                                page_cache_release(page);
                        continue;
                }
                swap = radix_to_swp_entry(page);
                page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
                                                                NULL, 0);
                if (page)
                        page_cache_release(page);
        }

        lru_add_drain();        /* Push any new pages onto the LRU now */
}
#endif          /* CONFIG_SWAP */

/*
 * Schedule all required I/O operations.  Do not wait for completion.
 */
static long madvise_willneed(struct vm_area_struct *vma,
                             struct vm_area_struct **prev,
                             unsigned long start, unsigned long end)
{
        struct file *file = vma->vm_file;

#ifdef CONFIG_SWAP
        if (!file) {
                *prev = vma;
                force_swapin_readahead(vma, start, end);
                return 0;
        }

        if (shmem_mapping(file->f_mapping)) {
                *prev = vma;
                force_shm_swapin_readahead(vma, start, end,
                                        file->f_mapping);
                return 0;
        }
#else
        if (!file)
                return -EBADF;
#endif

        if (IS_DAX(file_inode(file))) {
                /* no bad return value, but ignore advice */
                return 0;
        }

        *prev = vma;
        start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
        if (end > vma->vm_end)
                end = vma->vm_end;
        end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

        force_page_cache_readahead(file->f_mapping, file, start, end - start);
        return 0;
}

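/*
 * Core of MADV_FREE: for every present pte in the range, drop any swap
 * cache backing and mark the page old and clean.  Reclaim can then discard
 * the page outright instead of swapping it out, unless the application
 * dirties it again first.
 */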
static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
{
        struct madvise_free_private *fp = walk->private;
        struct mmu_gather *tlb = fp->tlb;
        struct mm_struct *mm = tlb->mm;
        struct vm_area_struct *vma = fp->vma;
        spinlock_t *ptl;
        pte_t *pte, ptent;
        struct page *page;

        split_huge_page_pmd(vma, addr, pmd);
        if (pmd_trans_unstable(pmd))
                return 0;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                ptent = *pte;

                if (!pte_present(ptent))
                        continue;

                page = vm_normal_page(vma, addr, ptent);
                if (!page)
                        continue;

                if (PageSwapCache(page)) {
                        if (!trylock_page(page))
                                continue;

                        if (!try_to_free_swap(page)) {
                                unlock_page(page);
                                continue;
                        }

                        ClearPageDirty(page);
                        unlock_page(page);
                }

                /*
                 * Some architectures (e.g. PPC) don't update the TLB
                 * with set_pte_at() and tlb_remove_tlb_entry() alone,
                 * so for portability clear the pte first and then
                 * re-install it as old and clean.
                 */
                ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                tlb->fullmm);
                ptent = pte_mkold(ptent);
                ptent = pte_mkclean(ptent);
                set_pte_at(mm, addr, pte, ptent);
                tlb_remove_tlb_entry(tlb, pte, addr);
        }
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
        return 0;
}

static void madvise_free_page_range(struct mmu_gather *tlb,
                             struct vm_area_struct *vma,
                             unsigned long addr, unsigned long end)
{
        struct madvise_free_private fp = {
                .vma = vma,
                .tlb = tlb,
        };

        struct mm_walk free_walk = {
                .pmd_entry = madvise_free_pte_range,
                .mm = vma->vm_mm,
                .private = &fp,
        };

        BUG_ON(addr >= end);
        tlb_start_vma(tlb, vma);
        walk_page_range(addr, end, &free_walk);
        tlb_end_vma(tlb, vma);
}

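/*
 * Apply MADV_FREE to the portion of a single anonymous vma that overlaps
 * [start_addr, end_addr), with the usual mmu_gather and mmu_notifier
 * bracketing around the page table walk.
 */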
static int madvise_free_single_vma(struct vm_area_struct *vma,
                        unsigned long start_addr, unsigned long end_addr)
{
        unsigned long start, end;
        struct mm_struct *mm = vma->vm_mm;
        struct mmu_gather tlb;

        if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
                return -EINVAL;

        /* MADV_FREE currently only works on anonymous vmas */
        if (vma->vm_file)
                return -EINVAL;

        start = max(vma->vm_start, start_addr);
        if (start >= vma->vm_end)
                return -EINVAL;
        end = min(vma->vm_end, end_addr);
        if (end <= vma->vm_start)
                return -EINVAL;

        lru_add_drain();
        tlb_gather_mmu(&tlb, mm, start, end);
        update_hiwater_rss(mm);

        mmu_notifier_invalidate_range_start(mm, start, end);
        madvise_free_page_range(&tlb, vma, start, end);
        mmu_notifier_invalidate_range_end(mm, start, end);
        tlb_finish_mmu(&tlb, start, end);

        return 0;
}

static long madvise_free(struct vm_area_struct *vma,
                             struct vm_area_struct **prev,
                             unsigned long start, unsigned long end)
{
        *prev = vma;
        return madvise_free_single_vma(vma, start, end);
}

/*
 * Application no longer needs these pages.  If the pages are dirty,
 * it's OK to just throw them away.  The app will be more careful about
 * data it wants to keep.  Be sure to free swap resources too.  The
 * zap_page_range call sets things up for shrink_active_list to actually free
 * these pages later if no one else has touched them in the meantime,
 * although we could add these pages to a global reuse list for
 * shrink_active_list to pick up before reclaiming other pages.
 *
 * NB: This interface discards data rather than pushes it out to swap,
 * as some implementations do.  This has performance implications for
 * applications like large transactional databases which want to discard
 * pages in anonymous maps after committing to backing store the data
 * that was kept in them.  There is no reason to write this data out to
 * the swap area if the application is discarding it.
 *
 * An interface that causes the system to free clean pages and flush
 * dirty pages is already available as msync(MS_INVALIDATE).
 */
static long madvise_dontneed(struct vm_area_struct *vma,
                             struct vm_area_struct **prev,
                             unsigned long start, unsigned long end)
{
        *prev = vma;
        if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
                return -EINVAL;

        zap_page_range(vma, start, end - start, NULL);
        return 0;
}

/*
 * Application wants to free up the pages and associated backing store.
 * This is effectively punching a hole into the middle of a file.
 */
static long madvise_remove(struct vm_area_struct *vma,
                                struct vm_area_struct **prev,
                                unsigned long start, unsigned long end)
{
        loff_t offset;
        int error;
        struct file *f;

        *prev = NULL;   /* tell sys_madvise we drop mmap_sem */

        if (vma->vm_flags & VM_LOCKED)
                return -EINVAL;

        f = vma->vm_file;

        if (!f || !f->f_mapping || !f->f_mapping->host)
                return -EINVAL;

        if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
                return -EACCES;

        offset = (loff_t)(start - vma->vm_start)
                        + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

        /*
         * Filesystem's fallocate may need to take i_mutex.  We need to
         * explicitly grab a reference because the vma (and hence the
         * vma's reference to the file) can go away as soon as we drop
         * mmap_sem.
         */
        get_file(f);
        up_read(&current->mm->mmap_sem);
        error = vfs_fallocate(f,
                                FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                                offset, end - start);
        fput(f);
        down_read(&current->mm->mmap_sem);
        return error;
}

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Error injection support for memory error handling.
 */
static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
{
        struct page *p;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        for (; start < end; start += PAGE_SIZE <<
                                compound_order(compound_head(p))) {
                int ret;

                ret = get_user_pages_fast(start, 1, 0, &p);
                if (ret != 1)
                        return ret;

                if (PageHWPoison(p)) {
                        put_page(p);
                        continue;
                }
                if (bhv == MADV_SOFT_OFFLINE) {
                        pr_info("Soft offlining page %#lx at %#lx\n",
                                page_to_pfn(p), start);
                        ret = soft_offline_page(p, MF_COUNT_INCREASED);
                        if (ret)
                                return ret;
                        continue;
                }
                pr_info("Injecting memory failure for page %#lx at %#lx\n",
                       page_to_pfn(p), start);
                /* Ignore return value for now */
                memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
        }
        return 0;
}
#endif

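/* Dispatch one vma's worth of advice to the appropriate handler. */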
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
                unsigned long start, unsigned long end, int behavior)
{
        switch (behavior) {
        case MADV_REMOVE:
                return madvise_remove(vma, prev, start, end);
        case MADV_WILLNEED:
                return madvise_willneed(vma, prev, start, end);
        case MADV_FREE:
                /*
                 * XXX: In this implementation, MADV_FREE works like
                 * MADV_DONTNEED on a swapless system or when swap is full.
                 */
                if (get_nr_swap_pages() > 0)
                        return madvise_free(vma, prev, start, end);
                /* fall through */
        case MADV_DONTNEED:
                return madvise_dontneed(vma, prev, start, end);
        default:
                return madvise_behavior(vma, prev, start, end, behavior);
        }
}

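/*
 * Reject advice values this kernel cannot honour; the KSM and transparent
 * hugepage hints are only accepted when the corresponding support is
 * built in.
 */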
static bool
madvise_behavior_valid(int behavior)
{
        switch (behavior) {
        case MADV_DOFORK:
        case MADV_DONTFORK:
        case MADV_NORMAL:
        case MADV_SEQUENTIAL:
        case MADV_RANDOM:
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
        case MADV_FREE:
#ifdef CONFIG_KSM
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        case MADV_HUGEPAGE:
        case MADV_NOHUGEPAGE:
#endif
        case MADV_DONTDUMP:
        case MADV_DODUMP:
                return true;

        default:
                return false;
        }
}

/*
 * The madvise(2) system call.
 *
 * Applications can use madvise() to advise the kernel how it should
 * handle paging I/O in this VM area.  The idea is to help the kernel
 * use appropriate read-ahead and caching techniques.  The information
 * provided is advisory only, and can be safely disregarded by the
 * kernel without affecting the correct operation of the application.
 *
 * behavior values:
 *  MADV_NORMAL - the default behavior is to read clusters.  This
 *              results in some read-ahead and read-behind.
 *  MADV_RANDOM - the system should read the minimum amount of data
 *              on any access, since it is unlikely that the
 *              application will need more than what it asks for.
 *  MADV_SEQUENTIAL - pages in the given range will probably be accessed
 *              once, so they can be aggressively read ahead, and
 *              can be freed soon after they are accessed.
 *  MADV_WILLNEED - the application is notifying the system to read
 *              some pages ahead.
 *  MADV_DONTNEED - the application is finished with the given range,
 *              so the kernel can free resources associated with it.
 *  MADV_REMOVE - the application wants to free up the given range of
 *              pages and associated backing store.
 *  MADV_DONTFORK - omit this area from child's address space when forking:
 *              typically, to avoid COWing pages pinned by get_user_pages().
 *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
 *  MADV_MERGEABLE - the application recommends that KSM try to merge pages in
 *              this area with pages of identical content from other such areas.
 *  MADV_UNMERGEABLE - cancel MADV_MERGEABLE: no longer merge pages with others.
 *
 * return values:
 *  zero    - success
 *  -EINVAL - start + len < 0, start is not page-aligned,
 *              "behavior" is not a valid value, or application
 *              is attempting to release locked or shared pages.
 *  -ENOMEM - addresses in the specified range are not currently
 *              mapped, or are outside the AS of the process.
 *  -EIO    - an I/O error occurred while paging in data.
 *  -EBADF  - map exists, but area maps something that isn't a file.
 *  -EAGAIN - a kernel resource was temporarily unavailable.
 */
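/*
 * Illustrative userspace sketch (not part of this kernel source): an
 * allocator handing a no-longer-needed anonymous region back to the kernel
 * lazily with MADV_FREE.  Error handling is omitted and "len" is a
 * placeholder for a page-aligned length.
 *
 *      char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *      ... use buf ...
 *      madvise(buf, len, MADV_FREE);
 *
 * After the call the kernel may discard the pages under memory pressure
 * instead of swapping them out; writing to the range again before that
 * happens keeps the data.
 */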
SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
{
        unsigned long end, tmp;
        struct vm_area_struct *vma, *prev;
        int unmapped_error = 0;
        int error = -EINVAL;
        int write;
        size_t len;
        struct blk_plug plug;

#ifdef CONFIG_MEMORY_FAILURE
        if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
                return madvise_hwpoison(behavior, start, start+len_in);
#endif
        if (!madvise_behavior_valid(behavior))
                return error;

        if (start & ~PAGE_MASK)
                return error;
        len = (len_in + ~PAGE_MASK) & PAGE_MASK;

        /* Check to see whether len was rounded up from small -ve to zero */
        if (len_in && !len)
                return error;

        end = start + len;
        if (end < start)
                return error;

        error = 0;
        if (end == start)
                return error;

        write = madvise_need_mmap_write(behavior);
        if (write)
                down_write(&current->mm->mmap_sem);
        else
                down_read(&current->mm->mmap_sem);

        /*
         * If the interval [start,end) covers some unmapped address
         * ranges, just ignore them, but return -ENOMEM at the end.
         * - different from the way of handling in mlock etc.
         */
        vma = find_vma_prev(current->mm, start, &prev);
        if (vma && start > vma->vm_start)
                prev = vma;

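        /*
         * Plug block I/O so that requests issued while applying the advice
         * (e.g. MADV_WILLNEED readahead) can be batched and merged.
         */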
        blk_start_plug(&plug);
        for (;;) {
                /* Still start < end. */
                error = -ENOMEM;
                if (!vma)
                        goto out;

                /* Here start < (end|vma->vm_end). */
                if (start < vma->vm_start) {
                        unmapped_error = -ENOMEM;
                        start = vma->vm_start;
                        if (start >= end)
                                goto out;
                }

                /* Here vma->vm_start <= start < (end|vma->vm_end) */
                tmp = vma->vm_end;
                if (end < tmp)
                        tmp = end;

                /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
                error = madvise_vma(vma, &prev, start, tmp, behavior);
                if (error)
                        goto out;
                start = tmp;
                if (prev && start < prev->vm_end)
                        start = prev->vm_end;
                error = unmapped_error;
                if (start >= end)
                        goto out;
                if (prev)
                        vma = prev->vm_next;
                else    /* madvise_remove dropped mmap_sem */
                        vma = find_vma(current->mm, start);
        }
out:
        blk_finish_plug(&plug);
        if (write)
                up_write(&current->mm->mmap_sem);
        else
                up_read(&current->mm->mmap_sem);

        return error;
}