fs/btrfs/raid56.c

   1 /*
   2  * Copyright (C) 2012 Fusion-io  All rights reserved.
   3  * Copyright (C) 2012 Intel Corp. All rights reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public
   7  * License v2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public
  15  * License along with this program; if not, write to the
  16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  17  * Boston, MA 021110-1307, USA.
  18  */
  19 #include <linux/sched.h>
  20 #include <linux/wait.h>
  21 #include <linux/bio.h>
  22 #include <linux/slab.h>
  23 #include <linux/buffer_head.h>
  24 #include <linux/blkdev.h>
  25 #include <linux/random.h>
  26 #include <linux/iocontext.h>
  27 #include <linux/capability.h>
  28 #include <linux/ratelimit.h>
  29 #include <linux/kthread.h>
  30 #include <linux/raid/pq.h>
  31 #include <linux/hash.h>
  32 #include <linux/list_sort.h>
  33 #include <linux/raid/xor.h>
  34 #include <linux/vmalloc.h>
  35 #include <asm/div64.h>
  36 #include "ctree.h"
  37 #include "extent_map.h"
  38 #include "disk-io.h"
  39 #include "transaction.h"
  40 #include "print-tree.h"
  41 #include "volumes.h"
  42 #include "raid56.h"
  43 #include "async-thread.h"
  44 #include "check-integrity.h"
  45 #include "rcu-string.h"
  46
  47 /* set when additional merges to this rbio are not allowed */
  48 #define RBIO_RMW_LOCKED_BIT     1
  49
  50 /*
  51  * set when this rbio is sitting in the hash, but it is just a cache
  52  * of past RMW
  53  */
  54 #define RBIO_CACHE_BIT          2
  55
  56 /*
  57  * set when it is safe to trust the stripe_pages for caching
  58  */
  59 #define RBIO_CACHE_READY_BIT    3
  60
  61 #define RBIO_CACHE_SIZE 1024
  62
  63 struct btrfs_raid_bio {
  64         struct btrfs_fs_info *fs_info;
  65         struct btrfs_bio *bbio;
  66
  67         /*
  68          * logical block numbers for the start of each stripe
  69          * The last one or two are p/q.  These are sorted,
  70          * so raid_map[0] is the start of our full stripe
  71          */
  72         u64 *raid_map;
  73
  74         /* while we're doing rmw on a stripe
  75          * we put it into a hash table so we can
  76          * lock the stripe and merge more rbios
  77          * into it.
  78          */
  79         struct list_head hash_list;
  80
  81         /*
  82          * LRU list for the stripe cache
  83          */
  84         struct list_head stripe_cache;
  85
  86         /*
  87          * for scheduling work in the helper threads
  88          */
  89         struct btrfs_work work;
  90
  91         /*
  92          * bio list and bio_list_lock are used
  93          * to add more bios into the stripe
  94          * in hopes of avoiding the full rmw
  95          */
  96         struct bio_list bio_list;
  97         spinlock_t bio_list_lock;
  98
  99         /* also protected by the bio_list_lock, the
 100          * plug list is used by the plugging code
 101          * to collect partial bios while plugged.  The
 102          * stripe locking code also uses it to hand off
 103          * the stripe lock to the next pending IO
 104          */
 105         struct list_head plug_list;
 106
 107         /*
 108          * flags that tell us if it is safe to
 109          * merge with this bio
 110          */
 111         unsigned long flags;
 112
 113         /* size of each individual stripe on disk */
 114         int stripe_len;
 115
 116         /* number of data stripes (no p/q) */
 117         int nr_data;
 118
 119         /*
 120          * set if we're doing a parity rebuild
 121          * for a read from higher up, which is handled
 122          * differently from a parity rebuild as part of
 123          * rmw
 124          */
 125         int read_rebuild;
 126
 127         /* first bad stripe */
 128         int faila;
 129
 130         /* second bad stripe (for raid6 use) */
 131         int failb;
 132
 133         /*
 134          * number of pages needed to represent the full
 135          * stripe
 136          */
 137         int nr_pages;
 138
 139         /*
 140          * size of all the bios in the bio_list.  This
 141          * helps us decide if the rbio maps to a full
 142          * stripe or not
 143          */
 144         int bio_list_bytes;
 145
 146         atomic_t refs;
 147
 148
 149         atomic_t stripes_pending;
 150
 151         atomic_t error;
 152         /*
 153          * these are two arrays of pointers.  We allocate the
 154          * rbio big enough to hold them both and setup their
 155          * locations when the rbio is allocated
 156          */
 157
 158         /* pointers to pages that we allocated for
 159          * reading/writing stripes directly from the disk (including P/Q)
 160          */
 161         struct page **stripe_pages;
 162
 163         /*
 164          * pointers to the pages in the bio_list.  Stored
 165          * here for faster lookup
 166          */
 167         struct page **bio_pages;
 168 };
 169
 170 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
 171 static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
 172 static void rmw_work(struct btrfs_work *work);
 173 static void read_rebuild_work(struct btrfs_work *work);
 174 static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
 175 static void async_read_rebuild(struct btrfs_raid_bio *rbio);
 176 static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
 177 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
 178 static void __free_raid_bio(struct btrfs_raid_bio *rbio);
 179 static void index_rbio_pages(struct btrfs_raid_bio *rbio);
 180 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
 181
 182 /*
 183  * the stripe hash table is used for locking, and to collect
 184  * bios in hopes of making a full stripe
 185  */
 186 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
 187 {
 188         struct btrfs_stripe_hash_table *table;
 189         struct btrfs_stripe_hash_table *x;
 190         struct btrfs_stripe_hash *cur;
 191         struct btrfs_stripe_hash *h;
 192         int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
 193         int i;
 194         int table_size;
 195
 196         if (info->stripe_hash_table)
 197                 return 0;
 198
 199         /*
 200          * The table is large, starting with order 4 and can go as high as
 201          * order 7 in case lock debugging is turned on.
 202          *
 203          * Try harder to allocate and fallback to vmalloc to lower the chance
 204          * of a failing mount.
 205          */
 206         table_size = sizeof(*table) + sizeof(*h) * num_entries;
 207         table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
 208         if (!table) {
 209                 table = vzalloc(table_size);
 210                 if (!table)
 211                         return -ENOMEM;
 212         }
 213
 214         spin_lock_init(&table->cache_lock);
 215         INIT_LIST_HEAD(&table->stripe_cache);
 216
 217         h = table->table;
 218
 219         for (i = 0; i < num_entries; i++) {
 220                 cur = h + i;
 221                 INIT_LIST_HEAD(&cur->hash_list);
 222                 spin_lock_init(&cur->lock);
 223                 init_waitqueue_head(&cur->wait);
 224         }
 225
 226         x = cmpxchg(&info->stripe_hash_table, NULL, table);
 227         if (x) {
 228                 if (is_vmalloc_addr(x))
 229                         vfree(x);
 230                 else
 231                         kfree(x);
 232         }
 233         return 0;
 234 }
 235
 236 /*
 237  * caching an rbio means to copy anything from the
 238  * bio_pages array into the stripe_pages array.  We
 239  * use the page uptodate bit in the stripe cache array
 240  * to indicate if it has valid data
 241  *
 242  * once the caching is done, we set the cache ready
 243  * bit.
 244  */
 245 static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
 246 {
 247         int i;
 248         char *s;
 249         char *d;
 250         int ret;
 251
 252         ret = alloc_rbio_pages(rbio);
 253         if (ret)
 254                 return;
 255
 256         for (i = 0; i < rbio->nr_pages; i++) {
 257                 if (!rbio->bio_pages[i])
 258                         continue;
 259
 260                 s = kmap(rbio->bio_pages[i]);
 261                 d = kmap(rbio->stripe_pages[i]);
 262
 263                 memcpy(d, s, PAGE_CACHE_SIZE);
 264
 265                 kunmap(rbio->bio_pages[i]);
 266                 kunmap(rbio->stripe_pages[i]);
 267                 SetPageUptodate(rbio->stripe_pages[i]);
 268         }
 269         set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 270 }
 271
 272 /*
 273  * we hash on the first logical address of the stripe
 274  */
 275 static int rbio_bucket(struct btrfs_raid_bio *rbio)
 276 {
 277         u64 num = rbio->raid_map[0];
 278
 279         /*
 280          * we shift down quite a bit.  We're using byte
 281          * addressing, and most of the lower bits are zeros.
 282          * This tends to upset hash_64, and it consistently
 283          * returns just one or two different values.
 284          *
 285          * shifting off the lower bits fixes things.
 286          */
 287         return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
 288 }
 289
 290 /*
 291  * stealing an rbio means taking all the uptodate pages from the stripe
 292  * array in the source rbio and putting them into the destination rbio
 293  */
 294 static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
 295 {
 296         int i;
 297         struct page *s;
 298         struct page *d;
 299
 300         if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
 301                 return;
 302
 303         for (i = 0; i < dest->nr_pages; i++) {
 304                 s = src->stripe_pages[i];
 305                 if (!s || !PageUptodate(s)) {
 306                         continue;
 307                 }
 308
 309                 d = dest->stripe_pages[i];
 310                 if (d)
 311                         __free_page(d);
 312
 313                 dest->stripe_pages[i] = s;
 314                 src->stripe_pages[i] = NULL;
 315         }
 316 }
 317
 318 /*
 319  * merging means we take the bio_list from the victim and
 320  * splice it into the destination.  The victim should
 321  * be discarded afterwards.
 322  *
 323  * must be called with dest->rbio_list_lock held
 324  */
 325 static void merge_rbio(struct btrfs_raid_bio *dest,
 326                        struct btrfs_raid_bio *victim)
 327 {
 328         bio_list_merge(&dest->bio_list, &victim->bio_list);
 329         dest->bio_list_bytes += victim->bio_list_bytes;
 330         bio_list_init(&victim->bio_list);
 331 }
 332
 333 /*
 334  * used to prune items that are in the cache.  The caller
 335  * must hold the hash table lock.
 336  */
 337 static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
 338 {
 339         int bucket = rbio_bucket(rbio);
 340         struct btrfs_stripe_hash_table *table;
 341         struct btrfs_stripe_hash *h;
 342         int freeit = 0;
 343
 344         /*
 345          * check the bit again under the hash table lock.
 346          */
 347         if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
 348                 return;
 349
 350         table = rbio->fs_info->stripe_hash_table;
 351         h = table->table + bucket;
 352
 353         /* hold the lock for the bucket because we may be
 354          * removing it from the hash table
 355          */
 356         spin_lock(&h->lock);
 357
 358         /*
 359          * hold the lock for the bio list because we need
 360          * to make sure the bio list is empty
 361          */
 362         spin_lock(&rbio->bio_list_lock);
 363
 364         if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
 365                 list_del_init(&rbio->stripe_cache);
 366                 table->cache_size -= 1;
 367                 freeit = 1;
 368
 369                 /* if the bio list isn't empty, this rbio is
 370                  * still involved in an IO.  We take it out
 371                  * of the cache list, and drop the ref that
 372                  * was held for the list.
 373                  *
 374                  * If the bio_list was empty, we also remove
 375                  * the rbio from the hash_table, and drop
 376                  * the corresponding ref
 377                  */
 378                 if (bio_list_empty(&rbio->bio_list)) {
 379                         if (!list_empty(&rbio->hash_list)) {
 380                                 list_del_init(&rbio->hash_list);
 381                                 atomic_dec(&rbio->refs);
 382                                 BUG_ON(!list_empty(&rbio->plug_list));
 383                         }
 384                 }
 385         }
 386
 387         spin_unlock(&rbio->bio_list_lock);
 388         spin_unlock(&h->lock);
 389
 390         if (freeit)
 391                 __free_raid_bio(rbio);
 392 }
 393
 394 /*
 395  * prune a given rbio from the cache
 396  */
 397 static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
 398 {
 399         struct btrfs_stripe_hash_table *table;
 400         unsigned long flags;
 401
 402         if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
 403                 return;
 404
 405         table = rbio->fs_info->stripe_hash_table;
 406
 407         spin_lock_irqsave(&table->cache_lock, flags);
 408         __remove_rbio_from_cache(rbio);
 409         spin_unlock_irqrestore(&table->cache_lock, flags);
 410 }
 411
 412 /*
 413  * remove everything in the cache
 414  */
 415 static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
 416 {
 417         struct btrfs_stripe_hash_table *table;
 418         unsigned long flags;
 419         struct btrfs_raid_bio *rbio;
 420
 421         table = info->stripe_hash_table;
 422
 423         spin_lock_irqsave(&table->cache_lock, flags);
 424         while (!list_empty(&table->stripe_cache)) {
 425                 rbio = list_entry(table->stripe_cache.next,
 426                                   struct btrfs_raid_bio,
 427                                   stripe_cache);
 428                 __remove_rbio_from_cache(rbio);
 429         }
 430         spin_unlock_irqrestore(&table->cache_lock, flags);
 431 }
 432
 433 /*
 434  * remove all cached entries and free the hash table
 435  * used by unmount
 436  */
 437 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
 438 {
 439         if (!info->stripe_hash_table)
 440                 return;
 441         btrfs_clear_rbio_cache(info);
 442         if (is_vmalloc_addr(info->stripe_hash_table))
 443                 vfree(info->stripe_hash_table);
 444         else
 445                 kfree(info->stripe_hash_table);
 446         info->stripe_hash_table = NULL;
 447 }
 448
 449 /*
 450  * insert an rbio into the stripe cache.  It
 451  * must have already been prepared by calling
 452  * cache_rbio_pages
 453  *
 454  * If this rbio was already cached, it gets
 455  * moved to the front of the lru.
 456  *
 457  * If the size of the rbio cache is too big, we
 458  * prune an item.
 459  */
 460 static void cache_rbio(struct btrfs_raid_bio *rbio)
 461 {
 462         struct btrfs_stripe_hash_table *table;
 463         unsigned long flags;
 464
 465         if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
 466                 return;
 467
 468         table = rbio->fs_info->stripe_hash_table;
 469
 470         spin_lock_irqsave(&table->cache_lock, flags);
 471         spin_lock(&rbio->bio_list_lock);
 472
 473         /* bump our ref if we were not in the list before */
 474         if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
 475                 atomic_inc(&rbio->refs);
 476
 477         if (!list_empty(&rbio->stripe_cache)){
 478                 list_move(&rbio->stripe_cache, &table->stripe_cache);
 479         } else {
 480                 list_add(&rbio->stripe_cache, &table->stripe_cache);
 481                 table->cache_size += 1;
 482         }
 483
 484         spin_unlock(&rbio->bio_list_lock);
 485
 486         if (table->cache_size > RBIO_CACHE_SIZE) {
 487                 struct btrfs_raid_bio *found;
 488
 489                 found = list_entry(table->stripe_cache.prev,
 490                                   struct btrfs_raid_bio,
 491                                   stripe_cache);
 492
 493                 if (found != rbio)
 494                         __remove_rbio_from_cache(found);
 495         }
 496
 497         spin_unlock_irqrestore(&table->cache_lock, flags);
 498         return;
 499 }
 500
 501 /*
 502  * helper function to run the xor_blocks api.  It is only
 503  * able to do MAX_XOR_BLOCKS at a time, so we need to
 504  * loop through.
 505  */
 506 static void run_xor(void **pages, int src_cnt, ssize_t len)
 507 {
 508         int src_off = 0;
 509         int xor_src_cnt = 0;
 510         void *dest = pages[src_cnt];
 511
 512         while(src_cnt > 0) {
 513                 xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
 514                 xor_blocks(xor_src_cnt, len, dest, pages + src_off);
 515
 516                 src_cnt -= xor_src_cnt;
 517                 src_off += xor_src_cnt;
 518         }
 519 }
 520
 521 /*
 522  * returns true if the bio list inside this rbio
 523  * covers an entire stripe (no rmw required).
 524  * Must be called with the bio list lock held, or
 525  * at a time when you know it is impossible to add
 526  * new bios into the list
 527  */
 528 static int __rbio_is_full(struct btrfs_raid_bio *rbio)
 529 {
 530         unsigned long size = rbio->bio_list_bytes;
 531         int ret = 1;
 532
 533         if (size != rbio->nr_data * rbio->stripe_len)
 534                 ret = 0;
 535
 536         BUG_ON(size > rbio->nr_data * rbio->stripe_len);
 537         return ret;
 538 }
 539
 540 static int rbio_is_full(struct btrfs_raid_bio *rbio)
 541 {
 542         unsigned long flags;
 543         int ret;
 544
 545         spin_lock_irqsave(&rbio->bio_list_lock, flags);
 546         ret = __rbio_is_full(rbio);
 547         spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
 548         return ret;
 549 }
 550
 551 /*
 552  * returns 1 if it is safe to merge two rbios together.
 553  * The merging is safe if the two rbios correspond to
 554  * the same stripe and if they are both going in the same
 555  * direction (read vs write), and if neither one is
 556  * locked for final IO
 557  *
 558  * The caller is responsible for locking such that
 559  * rmw_locked is safe to test
 560  */
 561 static int rbio_can_merge(struct btrfs_raid_bio *last,
 562                           struct btrfs_raid_bio *cur)
 563 {
 564         if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
 565             test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
 566                 return 0;
 567
 568         /*
 569          * we can't merge with cached rbios, since the
 570          * idea is that when we merge the destination
 571          * rbio is going to run our IO for us.  We can
 572          * steal from cached rbio's though, other functions
 573          * handle that.
 574          */
 575         if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
 576             test_bit(RBIO_CACHE_BIT, &cur->flags))
 577                 return 0;
 578
 579         if (last->raid_map[0] !=
 580             cur->raid_map[0])
 581                 return 0;
 582
 583         /* reads can't merge with writes */
 584         if (last->read_rebuild !=
 585             cur->read_rebuild) {
 586                 return 0;
 587         }
 588
 589         return 1;
 590 }
 591
 592 /*
 593  * helper to index into the pstripe
 594  */
 595 static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
 596 {
 597         index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
 598         return rbio->stripe_pages[index];
 599 }
 600
 601 /*
 602  * helper to index into the qstripe, returns null
 603  * if there is no qstripe
 604  */
 605 static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
 606 {
 607         if (rbio->nr_data + 1 == rbio->bbio->num_stripes)
 608                 return NULL;
 609
 610         index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
 611                 PAGE_CACHE_SHIFT;
 612         return rbio->stripe_pages[index];
 613 }
 614
 615 /*
 616  * The first stripe in the table for a logical address
 617  * has the lock.  rbios are added in one of three ways:
 618  *
 619  * 1) Nobody has the stripe locked yet.  The rbio is given
 620  * the lock and 0 is returned.  The caller must start the IO
 621  * themselves.
 622  *
 623  * 2) Someone has the stripe locked, but we're able to merge
 624  * with the lock owner.  The rbio is freed and the IO will
 625  * start automatically along with the existing rbio.  1 is returned.
 626  *
 627  * 3) Someone has the stripe locked, but we're not able to merge.
 628  * The rbio is added to the lock owner's plug list, or merged into
 629  * an rbio already on the plug list.  When the lock owner unlocks,
 630  * the next rbio on the list is run and the IO is started automatically.
 631  * 1 is returned
 632  *
 633  * If we return 0, the caller still owns the rbio and must continue with
 634  * IO submission.  If we return 1, the caller must assume the rbio has
 635  * already been freed.
 636  */
 637 static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 638 {
 639         int bucket = rbio_bucket(rbio);
 640         struct btrfs_stripe_hash *h = rbio->fs_info->stripe_hash_table->table + bucket;
 641         struct btrfs_raid_bio *cur;
 642         struct btrfs_raid_bio *pending;
 643         unsigned long flags;
 644         DEFINE_WAIT(wait);
 645         struct btrfs_raid_bio *freeit = NULL;
 646         struct btrfs_raid_bio *cache_drop = NULL;
 647         int ret = 0;
 648         int walk = 0;
 649
 650         spin_lock_irqsave(&h->lock, flags);
 651         list_for_each_entry(cur, &h->hash_list, hash_list) {
 652                 walk++;
 653                 if (cur->raid_map[0] == rbio->raid_map[0]) {
 654                         spin_lock(&cur->bio_list_lock);
 655
 656                         /* can we steal this cached rbio's pages? */
 657                         if (bio_list_empty(&cur->bio_list) &&
 658                             list_empty(&cur->plug_list) &&
 659                             test_bit(RBIO_CACHE_BIT, &cur->flags) &&
 660                             !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
 661                                 list_del_init(&cur->hash_list);
 662                                 atomic_dec(&cur->refs);
 663
 664                                 steal_rbio(cur, rbio);
 665                                 cache_drop = cur;
 666                                 spin_unlock(&cur->bio_list_lock);
 667
 668                                 goto lockit;
 669                         }
 670
 671                         /* can we merge into the lock owner? */
 672                         if (rbio_can_merge(cur, rbio)) {
 673                                 merge_rbio(cur, rbio);
 674                                 spin_unlock(&cur->bio_list_lock);
 675                                 freeit = rbio;
 676                                 ret = 1;
 677                                 goto out;
 678                         }
 679
 680
 681                         /*
 682                          * we couldn't merge with the running
 683                          * rbio, see if we can merge with the
 684                          * pending ones.  We don't have to
 685                          * check for rmw_locked because there
 686                          * is no way they are inside finish_rmw
 687                          * right now
 688                          */
 689                         list_for_each_entry(pending, &cur->plug_list,
 690                                             plug_list) {
 691                                 if (rbio_can_merge(pending, rbio)) {
 692                                         merge_rbio(pending, rbio);
 693                                         spin_unlock(&cur->bio_list_lock);
 694                                         freeit = rbio;
 695                                         ret = 1;
 696                                         goto out;
 697                                 }
 698                         }
 699
 700                         /* no merging, put us on the tail of the plug list,
 701                          * our rbio will be started with the currently
 702                          * running rbio unlocks
 703                          */
 704                         list_add_tail(&rbio->plug_list, &cur->plug_list);
 705                         spin_unlock(&cur->bio_list_lock);
 706                         ret = 1;
 707                         goto out;
 708                 }
 709         }
 710 lockit:
 711         atomic_inc(&rbio->refs);
 712         list_add(&rbio->hash_list, &h->hash_list);
 713 out:
 714         spin_unlock_irqrestore(&h->lock, flags);
 715         if (cache_drop)
 716                 remove_rbio_from_cache(cache_drop);
 717         if (freeit)
 718                 __free_raid_bio(freeit);
 719         return ret;
 720 }
 721
 722 /*
 723  * called as rmw or parity rebuild is completed.  If the plug list has more
 724  * rbios waiting for this stripe, the next one on the list will be started
 725  */
 726 static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 727 {
 728         int bucket;
 729         struct btrfs_stripe_hash *h;
 730         unsigned long flags;
 731         int keep_cache = 0;
 732
 733         bucket = rbio_bucket(rbio);
 734         h = rbio->fs_info->stripe_hash_table->table + bucket;
 735
 736         if (list_empty(&rbio->plug_list))
 737                 cache_rbio(rbio);
 738
 739         spin_lock_irqsave(&h->lock, flags);
 740         spin_lock(&rbio->bio_list_lock);
 741
 742         if (!list_empty(&rbio->hash_list)) {
 743                 /*
 744                  * if we're still cached and there is no other IO
 745                  * to perform, just leave this rbio here for others
 746                  * to steal from later
 747                  */
 748                 if (list_empty(&rbio->plug_list) &&
 749                     test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
 750                         keep_cache = 1;
 751                         clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
 752                         BUG_ON(!bio_list_empty(&rbio->bio_list));
 753                         goto done;
 754                 }
 755
 756                 list_del_init(&rbio->hash_list);
 757                 atomic_dec(&rbio->refs);
 758
 759                 /*
 760                  * we use the plug list to hold all the rbios
 761                  * waiting for the chance to lock this stripe.
 762                  * hand the lock over to one of them.
 763                  */
 764                 if (!list_empty(&rbio->plug_list)) {
 765                         struct btrfs_raid_bio *next;
 766                         struct list_head *head = rbio->plug_list.next;
 767
 768                         next = list_entry(head, struct btrfs_raid_bio,
 769                                           plug_list);
 770
 771                         list_del_init(&rbio->plug_list);
 772
 773                         list_add(&next->hash_list, &h->hash_list);
 774                         atomic_inc(&next->refs);
 775                         spin_unlock(&rbio->bio_list_lock);
 776                         spin_unlock_irqrestore(&h->lock, flags);
 777
 778                         if (next->read_rebuild)
 779                                 async_read_rebuild(next);
 780                         else {
 781                                 steal_rbio(rbio, next);
 782                                 async_rmw_stripe(next);
 783                         }
 784
 785                         goto done_nolock;
 786                 } else  if (waitqueue_active(&h->wait)) {
 787                         spin_unlock(&rbio->bio_list_lock);
 788                         spin_unlock_irqrestore(&h->lock, flags);
 789                         wake_up(&h->wait);
 790                         goto done_nolock;
 791                 }
 792         }
 793 done:
 794         spin_unlock(&rbio->bio_list_lock);
 795         spin_unlock_irqrestore(&h->lock, flags);
 796
 797 done_nolock:
 798         if (!keep_cache)
 799                 remove_rbio_from_cache(rbio);
 800 }
 801
 802 static void __free_raid_bio(struct btrfs_raid_bio *rbio)
 803 {
 804         int i;
 805
 806         WARN_ON(atomic_read(&rbio->refs) < 0);
 807         if (!atomic_dec_and_test(&rbio->refs))
 808                 return;
 809
 810         WARN_ON(!list_empty(&rbio->stripe_cache));
 811         WARN_ON(!list_empty(&rbio->hash_list));
 812         WARN_ON(!bio_list_empty(&rbio->bio_list));
 813
 814         for (i = 0; i < rbio->nr_pages; i++) {
 815                 if (rbio->stripe_pages[i]) {
 816                         __free_page(rbio->stripe_pages[i]);
 817                         rbio->stripe_pages[i] = NULL;
 818                 }
 819         }
 820         kfree(rbio->raid_map);
 821         kfree(rbio->bbio);
 822         kfree(rbio);
 823 }
 824
 825 static void free_raid_bio(struct btrfs_raid_bio *rbio)
 826 {
 827         unlock_stripe(rbio);
 828         __free_raid_bio(rbio);
 829 }
 830
 831 /*
 832  * this frees the rbio and runs through all the bios in the
 833  * bio_list and calls end_io on them
 834  */
 835 static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
 836 {
 837         struct bio *cur = bio_list_get(&rbio->bio_list);
 838         struct bio *next;
 839         free_raid_bio(rbio);
 840
 841         while (cur) {
 842                 next = cur->bi_next;
 843                 cur->bi_next = NULL;
 844                 if (uptodate)
 845                         set_bit(BIO_UPTODATE, &cur->bi_flags);
 846                 bio_endio(cur, err);
 847                 cur = next;
 848         }
 849 }
 850
 851 /*
 852  * end io function used by finish_rmw.  When we finally
 853  * get here, we've written a full stripe
 854  */
 855 static void raid_write_end_io(struct bio *bio, int err)
 856 {
 857         struct btrfs_raid_bio *rbio = bio->bi_private;
 858
 859         if (err)
 860                 fail_bio_stripe(rbio, bio);
 861
 862         bio_put(bio);
 863
 864         if (!atomic_dec_and_test(&rbio->stripes_pending))
 865                 return;
 866
 867         err = 0;
 868
 869         /* OK, we have read all the stripes we need to. */
 870         if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
 871                 err = -EIO;
 872
 873         rbio_orig_end_io(rbio, err, 0);
 874         return;
 875 }
 876
 877 /*
 878  * the read/modify/write code wants to use the original bio for
 879  * any pages it included, and then use the rbio for everything
 880  * else.  This function decides if a given index (stripe number)
 881  * and page number in that stripe fall inside the original bio
 882  * or the rbio.
 883  *
 884  * if you set bio_list_only, you'll get a NULL back for any ranges
 885  * that are outside the bio_list
 886  *
 887  * This doesn't take any refs on anything, you get a bare page pointer
 888  * and the caller must bump refs as required.
 889  *
 890  * You must call index_rbio_pages once before you can trust
 891  * the answers from this function.
 892  */
 893 static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
 894                                  int index, int pagenr, int bio_list_only)
 895 {
 896         int chunk_page;
 897         struct page *p = NULL;
 898
 899         chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;
 900
 901         spin_lock_irq(&rbio->bio_list_lock);
 902         p = rbio->bio_pages[chunk_page];
 903         spin_unlock_irq(&rbio->bio_list_lock);
 904
 905         if (p || bio_list_only)
 906                 return p;
 907
 908         return rbio->stripe_pages[chunk_page];
 909 }
 910
 911 /*
 912  * number of pages we need for the entire stripe across all the
 913  * drives
 914  */
 915 static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
 916 {
 917         unsigned long nr = stripe_len * nr_stripes;
 918         return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
 919 }
 920
 921 /*
 922  * allocation and initial setup for the btrfs_raid_bio.  Not
 923  * this does not allocate any pages for rbio->pages.
 924  */
 925 static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
 926                           struct btrfs_bio *bbio, u64 *raid_map,
 927                           u64 stripe_len)
 928 {
 929         struct btrfs_raid_bio *rbio;
 930         int nr_data = 0;
 931         int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes);
 932         void *p;
 933
 934         rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2,
 935                         GFP_NOFS);
 936         if (!rbio) {
 937                 kfree(raid_map);
 938                 kfree(bbio);
 939                 return ERR_PTR(-ENOMEM);
 940         }
 941
 942         bio_list_init(&rbio->bio_list);
 943         INIT_LIST_HEAD(&rbio->plug_list);
 944         spin_lock_init(&rbio->bio_list_lock);
 945         INIT_LIST_HEAD(&rbio->stripe_cache);
 946         INIT_LIST_HEAD(&rbio->hash_list);
 947         rbio->bbio = bbio;
 948         rbio->raid_map = raid_map;
 949         rbio->fs_info = root->fs_info;
 950         rbio->stripe_len = stripe_len;
 951         rbio->nr_pages = num_pages;
 952         rbio->faila = -1;
 953         rbio->failb = -1;
 954         atomic_set(&rbio->refs, 1);
 955         atomic_set(&rbio->error, 0);
 956         atomic_set(&rbio->stripes_pending, 0);
 957
 958         /*
 959          * the stripe_pages and bio_pages array point to the extra
 960          * memory we allocated past the end of the rbio
 961          */
 962         p = rbio + 1;
 963         rbio->stripe_pages = p;
 964         rbio->bio_pages = p + sizeof(struct page *) * num_pages;
 965
 966         if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
 967                 nr_data = bbio->num_stripes - 2;
 968         else
 969                 nr_data = bbio->num_stripes - 1;
 970
 971         rbio->nr_data = nr_data;
 972         return rbio;
 973 }
 974
 975 /* allocate pages for all the stripes in the bio, including parity */
 976 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
 977 {
 978         int i;
 979         struct page *page;
 980
 981         for (i = 0; i < rbio->nr_pages; i++) {
 982                 if (rbio->stripe_pages[i])
 983                         continue;
 984                 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 985                 if (!page)
 986                         return -ENOMEM;
 987                 rbio->stripe_pages[i] = page;
 988                 ClearPageUptodate(page);
 989         }
 990         return 0;
 991 }
 992
 993 /* allocate pages for just the p/q stripes */
 994 static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
 995 {
 996         int i;
 997         struct page *page;
 998
 999         i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
1000
1001         for (; i < rbio->nr_pages; i++) {
1002                 if (rbio->stripe_pages[i])
1003                         continue;
1004                 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1005                 if (!page)
1006                         return -ENOMEM;
1007                 rbio->stripe_pages[i] = page;
1008         }
1009         return 0;
1010 }
1011
1012 /*
1013  * add a single page from a specific stripe into our list of bios for IO
1014  * this will try to merge into existing bios if possible, and returns
1015  * zero if all went well.
1016  */
1017 static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1018                             struct bio_list *bio_list,
1019                             struct page *page,
1020                             int stripe_nr,
1021                             unsigned long page_index,
1022                             unsigned long bio_max_len)
1023 {
1024         struct bio *last = bio_list->tail;
1025         u64 last_end = 0;
1026         int ret;
1027         struct bio *bio;
1028         struct btrfs_bio_stripe *stripe;
1029         u64 disk_start;
1030
1031         stripe = &rbio->bbio->stripes[stripe_nr];
1032         disk_start = stripe->physical + (page_index << PAGE_CACHE_SHIFT);
1033
1034         /* if the device is missing, just fail this stripe */
1035         if (!stripe->dev->bdev)
1036                 return fail_rbio_index(rbio, stripe_nr);
1037
1038         /* see if we can add this page onto our existing bio */
1039         if (last) {
1040                 last_end = (u64)last->bi_iter.bi_sector << 9;
1041                 last_end += last->bi_iter.bi_size;
1042
1043                 /*
1044                  * we can't merge these if they are from different
1045                  * devices or if they are not contiguous
1046                  */
1047                 if (last_end == disk_start && stripe->dev->bdev &&
1048                     test_bit(BIO_UPTODATE, &last->bi_flags) &&
1049                     last->bi_bdev == stripe->dev->bdev) {
1050                         ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0);
1051                         if (ret == PAGE_CACHE_SIZE)
1052                                 return 0;
1053                 }
1054         }
1055
1056         /* put a new bio on the list */
1057         bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
1058         if (!bio)
1059                 return -ENOMEM;
1060
1061         bio->bi_iter.bi_size = 0;
1062         bio->bi_bdev = stripe->dev->bdev;
1063         bio->bi_iter.bi_sector = disk_start >> 9;
1064         set_bit(BIO_UPTODATE, &bio->bi_flags);
1065
1066         bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
1067         bio_list_add(bio_list, bio);
1068         return 0;
1069 }
1070
1071 /*
1072  * while we're doing the read/modify/write cycle, we could
1073  * have errors in reading pages off the disk.  This checks
1074  * for errors and if we're not able to read the page it'll
1075  * trigger parity reconstruction.  The rmw will be finished
1076  * after we've reconstructed the failed stripes
1077  */
1078 static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
1079 {
1080         if (rbio->faila >= 0 || rbio->failb >= 0) {
1081                 BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1);
1082                 __raid56_parity_recover(rbio);
1083         } else {
1084                 finish_rmw(rbio);
1085         }
1086 }
1087
1088 /*
1089  * these are just the pages from the rbio array, not from anything
1090  * the FS sent down to us
1091  */
1092 static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
1093 {
1094         int index;
1095         index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
1096         index += page;
1097         return rbio->stripe_pages[index];
1098 }
1099
1100 /*
1101  * helper function to walk our bio list and populate the bio_pages array with
1102  * the result.  This seems expensive, but it is faster than constantly
1103  * searching through the bio list as we setup the IO in finish_rmw or stripe
1104  * reconstruction.
1105  *
1106  * This must be called before you trust the answers from page_in_rbio
1107  */
1108 static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1109 {
1110         struct bio *bio;
1111         u64 start;
1112         unsigned long stripe_offset;
1113         unsigned long page_index;
1114         struct page *p;
1115         int i;
1116
1117         spin_lock_irq(&rbio->bio_list_lock);
1118         bio_list_for_each(bio, &rbio->bio_list) {
1119                 start = (u64)bio->bi_iter.bi_sector << 9;
1120                 stripe_offset = start - rbio->raid_map[0];
1121                 page_index = stripe_offset >> PAGE_CACHE_SHIFT;
1122
1123                 for (i = 0; i < bio->bi_vcnt; i++) {
1124                         p = bio->bi_io_vec[i].bv_page;
1125                         rbio->bio_pages[page_index + i] = p;
1126                 }
1127         }
1128         spin_unlock_irq(&rbio->bio_list_lock);
1129 }
1130
1131 /*
1132  * this is called from one of two situations.  We either
1133  * have a full stripe from the higher layers, or we've read all
1134  * the missing bits off disk.
1135  *
1136  * This will calculate the parity and then send down any
1137  * changed blocks.
1138  */
1139 static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1140 {
1141         struct btrfs_bio *bbio = rbio->bbio;
1142         void *pointers[bbio->num_stripes];
1143         int stripe_len = rbio->stripe_len;
1144         int nr_data = rbio->nr_data;
1145         int stripe;
1146         int pagenr;
1147         int p_stripe = -1;
1148         int q_stripe = -1;
1149         struct bio_list bio_list;
1150         struct bio *bio;
1151         int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
1152         int ret;
1153
1154         bio_list_init(&bio_list);
1155
1156         if (bbio->num_stripes - rbio->nr_data == 1) {
1157                 p_stripe = bbio->num_stripes - 1;
1158         } else if (bbio->num_stripes - rbio->nr_data == 2) {
1159                 p_stripe = bbio->num_stripes - 2;
1160                 q_stripe = bbio->num_stripes - 1;
1161         } else {
1162                 BUG();
1163         }
1164
1165         /* at this point we either have a full stripe,
1166          * or we've read the full stripe from the drive.
1167          * recalculate the parity and write the new results.
1168          *
1169          * We're not allowed to add any new bios to the
1170          * bio list here, anyone else that wants to
1171          * change this stripe needs to do their own rmw.
1172          */
1173         spin_lock_irq(&rbio->bio_list_lock);
1174         set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1175         spin_unlock_irq(&rbio->bio_list_lock);
1176
1177         atomic_set(&rbio->error, 0);
1178
1179         /*
1180          * now that we've set rmw_locked, run through the
1181          * bio list one last time and map the page pointers
1182          *
1183          * We don't cache full rbios because we're assuming
1184          * the higher layers are unlikely to use this area of
1185          * the disk again soon.  If they do use it again,
1186          * hopefully they will send another full bio.
1187          */
1188         index_rbio_pages(rbio);
1189         if (!rbio_is_full(rbio))
1190                 cache_rbio_pages(rbio);
1191         else
1192                 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
1193
1194         for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1195                 struct page *p;
1196                 /* first collect one page from each data stripe */
1197                 for (stripe = 0; stripe < nr_data; stripe++) {
1198                         p = page_in_rbio(rbio, stripe, pagenr, 0);
1199                         pointers[stripe] = kmap(p);
1200                 }
1201
1202                 /* then add the parity stripe */
1203                 p = rbio_pstripe_page(rbio, pagenr);
1204                 SetPageUptodate(p);
1205                 pointers[stripe++] = kmap(p);
1206
1207                 if (q_stripe != -1) {
1208
1209                         /*
1210                          * raid6, add the qstripe and call the
1211                          * library function to fill in our p/q
1212                          */
1213                         p = rbio_qstripe_page(rbio, pagenr);
1214                         SetPageUptodate(p);
1215                         pointers[stripe++] = kmap(p);
1216
1217                         raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE,
1218                                                 pointers);
1219                 } else {
1220                         /* raid5 */
1221                         memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
1222                         run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
1223                 }
1224
1225
1226                 for (stripe = 0; stripe < bbio->num_stripes; stripe++)
1227                         kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
1228         }
1229
1230         /*
1231          * time to start writing.  Make bios for everything from the
1232          * higher layers (the bio_list in our rbio) and our p/q.  Ignore
1233          * everything else.
1234          */
1235         for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
1236                 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1237                         struct page *page;
1238                         if (stripe < rbio->nr_data) {
1239                                 page = page_in_rbio(rbio, stripe, pagenr, 1);
1240                                 if (!page)
1241                                         continue;
1242                         } else {
1243                                page = rbio_stripe_page(rbio, stripe, pagenr);
1244                         }
1245
1246                         ret = rbio_add_io_page(rbio, &bio_list,
1247                                        page, stripe, pagenr, rbio->stripe_len);
1248                         if (ret)
1249                                 goto cleanup;
1250                 }
1251         }
1252
1253         atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
1254         BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
1255
1256         while (1) {
1257                 bio = bio_list_pop(&bio_list);
1258                 if (!bio)
1259                         break;
1260
1261                 bio->bi_private = rbio;
1262                 bio->bi_end_io = raid_write_end_io;
1263                 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
1264                 submit_bio(WRITE, bio);
1265         }
1266         return;
1267
1268 cleanup:
1269         rbio_orig_end_io(rbio, -EIO, 0);
1270 }
1271
1272 /*
1273  * helper to find the stripe number for a given bio.  Used to figure out which
1274  * stripe has failed.  This expects the bio to correspond to a physical disk,
1275  * so it looks up based on physical sector numbers.
1276  */
1277 static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1278                            struct bio *bio)
1279 {
1280         u64 physical = bio->bi_iter.bi_sector;
1281         u64 stripe_start;
1282         int i;
1283         struct btrfs_bio_stripe *stripe;
1284
1285         physical <<= 9;
1286
1287         for (i = 0; i < rbio->bbio->num_stripes; i++) {
1288                 stripe = &rbio->bbio->stripes[i];
1289                 stripe_start = stripe->physical;
1290                 if (physical >= stripe_start &&
1291                     physical < stripe_start + rbio->stripe_len) {
1292                         return i;
1293                 }
1294         }
1295         return -1;
1296 }
1297
1298 /*
1299  * helper to find the stripe number for a given
1300  * bio (before mapping).  Used to figure out which stripe has
1301  * failed.  This looks up based on logical block numbers.
1302  */
1303 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1304                                    struct bio *bio)
1305 {
1306         u64 logical = bio->bi_iter.bi_sector;
1307         u64 stripe_start;
1308         int i;
1309
1310         logical <<= 9;
1311
1312         for (i = 0; i < rbio->nr_data; i++) {
1313                 stripe_start = rbio->raid_map[i];
1314                 if (logical >= stripe_start &&
1315                     logical < stripe_start + rbio->stripe_len) {
1316                         return i;
1317                 }
1318         }
1319         return -1;
1320 }
1321
1322 /*
1323  * returns -EIO if we had too many failures
1324  */
1325 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
1326 {
1327         unsigned long flags;
1328         int ret = 0;
1329
1330         spin_lock_irqsave(&rbio->bio_list_lock, flags);
1331
1332         /* we already know this stripe is bad, move on */
1333         if (rbio->faila == failed || rbio->failb == failed)
1334                 goto out;
1335
1336         if (rbio->faila == -1) {
1337                 /* first failure on this rbio */
1338                 rbio->faila = failed;
1339                 atomic_inc(&rbio->error);
1340         } else if (rbio->failb == -1) {
1341                 /* second failure on this rbio */
1342                 rbio->failb = failed;
1343                 atomic_inc(&rbio->error);
1344         } else {
1345                 ret = -EIO;
1346         }
1347 out:
1348         spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
1349
1350         return ret;
1351 }
1352
1353 /*
1354  * helper to fail a stripe based on a physical disk
1355  * bio.
1356  */
1357 static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
1358                            struct bio *bio)
1359 {
1360         int failed = find_bio_stripe(rbio, bio);
1361
1362         if (failed < 0)
1363                 return -EIO;
1364
1365         return fail_rbio_index(rbio, failed);
1366 }
1367
1368 /*
1369  * this sets each page in the bio uptodate.  It should only be used on private
1370  * rbio pages, nothing that comes in from the higher layers
1371  */
1372 static void set_bio_pages_uptodate(struct bio *bio)
1373 {
1374         int i;
1375         struct page *p;
1376
1377         for (i = 0; i < bio->bi_vcnt; i++) {
1378                 p = bio->bi_io_vec[i].bv_page;
1379                 SetPageUptodate(p);
1380         }
1381 }
1382
1383 /*
1384  * end io for the read phase of the rmw cycle.  All the bios here are physical
1385  * stripe bios we've read from the disk so we can recalculate the parity of the
1386  * stripe.
1387  *
1388  * This will usually kick off finish_rmw once all the bios are read in, but it
1389  * may trigger parity reconstruction if we had any errors along the way
1390  */
1391 static void raid_rmw_end_io(struct bio *bio, int err)
1392 {
1393         struct btrfs_raid_bio *rbio = bio->bi_private;
1394
1395         if (err)
1396                 fail_bio_stripe(rbio, bio);
1397         else
1398                 set_bio_pages_uptodate(bio);
1399
1400         bio_put(bio);
1401
1402         if (!atomic_dec_and_test(&rbio->stripes_pending))
1403                 return;
1404
1405         err = 0;
1406         if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
1407                 goto cleanup;
1408
1409         /*
1410          * this will normally call finish_rmw to start our write
1411          * but if there are any failed stripes we'll reconstruct
1412          * from parity first
1413          */
1414         validate_rbio_for_rmw(rbio);
1415         return;
1416
1417 cleanup:
1418
1419         rbio_orig_end_io(rbio, -EIO, 0);
1420 }
1421
1422 static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1423 {
1424         btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1425                         rmw_work, NULL, NULL);
1426
1427         btrfs_queue_work(rbio->fs_info->rmw_workers,
1428                          &rbio->work);
1429 }
1430
1431 static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1432 {
1433         btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1434                         read_rebuild_work, NULL, NULL);
1435
1436         btrfs_queue_work(rbio->fs_info->rmw_workers,
1437                          &rbio->work);
1438 }
1439
1440 /*
1441  * the stripe must be locked by the caller.  It will
1442  * unlock after all the writes are done
1443  */
1444 static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1445 {
1446         int bios_to_read = 0;
1447         struct bio_list bio_list;
1448         int ret;
1449         int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
1450         int pagenr;
1451         int stripe;
1452         struct bio *bio;
1453
1454         bio_list_init(&bio_list);
1455
1456         ret = alloc_rbio_pages(rbio);
1457         if (ret)
1458                 goto cleanup;
1459
1460         index_rbio_pages(rbio);
1461
1462         atomic_set(&rbio->error, 0);
1463         /*
1464          * build a list of bios to read all the missing parts of this
1465          * stripe
1466          */
1467         for (stripe = 0; stripe < rbio->nr_data; stripe++) {
1468                 for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1469                         struct page *page;
1470                         /*
1471                          * we want to find all the pages missing from
1472                          * the rbio and read them from the disk.  If
1473                          * page_in_rbio finds a page in the bio list
1474                          * we don't need to read it off the stripe.
1475                          */
1476                         page = page_in_rbio(rbio, stripe, pagenr, 1);
1477                         if (page)
1478                                 continue;
1479
1480                         page = rbio_stripe_page(rbio, stripe, pagenr);
1481                         /*
1482                          * the bio cache may have handed us an uptodate
1483                          * page.  If so, be happy and use it
1484                          */
1485                         if (PageUptodate(page))
1486                                 continue;
1487
1488                         ret = rbio_add_io_page(rbio, &bio_list, page,
1489                                        stripe, pagenr, rbio->stripe_len);
1490                         if (ret)
1491                                 goto cleanup;
1492                 }
1493         }
1494
1495         bios_to_read = bio_list_size(&bio_list);
1496         if (!bios_to_read) {
1497                 /*
1498                  * this can happen if others have merged with
1499                  * us, it means there is nothing left to read.
1500                  * But if there are missing devices it may not be
1501                  * safe to do the full stripe write yet.
1502                  */
1503                 goto finish;
1504         }
1505
1506         /*
1507          * the bbio may be freed once we submit the last bio.  Make sure
1508          * not to touch it after that
1509          */
1510         atomic_set(&rbio->stripes_pending, bios_to_read);
1511         while (1) {
1512                 bio = bio_list_pop(&bio_list);
1513                 if (!bio)
1514                         break;
1515
1516                 bio->bi_private = rbio;
1517                 bio->bi_end_io = raid_rmw_end_io;
1518
1519                 btrfs_bio_wq_end_io(rbio->fs_info, bio,
1520                                     BTRFS_WQ_ENDIO_RAID56);
1521
1522                 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
1523                 submit_bio(READ, bio);
1524         }
1525         /* the actual write will happen once the reads are done */
1526         return 0;
1527
1528 cleanup:
1529         rbio_orig_end_io(rbio, -EIO, 0);
1530         return -EIO;
1531
1532 finish:
1533         validate_rbio_for_rmw(rbio);
1534         return 0;
1535 }
1536
1537 /*
1538  * if the upper layers pass in a full stripe, we thank them by only allocating
1539  * enough pages to hold the parity, and sending it all down quickly.
1540  */
1541 static int full_stripe_write(struct btrfs_raid_bio *rbio)
1542 {
1543         int ret;
1544
1545         ret = alloc_rbio_parity_pages(rbio);
1546         if (ret) {
1547                 __free_raid_bio(rbio);
1548                 return ret;
1549         }
1550
1551         ret = lock_stripe_add(rbio);
1552         if (ret == 0)
1553                 finish_rmw(rbio);
1554         return 0;
1555 }
1556
1557 /*
1558  * partial stripe writes get handed over to async helpers.
1559  * We're really hoping to merge a few more writes into this
1560  * rbio before calculating new parity
1561  */
1562 static int partial_stripe_write(struct btrfs_raid_bio *rbio)
1563 {
1564         int ret;
1565
1566         ret = lock_stripe_add(rbio);
1567         if (ret == 0)
1568                 async_rmw_stripe(rbio);
1569         return 0;
1570 }
1571
1572 /*
1573  * sometimes while we were reading from the drive to
1574  * recalculate parity, enough new bios come into create
1575  * a full stripe.  So we do a check here to see if we can
1576  * go directly to finish_rmw
1577  */
1578 static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
1579 {
1580         /* head off into rmw land if we don't have a full stripe */
1581         if (!rbio_is_full(rbio))
1582                 return partial_stripe_write(rbio);
1583         return full_stripe_write(rbio);
1584 }
1585
1586 /*
1587  * We use plugging call backs to collect full stripes.
1588  * Any time we get a partial stripe write while plugged
1589  * we collect it into a list.  When the unplug comes down,
1590  * we sort the list by logical block number and merge
1591  * everything we can into the same rbios
1592  */
1593 struct btrfs_plug_cb {
1594         struct blk_plug_cb cb;
1595         struct btrfs_fs_info *info;
1596         struct list_head rbio_list;
1597         struct btrfs_work work;
1598 };
1599
1600 /*
1601  * rbios on the plug list are sorted for easier merging.
1602  */
1603 static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
1604 {
1605         struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
1606                                                  plug_list);
1607         struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1608                                                  plug_list);
1609         u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
1610         u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
1611
1612         if (a_sector < b_sector)
1613                 return -1;
1614         if (a_sector > b_sector)
1615                 return 1;
1616         return 0;
1617 }
1618
1619 static void run_plug(struct btrfs_plug_cb *plug)
1620 {
1621         struct btrfs_raid_bio *cur;
1622         struct btrfs_raid_bio *last = NULL;
1623
1624         /*
1625          * sort our plug list then try to merge
1626          * everything we can in hopes of creating full
1627          * stripes.
1628          */
1629         list_sort(NULL, &plug->rbio_list, plug_cmp);
1630         while (!list_empty(&plug->rbio_list)) {
1631                 cur = list_entry(plug->rbio_list.next,
1632                                  struct btrfs_raid_bio, plug_list);
1633                 list_del_init(&cur->plug_list);
1634
1635                 if (rbio_is_full(cur)) {
1636                         /* we have a full stripe, send it down */
1637                         full_stripe_write(cur);
1638                         continue;
1639                 }
1640                 if (last) {
1641                         if (rbio_can_merge(last, cur)) {
1642                                 merge_rbio(last, cur);
1643                                 __free_raid_bio(cur);
1644                                 continue;
1645
1646                         }
1647                         __raid56_parity_write(last);
1648                 }
1649                 last = cur;
1650         }
1651         if (last) {
1652                 __raid56_parity_write(last);
1653         }
1654         kfree(plug);
1655 }
1656
1657 /*
1658  * if the unplug comes from schedule, we have to push the
1659  * work off to a helper thread
1660  */
1661 static void unplug_work(struct btrfs_work *work)
1662 {
1663         struct btrfs_plug_cb *plug;
1664         plug = container_of(work, struct btrfs_plug_cb, work);
1665         run_plug(plug);
1666 }
1667
1668 static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1669 {
1670         struct btrfs_plug_cb *plug;
1671         plug = container_of(cb, struct btrfs_plug_cb, cb);
1672
1673         if (from_schedule) {
1674                 btrfs_init_work(&plug->work, btrfs_rmw_helper,
1675                                 unplug_work, NULL, NULL);
1676                 btrfs_queue_work(plug->info->rmw_workers,
1677                                  &plug->work);
1678                 return;
1679         }
1680         run_plug(plug);
1681 }
1682
1683 /*
1684  * our main entry point for writes from the rest of the FS.
1685  */
1686 int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1687                         struct btrfs_bio *bbio, u64 *raid_map,
1688                         u64 stripe_len)
1689 {
1690         struct btrfs_raid_bio *rbio;
1691         struct btrfs_plug_cb *plug = NULL;
1692         struct blk_plug_cb *cb;
1693
1694         rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
1695         if (IS_ERR(rbio))
1696                 return PTR_ERR(rbio);
1697         bio_list_add(&rbio->bio_list, bio);
1698         rbio->bio_list_bytes = bio->bi_iter.bi_size;
1699
1700         /*
1701          * don't plug on full rbios, just get them out the door
1702          * as quickly as we can
1703          */
1704         if (rbio_is_full(rbio))
1705                 return full_stripe_write(rbio);
1706
1707         cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
1708                                sizeof(*plug));
1709         if (cb) {
1710                 plug = container_of(cb, struct btrfs_plug_cb, cb);
1711                 if (!plug->info) {
1712                         plug->info = root->fs_info;
1713                         INIT_LIST_HEAD(&plug->rbio_list);
1714                 }
1715                 list_add_tail(&rbio->plug_list, &plug->rbio_list);
1716         } else {
1717                 return __raid56_parity_write(rbio);
1718         }
1719         return 0;
1720 }
1721
1722 /*
1723  * all parity reconstruction happens here.  We've read in everything
1724  * we can find from the drives and this does the heavy lifting of
1725  * sorting the good from the bad.
1726  */
1727 static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1728 {
1729         int pagenr, stripe;
1730         void **pointers;
1731         int faila = -1, failb = -1;
1732         int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
1733         struct page *page;
1734         int err;
1735         int i;
1736
1737         pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *),
1738                            GFP_NOFS);
1739         if (!pointers) {
1740                 err = -ENOMEM;
1741                 goto cleanup_io;
1742         }
1743
1744         faila = rbio->faila;
1745         failb = rbio->failb;
1746
1747         if (rbio->read_rebuild) {
1748                 spin_lock_irq(&rbio->bio_list_lock);
1749                 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1750                 spin_unlock_irq(&rbio->bio_list_lock);
1751         }
1752
1753         index_rbio_pages(rbio);
1754
1755         for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1756                 /* setup our array of pointers with pages
1757                  * from each stripe
1758                  */
1759                 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
1760                         /*
1761                          * if we're rebuilding a read, we have to use
1762                          * pages from the bio list
1763                          */
1764                         if (rbio->read_rebuild &&
1765                             (stripe == faila || stripe == failb)) {
1766                                 page = page_in_rbio(rbio, stripe, pagenr, 0);
1767                         } else {
1768                                 page = rbio_stripe_page(rbio, stripe, pagenr);
1769                         }
1770                         pointers[stripe] = kmap(page);
1771                 }
1772
1773                 /* all raid6 handling here */
1774                 if (rbio->raid_map[rbio->bbio->num_stripes - 1] ==
1775                     RAID6_Q_STRIPE) {
1776
1777                         /*
1778                          * single failure, rebuild from parity raid5
1779                          * style
1780                          */
1781                         if (failb < 0) {
1782                                 if (faila == rbio->nr_data) {
1783                                         /*
1784                                          * Just the P stripe has failed, without
1785                                          * a bad data or Q stripe.
1786                                          * TODO, we should redo the xor here.
1787                                          */
1788                                         err = -EIO;
1789                                         goto cleanup;
1790                                 }
1791                                 /*
1792                                  * a single failure in raid6 is rebuilt
1793                                  * in the pstripe code below
1794                                  */
1795                                 goto pstripe;
1796                         }
1797
1798                         /* make sure our ps and qs are in order */
1799                         if (faila > failb) {
1800                                 int tmp = failb;
1801                                 failb = faila;
1802                                 faila = tmp;
1803                         }
1804
1805                         /* if the q stripe is failed, do a pstripe reconstruction
1806                          * from the xors.
1807                          * If both the q stripe and the P stripe are failed, we're
1808                          * here due to a crc mismatch and we can't give them the
1809                          * data they want
1810                          */
1811                         if (rbio->raid_map[failb] == RAID6_Q_STRIPE) {
1812                                 if (rbio->raid_map[faila] == RAID5_P_STRIPE) {
1813                                         err = -EIO;
1814                                         goto cleanup;
1815                                 }
1816                                 /*
1817                                  * otherwise we have one bad data stripe and
1818                                  * a good P stripe.  raid5!
1819                                  */
1820                                 goto pstripe;
1821                         }
1822
1823                         if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
1824                                 raid6_datap_recov(rbio->bbio->num_stripes,
1825                                                   PAGE_SIZE, faila, pointers);
1826                         } else {
1827                                 raid6_2data_recov(rbio->bbio->num_stripes,
1828                                                   PAGE_SIZE, faila, failb,
1829                                                   pointers);
1830                         }
1831                 } else {
1832                         void *p;
1833
1834                         /* rebuild from P stripe here (raid5 or raid6) */
1835                         BUG_ON(failb != -1);
1836 pstripe:
1837                         /* Copy parity block into failed block to start with */
1838                         memcpy(pointers[faila],
1839                                pointers[rbio->nr_data],
1840                                PAGE_CACHE_SIZE);
1841
1842                         /* rearrange the pointer array */
1843                         p = pointers[faila];
1844                         for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
1845                                 pointers[stripe] = pointers[stripe + 1];
1846                         pointers[rbio->nr_data - 1] = p;
1847
1848                         /* xor in the rest */
1849                         run_xor(pointers, rbio->nr_data - 1, PAGE_CACHE_SIZE);
1850                 }
1851                 /* if we're doing this rebuild as part of an rmw, go through
1852                  * and set all of our private rbio pages in the
1853                  * failed stripes as uptodate.  This way finish_rmw will
1854                  * know they can be trusted.  If this was a read reconstruction,
1855                  * other endio functions will fiddle the uptodate bits
1856                  */
1857                 if (!rbio->read_rebuild) {
1858                         for (i = 0;  i < nr_pages; i++) {
1859                                 if (faila != -1) {
1860                                         page = rbio_stripe_page(rbio, faila, i);
1861                                         SetPageUptodate(page);
1862                                 }
1863                                 if (failb != -1) {
1864                                         page = rbio_stripe_page(rbio, failb, i);
1865                                         SetPageUptodate(page);
1866                                 }
1867                         }
1868                 }
1869                 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
1870                         /*
1871                          * if we're rebuilding a read, we have to use
1872                          * pages from the bio list
1873                          */
1874                         if (rbio->read_rebuild &&
1875                             (stripe == faila || stripe == failb)) {
1876                                 page = page_in_rbio(rbio, stripe, pagenr, 0);
1877                         } else {
1878                                 page = rbio_stripe_page(rbio, stripe, pagenr);
1879                         }
1880                         kunmap(page);
1881                 }
1882         }
1883
1884         err = 0;
1885 cleanup:
1886         kfree(pointers);
1887
1888 cleanup_io:
1889
1890         if (rbio->read_rebuild) {
1891                 if (err == 0)
1892                         cache_rbio_pages(rbio);
1893                 else
1894                         clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
1895
1896                 rbio_orig_end_io(rbio, err, err == 0);
1897         } else if (err == 0) {
1898                 rbio->faila = -1;
1899                 rbio->failb = -1;
1900                 finish_rmw(rbio);
1901         } else {
1902                 rbio_orig_end_io(rbio, err, 0);
1903         }
1904 }
1905
1906 /*
1907  * This is called only for stripes we've read from disk to
1908  * reconstruct the parity.
1909  */
1910 static void raid_recover_end_io(struct bio *bio, int err)
1911 {
1912         struct btrfs_raid_bio *rbio = bio->bi_private;
1913
1914         /*
1915          * we only read stripe pages off the disk, set them
1916          * up to date if there were no errors
1917          */
1918         if (err)
1919                 fail_bio_stripe(rbio, bio);
1920         else
1921                 set_bio_pages_uptodate(bio);
1922         bio_put(bio);
1923
1924         if (!atomic_dec_and_test(&rbio->stripes_pending))
1925                 return;
1926
1927         if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
1928                 rbio_orig_end_io(rbio, -EIO, 0);
1929         else
1930                 __raid_recover_end_io(rbio);
1931 }
1932
1933 /*
1934  * reads everything we need off the disk to reconstruct
1935  * the parity. endio handlers trigger final reconstruction
1936  * when the IO is done.
1937  *
1938  * This is used both for reads from the higher layers and for
1939  * parity construction required to finish a rmw cycle.
1940  */
1941 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1942 {
1943         int bios_to_read = 0;
1944         struct btrfs_bio *bbio = rbio->bbio;
1945         struct bio_list bio_list;
1946         int ret;
1947         int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
1948         int pagenr;
1949         int stripe;
1950         struct bio *bio;
1951
1952         bio_list_init(&bio_list);
1953
1954         ret = alloc_rbio_pages(rbio);
1955         if (ret)
1956                 goto cleanup;
1957
1958         atomic_set(&rbio->error, 0);
1959
1960         /*
1961          * read everything that hasn't failed.  Thanks to the
1962          * stripe cache, it is possible that some or all of these
1963          * pages are going to be uptodate.
1964          */
1965         for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
1966                 if (rbio->faila == stripe || rbio->failb == stripe) {
1967                         atomic_inc(&rbio->error);
1968                         continue;
1969                 }
1970
1971                 for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1972                         struct page *p;
1973
1974                         /*
1975                          * the rmw code may have already read this
1976                          * page in
1977                          */
1978                         p = rbio_stripe_page(rbio, stripe, pagenr);
1979                         if (PageUptodate(p))
1980                                 continue;
1981
1982                         ret = rbio_add_io_page(rbio, &bio_list,
1983                                        rbio_stripe_page(rbio, stripe, pagenr),
1984                                        stripe, pagenr, rbio->stripe_len);
1985                         if (ret < 0)
1986                                 goto cleanup;
1987                 }
1988         }
1989
1990         bios_to_read = bio_list_size(&bio_list);
1991         if (!bios_to_read) {
1992                 /*
1993                  * we might have no bios to read just because the pages
1994                  * were up to date, or we might have no bios to read because
1995                  * the devices were gone.
1996                  */
1997                 if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
1998                         __raid_recover_end_io(rbio);
1999                         goto out;
2000                 } else {
2001                         goto cleanup;
2002                 }
2003         }
2004
2005         /*
2006          * the bbio may be freed once we submit the last bio.  Make sure
2007          * not to touch it after that
2008          */
2009         atomic_set(&rbio->stripes_pending, bios_to_read);
2010         while (1) {
2011                 bio = bio_list_pop(&bio_list);
2012                 if (!bio)
2013                         break;
2014
2015                 bio->bi_private = rbio;
2016                 bio->bi_end_io = raid_recover_end_io;
2017
2018                 btrfs_bio_wq_end_io(rbio->fs_info, bio,
2019                                     BTRFS_WQ_ENDIO_RAID56);
2020
2021                 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
2022                 submit_bio(READ, bio);
2023         }
2024 out:
2025         return 0;
2026
2027 cleanup:
2028         if (rbio->read_rebuild)
2029                 rbio_orig_end_io(rbio, -EIO, 0);
2030         return -EIO;
2031 }
2032
2033 /*
2034  * the main entry point for reads from the higher layers.  This
2035  * is really only called when the normal read path had a failure,
2036  * so we assume the bio they send down corresponds to a failed part
2037  * of the drive.
2038  */
2039 int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2040                           struct btrfs_bio *bbio, u64 *raid_map,
2041                           u64 stripe_len, int mirror_num)
2042 {
2043         struct btrfs_raid_bio *rbio;
2044         int ret;
2045
2046         rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2047         if (IS_ERR(rbio))
2048                 return PTR_ERR(rbio);
2049
2050         rbio->read_rebuild = 1;
2051         bio_list_add(&rbio->bio_list, bio);
2052         rbio->bio_list_bytes = bio->bi_iter.bi_size;
2053
2054         rbio->faila = find_logical_bio_stripe(rbio, bio);
2055         if (rbio->faila == -1) {
2056                 BUG();
2057                 kfree(raid_map);
2058                 kfree(bbio);
2059                 kfree(rbio);
2060                 return -EIO;
2061         }
2062
2063         /*
2064          * reconstruct from the q stripe if they are
2065          * asking for mirror 3
2066          */
2067         if (mirror_num == 3)
2068                 rbio->failb = bbio->num_stripes - 2;
2069
2070         ret = lock_stripe_add(rbio);
2071
2072         /*
2073          * __raid56_parity_recover will end the bio with
2074          * any errors it hits.  We don't want to return
2075          * its error value up the stack because our caller
2076          * will end up calling bio_endio with any nonzero
2077          * return
2078          */
2079         if (ret == 0)
2080                 __raid56_parity_recover(rbio);
2081         /*
2082          * our rbio has been added to the list of
2083          * rbios that will be handled after the
2084          * currently lock owner is done
2085          */
2086         return 0;
2087
2088 }
2089
2090 static void rmw_work(struct btrfs_work *work)
2091 {
2092         struct btrfs_raid_bio *rbio;
2093
2094         rbio = container_of(work, struct btrfs_raid_bio, work);
2095         raid56_rmw_stripe(rbio);
2096 }
2097
2098 static void read_rebuild_work(struct btrfs_work *work)
2099 {
2100         struct btrfs_raid_bio *rbio;
2101
2102         rbio = container_of(work, struct btrfs_raid_bio, work);
2103         __raid56_parity_recover(rbio);
2104 }