block/blk-map.c

   1 /*
   2  * Functions related to mapping data to requests
   3  */
   4 #include <linux/kernel.h>
   5 #include <linux/module.h>
   6 #include <linux/bio.h>
   7 #include <linux/blkdev.h>
   8 #include <linux/scatterlist.h>
   9 #include <linux/slab.h>
  10 #include <scsi/sg.h>            /* for struct sg_iovec */
  11
  12 #include "blk.h"
  13
  14 int blk_rq_append_bio(struct request_queue *q, struct request *rq,
  15                       struct bio *bio)
  16 {
  17         if (!rq->bio)
  18                 blk_rq_bio_prep(q, rq, bio);
  19         else if (!ll_back_merge_fn(q, rq, bio))
  20                 return -EINVAL;
  21         else {
  22                 rq->biotail->bi_next = bio;
  23                 rq->biotail = bio;
  24
  25                 rq->__data_len += bio->bi_size;
  26         }
  27         return 0;
  28 }
  29
  30 static int __blk_rq_unmap_user(struct bio *bio)
  31 {
  32         int ret = 0;
  33
  34         if (bio) {
  35                 if (bio_flagged(bio, BIO_USER_MAPPED))
  36                         bio_unmap_user(bio);
  37                 else
  38                         ret = bio_uncopy_user(bio);
  39         }
  40
  41         return ret;
  42 }
  43
  44 static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
  45                              struct rq_map_data *map_data, void __user *ubuf,
  46                              unsigned int len, gfp_t gfp_mask)
  47 {
  48         unsigned long uaddr;
  49         struct bio *bio, *orig_bio;
  50         int reading, ret;
  51
  52         reading = rq_data_dir(rq) == READ;
  53
  54         /*
  55          * if alignment requirement is satisfied, map in user pages for
  56          * direct dma. else, set up kernel bounce buffers
  57          */
  58         uaddr = (unsigned long) ubuf;
  59         if (blk_rq_aligned(q, uaddr, len) && !map_data)
  60                 bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
  61         else
  62                 bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
  63
  64         if (IS_ERR(bio))
  65                 return PTR_ERR(bio);
  66
  67         if (map_data && map_data->null_mapped)
  68                 bio->bi_flags |= (1 << BIO_NULL_MAPPED);
  69
  70         orig_bio = bio;
  71         blk_queue_bounce(q, &bio);
  72
  73         /*
  74          * We link the bounce buffer in and could have to traverse it
  75          * later so we have to get a ref to prevent it from being freed
  76          */
  77         bio_get(bio);
  78
  79         ret = blk_rq_append_bio(q, rq, bio);
  80         if (!ret)
  81                 return bio->bi_size;
  82
  83         /* if it was boucned we must call the end io function */
  84         bio_endio(bio, 0);
  85         __blk_rq_unmap_user(orig_bio);
  86         bio_put(bio);
  87         return ret;
  88 }
  89
  90 /**
  91  * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  92  * @q:          request queue where request should be inserted
  93  * @rq:         request structure to fill
  94  * @map_data:   pointer to the rq_map_data holding pages (if necessary)
  95  * @ubuf:       the user buffer
  96  * @len:        length of user data
  97  * @gfp_mask:   memory allocation flags
  98  *
  99  * Description:
 100  *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 101  *    a kernel bounce buffer is used.
 102  *
 103  *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 104  *    still in process context.
 105  *
 106  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 107  *    before being submitted to the device, as pages mapped may be out of
 108  *    reach. It's the callers responsibility to make sure this happens. The
 109  *    original bio must be passed back in to blk_rq_unmap_user() for proper
 110  *    unmapping.
 111  */
 112 int blk_rq_map_user(struct request_queue *q, struct request *rq,
 113                     struct rq_map_data *map_data, void __user *ubuf,
 114                     unsigned long len, gfp_t gfp_mask)
 115 {
 116         unsigned long bytes_read = 0;
 117         struct bio *bio = NULL;
 118         int ret;
 119
 120         if (len > (queue_max_hw_sectors(q) << 9))
 121                 return -EINVAL;
 122         if (!len)
 123                 return -EINVAL;
 124
 125         if (!ubuf && (!map_data || !map_data->null_mapped))
 126                 return -EINVAL;
 127
 128         while (bytes_read != len) {
 129                 unsigned long map_len, end, start;
 130
 131                 map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
 132                 end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
 133                                                                 >> PAGE_SHIFT;
 134                 start = (unsigned long)ubuf >> PAGE_SHIFT;
 135
 136                 /*
 137                  * A bad offset could cause us to require BIO_MAX_PAGES + 1
 138                  * pages. If this happens we just lower the requested
 139                  * mapping len by a page so that we can fit
 140                  */
 141                 if (end - start > BIO_MAX_PAGES)
 142                         map_len -= PAGE_SIZE;
 143
 144                 ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
 145                                         gfp_mask);
 146                 if (ret < 0)
 147                         goto unmap_rq;
 148                 if (!bio)
 149                         bio = rq->bio;
 150                 bytes_read += ret;
 151                 ubuf += ret;
 152
 153                 if (map_data)
 154                         map_data->offset += ret;
 155         }
 156
 157         if (!bio_flagged(bio, BIO_USER_MAPPED))
 158                 rq->cmd_flags |= REQ_COPY_USER;
 159
 160         rq->buffer = NULL;
 161         return 0;
 162 unmap_rq:
 163         blk_rq_unmap_user(bio);
 164         rq->bio = NULL;
 165         return ret;
 166 }
 167 EXPORT_SYMBOL(blk_rq_map_user);
 168
 169 /**
 170  * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
 171  * @q:          request queue where request should be inserted
 172  * @rq:         request to map data to
 173  * @map_data:   pointer to the rq_map_data holding pages (if necessary)
 174  * @iov:        pointer to the iovec
 175  * @iov_count:  number of elements in the iovec
 176  * @len:        I/O byte count
 177  * @gfp_mask:   memory allocation flags
 178  *
 179  * Description:
 180  *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 181  *    a kernel bounce buffer is used.
 182  *
 183  *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 184  *    still in process context.
 185  *
 186  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 187  *    before being submitted to the device, as pages mapped may be out of
 188  *    reach. It's the callers responsibility to make sure this happens. The
 189  *    original bio must be passed back in to blk_rq_unmap_user() for proper
 190  *    unmapping.
 191  */
 192 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 193                         struct rq_map_data *map_data, struct sg_iovec *iov,
 194                         int iov_count, unsigned int len, gfp_t gfp_mask)
 195 {
 196         struct bio *bio;
 197         int i, read = rq_data_dir(rq) == READ;
 198         int unaligned = 0;
 199
 200         if (!iov || iov_count <= 0)
 201                 return -EINVAL;
 202
 203         for (i = 0; i < iov_count; i++) {
 204                 unsigned long uaddr = (unsigned long)iov[i].iov_base;
 205
 206                 if (!iov[i].iov_len)
 207                         return -EINVAL;
 208
 209                 if (uaddr & queue_dma_alignment(q)) {
 210                         unaligned = 1;
 211                         break;
 212                 }
 213         }
 214
 215         if (unaligned || (q->dma_pad_mask & len) || map_data)
 216                 bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
 217                                         gfp_mask);
 218         else
 219                 bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
 220
 221         if (IS_ERR(bio))
 222                 return PTR_ERR(bio);
 223
 224         if (bio->bi_size != len) {
 225                 /*
 226                  * Grab an extra reference to this bio, as bio_unmap_user()
 227                  * expects to be able to drop it twice as it happens on the
 228                  * normal IO completion path
 229                  */
 230                 bio_get(bio);
 231                 bio_endio(bio, 0);
 232                 __blk_rq_unmap_user(bio);
 233                 return -EINVAL;
 234         }
 235
 236         if (!bio_flagged(bio, BIO_USER_MAPPED))
 237                 rq->cmd_flags |= REQ_COPY_USER;
 238
 239         blk_queue_bounce(q, &bio);
 240         bio_get(bio);
 241         blk_rq_bio_prep(q, rq, bio);
 242         rq->buffer = NULL;
 243         return 0;
 244 }
 245 EXPORT_SYMBOL(blk_rq_map_user_iov);
 246
 247 /**
 248  * blk_rq_unmap_user - unmap a request with user data
 249  * @bio:               start of bio list
 250  *
 251  * Description:
 252  *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 253  *    supply the original rq->bio from the blk_rq_map_user() return, since
 254  *    the I/O completion may have changed rq->bio.
 255  */
 256 int blk_rq_unmap_user(struct bio *bio)
 257 {
 258         struct bio *mapped_bio;
 259         int ret = 0, ret2;
 260
 261         while (bio) {
 262                 mapped_bio = bio;
 263                 if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
 264                         mapped_bio = bio->bi_private;
 265
 266                 ret2 = __blk_rq_unmap_user(mapped_bio);
 267                 if (ret2 && !ret)
 268                         ret = ret2;
 269
 270                 mapped_bio = bio;
 271                 bio = bio->bi_next;
 272                 bio_put(mapped_bio);
 273         }
 274
 275         return ret;
 276 }
 277 EXPORT_SYMBOL(blk_rq_unmap_user);
 278
 279 struct blk_kern_sg_work {
 280         atomic_t bios_inflight;
 281         struct sg_table sg_table;
 282         struct scatterlist *src_sgl;
 283 };
 284
 285 static void blk_free_kern_sg_work(struct blk_kern_sg_work *bw)
 286 {
 287         sg_free_table(&bw->sg_table);
 288         kfree(bw);
 289         return;
 290 }
 291
 292 static void blk_bio_map_kern_endio(struct bio *bio, int err)
 293 {
 294         struct blk_kern_sg_work *bw = bio->bi_private;
 295
 296         if (bw != NULL) {
 297                 /* Decrement the bios in processing and, if zero, free */
 298                 BUG_ON(atomic_read(&bw->bios_inflight) <= 0);
 299                 if (atomic_dec_and_test(&bw->bios_inflight)) {
 300                         if ((bio_data_dir(bio) == READ) && (err == 0)) {
 301                                 unsigned long flags;
 302
 303                                 local_irq_save(flags);  /* to protect KMs */
 304                                 sg_copy(bw->src_sgl, bw->sg_table.sgl, 0, 0,
 305                                         KM_BIO_DST_IRQ, KM_BIO_SRC_IRQ);
 306                                 local_irq_restore(flags);
 307                         }
 308                         blk_free_kern_sg_work(bw);
 309                 }
 310         }
 311
 312         bio_put(bio);
 313         return;
 314 }
 315
 316 static int blk_rq_copy_kern_sg(struct request *rq, struct scatterlist *sgl,
 317                                int nents, struct blk_kern_sg_work **pbw,
 318                                gfp_t gfp, gfp_t page_gfp)
 319 {
 320         int res = 0, i;
 321         struct scatterlist *sg;
 322         struct scatterlist *new_sgl;
 323         int new_sgl_nents;
 324         size_t len = 0, to_copy;
 325         struct blk_kern_sg_work *bw;
 326
 327         bw = kzalloc(sizeof(*bw), gfp);
 328         if (bw == NULL)
 329                 goto out;
 330
 331         bw->src_sgl = sgl;
 332
 333         for_each_sg(sgl, sg, nents, i)
 334                 len += sg->length;
 335         to_copy = len;
 336
 337         new_sgl_nents = PFN_UP(len);
 338
 339         res = sg_alloc_table(&bw->sg_table, new_sgl_nents, gfp);
 340         if (res != 0)
 341                 goto out_free_bw;
 342
 343         new_sgl = bw->sg_table.sgl;
 344
 345         for_each_sg(new_sgl, sg, new_sgl_nents, i) {
 346                 struct page *pg;
 347
 348                 pg = alloc_page(page_gfp);
 349                 if (pg == NULL)
 350                         goto err_free_new_sgl;
 351
 352                 sg_assign_page(sg, pg);
 353                 sg->length = min_t(size_t, PAGE_SIZE, len);
 354
 355                 len -= PAGE_SIZE;
 356         }
 357
 358         if (rq_data_dir(rq) == WRITE) {
 359                 /*
 360                  * We need to limit amount of copied data to to_copy, because
 361                  * sgl might have the last element in sgl not marked as last in
 362                  * SG chaining.
 363                  */
 364                 sg_copy(new_sgl, sgl, 0, to_copy,
 365                         KM_USER0, KM_USER1);
 366         }
 367
 368         *pbw = bw;
 369         /*
 370          * REQ_COPY_USER name is misleading. It should be something like
 371          * REQ_HAS_TAIL_SPACE_FOR_PADDING.
 372          */
 373         rq->cmd_flags |= REQ_COPY_USER;
 374
 375 out:
 376         return res;
 377
 378 err_free_new_sgl:
 379         for_each_sg(new_sgl, sg, new_sgl_nents, i) {
 380                 struct page *pg = sg_page(sg);
 381                 if (pg == NULL)
 382                         break;
 383                 __free_page(pg);
 384         }
 385         sg_free_table(&bw->sg_table);
 386
 387 out_free_bw:
 388         kfree(bw);
 389         res = -ENOMEM;
 390         goto out;
 391 }
 392
 393 static int __blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl,
 394         int nents, struct blk_kern_sg_work *bw, gfp_t gfp)
 395 {
 396         int res;
 397         struct request_queue *q = rq->q;
 398         int rw = rq_data_dir(rq);
 399         int max_nr_vecs, i;
 400         size_t tot_len;
 401         bool need_new_bio;
 402         struct scatterlist *sg, *prev_sg = NULL;
 403         struct bio *bio = NULL, *hbio = NULL, *tbio = NULL;
 404         int bios;
 405
 406         if (unlikely((sgl == NULL) || (sgl->length == 0) || (nents <= 0))) {
 407                 WARN_ON(1);
 408                 res = -EINVAL;
 409                 goto out;
 410         }
 411
 412         /*
 413          * Let's keep each bio allocation inside a single page to decrease
 414          * probability of failure.
 415          */
 416         max_nr_vecs =  min_t(size_t,
 417                 ((PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec)),
 418                 BIO_MAX_PAGES);
 419
 420         need_new_bio = true;
 421         tot_len = 0;
 422         bios = 0;
 423         for_each_sg(sgl, sg, nents, i) {
 424                 struct page *page = sg_page(sg);
 425                 void *page_addr = page_address(page);
 426                 size_t len = sg->length, l;
 427                 size_t offset = sg->offset;
 428
 429                 tot_len += len;
 430                 prev_sg = sg;
 431
 432                 /*
 433                  * Each segment must be aligned on DMA boundary and
 434                  * not on stack. The last one may have unaligned
 435                  * length as long as the total length is aligned to
 436                  * DMA padding alignment.
 437                  */
 438                 if (i == nents - 1)
 439                         l = 0;
 440                 else
 441                         l = len;
 442                 if (((sg->offset | l) & queue_dma_alignment(q)) ||
 443                     (page_addr && object_is_on_stack(page_addr + sg->offset))) {
 444                         res = -EINVAL;
 445                         goto out_free_bios;
 446                 }
 447
 448                 while (len > 0) {
 449                         size_t bytes;
 450                         int rc;
 451
 452                         if (need_new_bio) {
 453                                 bio = bio_kmalloc(gfp, max_nr_vecs);
 454                                 if (bio == NULL) {
 455                                         res = -ENOMEM;
 456                                         goto out_free_bios;
 457                                 }
 458
 459                                 if (rw == WRITE)
 460                                         bio->bi_rw |= 1 << BIO_RW;
 461
 462                                 bios++;
 463                                 bio->bi_private = bw;
 464                                 bio->bi_end_io = blk_bio_map_kern_endio;
 465
 466                                 if (hbio == NULL)
 467                                         hbio = tbio = bio;
 468                                 else
 469                                         tbio = tbio->bi_next = bio;
 470                         }
 471
 472                         bytes = min_t(size_t, len, PAGE_SIZE - offset);
 473
 474                         rc = bio_add_pc_page(q, bio, page, bytes, offset);
 475                         if (rc < bytes) {
 476                                 if (unlikely(need_new_bio || (rc < 0))) {
 477                                         if (rc < 0)
 478                                                 res = rc;
 479                                         else
 480                                                 res = -EIO;
 481                                         goto out_free_bios;
 482                                 } else {
 483                                         need_new_bio = true;
 484                                         len -= rc;
 485                                         offset += rc;
 486                                         continue;
 487                                 }
 488                         }
 489
 490                         need_new_bio = false;
 491                         offset = 0;
 492                         len -= bytes;
 493                         page = nth_page(page, 1);
 494                 }
 495         }
 496
 497         if (hbio == NULL) {
 498                 res = -EINVAL;
 499                 goto out_free_bios;
 500         }
 501
 502         /* Total length must be aligned on DMA padding alignment */
 503         if ((tot_len & q->dma_pad_mask) &&
 504             !(rq->cmd_flags & REQ_COPY_USER)) {
 505                 res = -EINVAL;
 506                 goto out_free_bios;
 507         }
 508
 509         if (bw != NULL)
 510                 atomic_set(&bw->bios_inflight, bios);
 511
 512         while (hbio != NULL) {
 513                 bio = hbio;
 514                 hbio = hbio->bi_next;
 515                 bio->bi_next = NULL;
 516
 517                 blk_queue_bounce(q, &bio);
 518
 519                 res = blk_rq_append_bio(q, rq, bio);
 520                 if (unlikely(res != 0)) {
 521                         bio->bi_next = hbio;
 522                         hbio = bio;
 523                         /* We can have one or more bios bounced */
 524                         goto out_unmap_bios;
 525                 }
 526         }
 527
 528         rq->buffer = NULL;
 529 out:
 530         return res;
 531
 532 out_free_bios:
 533         while (hbio != NULL) {
 534                 bio = hbio;
 535                 hbio = hbio->bi_next;
 536                 bio_put(bio);
 537         }
 538         goto out;
 539
 540 out_unmap_bios:
 541         blk_rq_unmap_kern_sg(rq, res);
 542         goto out;
 543 }
 544
 545 /**
 546  * blk_rq_map_kern_sg - map kernel data to a request, for REQ_TYPE_BLOCK_PC
 547  * @rq:         request to fill
 548  * @sgl:        area to map
 549  * @nents:      number of elements in @sgl
 550  * @gfp:        memory allocation flags
 551  *
 552  * Description:
 553  *    Data will be mapped directly if possible. Otherwise a bounce
 554  *    buffer will be used.
 555  */
 556 int blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl,
 557                        int nents, gfp_t gfp)
 558 {
 559         int res;
 560
 561         res = __blk_rq_map_kern_sg(rq, sgl, nents, NULL, gfp);
 562         if (unlikely(res != 0)) {
 563                 struct blk_kern_sg_work *bw = NULL;
 564
 565                 res = blk_rq_copy_kern_sg(rq, sgl, nents, &bw,
 566                                 gfp, rq->q->bounce_gfp | gfp);
 567                 if (unlikely(res != 0))
 568                         goto out;
 569
 570                 res = __blk_rq_map_kern_sg(rq, bw->sg_table.sgl,
 571                                 bw->sg_table.nents, bw, gfp);
 572                 if (res != 0) {
 573                         blk_free_kern_sg_work(bw);
 574                         goto out;
 575                 }
 576         }
 577
 578         rq->buffer = NULL;
 579
 580 out:
 581         return res;
 582 }
 583 EXPORT_SYMBOL(blk_rq_map_kern_sg);
 584
 585 /**
 586  * blk_rq_unmap_kern_sg - unmap a request with kernel sg
 587  * @rq:         request to unmap
 588  * @err:        non-zero error code
 589  *
 590  * Description:
 591  *    Unmap a rq previously mapped by blk_rq_map_kern_sg(). Must be called
 592  *    only in case of an error!
 593  */
 594 void blk_rq_unmap_kern_sg(struct request *rq, int err)
 595 {
 596         struct bio *bio = rq->bio;
 597
 598         while (bio) {
 599                 struct bio *b = bio;
 600                 bio = bio->bi_next;
 601                 b->bi_end_io(b, err);
 602         }
 603         rq->bio = NULL;
 604
 605         return;
 606 }
 607 EXPORT_SYMBOL(blk_rq_unmap_kern_sg);
 608
 609 /**
 610  * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
 611  * @q:          request queue where request should be inserted
 612  * @rq:         request to fill
 613  * @kbuf:       the kernel buffer
 614  * @len:        length of user data
 615  * @gfp_mask:   memory allocation flags
 616  *
 617  * Description:
 618  *    Data will be mapped directly if possible. Otherwise a bounce
 619  *    buffer is used. Can be called multple times to append multple
 620  *    buffers.
 621  */
 622 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 623                     unsigned int len, gfp_t gfp_mask)
 624 {
 625         int reading = rq_data_dir(rq) == READ;
 626         unsigned long addr = (unsigned long) kbuf;
 627         int do_copy = 0;
 628         struct bio *bio;
 629         int ret;
 630
 631         if (len > (queue_max_hw_sectors(q) << 9))
 632                 return -EINVAL;
 633         if (!len || !kbuf)
 634                 return -EINVAL;
 635
 636         do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
 637         if (do_copy)
 638                 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
 639         else
 640                 bio = bio_map_kern(q, kbuf, len, gfp_mask);
 641
 642         if (IS_ERR(bio))
 643                 return PTR_ERR(bio);
 644
 645         if (rq_data_dir(rq) == WRITE)
 646                 bio->bi_rw |= REQ_WRITE;
 647
 648         if (do_copy)
 649                 rq->cmd_flags |= REQ_COPY_USER;
 650
 651         ret = blk_rq_append_bio(q, rq, bio);
 652         if (unlikely(ret)) {
 653                 /* request is too big */
 654                 bio_put(bio);
 655                 return ret;
 656         }
 657
 658         blk_queue_bounce(q, &rq->bio);
 659         rq->buffer = NULL;
 660         return 0;
 661 }
 662 EXPORT_SYMBOL(blk_rq_map_kern);