From: Al Viro Date: Thu, 8 Oct 2015 22:17:26 +0000 (-0400) Subject: orangefs: don't bother with splitting iovecs X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=dc4067f671231eea971298cb44f687a30e04d0fd;p=linux-beck.git orangefs: don't bother with splitting iovecs copy_page_{to,from}_iter() advances it just fine *and* it has no problem with partially consumed segments. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 9a439b2e8bde..ff7fe37f5a22 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -256,168 +256,6 @@ out: return ret; } -/* - * The reason we need to do this is to be able to support readv and writev - * that are larger than (pvfs_bufmap_size_query()) Default is - * PVFS2_BUFMAP_DEFAULT_DESC_SIZE MB. What that means is that we will - * create a new io vec descriptor for those memory addresses that - * go beyond the limit. Return value for this routine is negative in case - * of errors and 0 in case of success. - * - * Further, the new_nr_segs pointer is updated to hold the new value - * of number of iovecs, the new_vec pointer is updated to hold the pointer - * to the new split iovec, and the size array is an array of integers holding - * the number of iovecs that straddle pvfs_bufmap_size_query(). - * The max_new_nr_segs value is computed by the caller and returned. - * (It will be (count of all iov_len/ block_size) + 1). - */ -static int split_iovecs(unsigned long max_new_nr_segs, /* IN */ - unsigned long nr_segs, /* IN */ - const struct iovec *original_iovec, /* IN */ - unsigned long *new_nr_segs, /* OUT */ - struct iovec **new_vec, /* OUT */ - unsigned long *seg_count, /* OUT */ - unsigned long **seg_array) /* OUT */ -{ - unsigned long seg; - unsigned long count = 0; - unsigned long begin_seg; - unsigned long tmpnew_nr_segs = 0; - struct iovec *new_iovec = NULL; - struct iovec *orig_iovec; - unsigned long *sizes = NULL; - unsigned long sizes_count = 0; - - if (nr_segs <= 0 || - original_iovec == NULL || - new_nr_segs == NULL || - new_vec == NULL || - seg_count == NULL || - seg_array == NULL || - max_new_nr_segs <= 0) { - gossip_err("Invalid parameters to split_iovecs\n"); - return -EINVAL; - } - *new_nr_segs = 0; - *new_vec = NULL; - *seg_count = 0; - *seg_array = NULL; - /* copy the passed in iovec descriptor to a temp structure */ - orig_iovec = kmalloc_array(nr_segs, - sizeof(*orig_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (orig_iovec == NULL) { - gossip_err( - "split_iovecs: Could not allocate memory for %lu bytes!\n", - (unsigned long)(nr_segs * sizeof(*orig_iovec))); - return -ENOMEM; - } - new_iovec = kcalloc(max_new_nr_segs, - sizeof(*new_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (new_iovec == NULL) { - kfree(orig_iovec); - gossip_err( - "split_iovecs: Could not allocate memory for %lu bytes!\n", - (unsigned long)(max_new_nr_segs * sizeof(*new_iovec))); - return -ENOMEM; - } - sizes = kcalloc(max_new_nr_segs, - sizeof(*sizes), - PVFS2_BUFMAP_GFP_FLAGS); - if (sizes == NULL) { - kfree(new_iovec); - kfree(orig_iovec); - gossip_err( - "split_iovecs: Could not allocate memory for %lu bytes!\n", - (unsigned long)(max_new_nr_segs * sizeof(*sizes))); - return -ENOMEM; - } - /* copy the passed in iovec to a temp structure */ - memcpy(orig_iovec, original_iovec, nr_segs * sizeof(*orig_iovec)); - begin_seg = 0; -repeat: - for (seg = begin_seg; seg < nr_segs; seg++) { - if (tmpnew_nr_segs >= max_new_nr_segs || - sizes_count >= max_new_nr_segs) { - kfree(sizes); - kfree(orig_iovec); - kfree(new_iovec); - gossip_err - ("split_iovecs: exceeded the index limit (%lu)\n", - tmpnew_nr_segs); - return -EINVAL; - } - if (count + orig_iovec[seg].iov_len < - pvfs_bufmap_size_query()) { - count += orig_iovec[seg].iov_len; - memcpy(&new_iovec[tmpnew_nr_segs], - &orig_iovec[seg], - sizeof(*new_iovec)); - tmpnew_nr_segs++; - sizes[sizes_count]++; - } else { - new_iovec[tmpnew_nr_segs].iov_base = - orig_iovec[seg].iov_base; - new_iovec[tmpnew_nr_segs].iov_len = - (pvfs_bufmap_size_query() - count); - tmpnew_nr_segs++; - sizes[sizes_count]++; - sizes_count++; - begin_seg = seg; - orig_iovec[seg].iov_base += - (pvfs_bufmap_size_query() - count); - orig_iovec[seg].iov_len -= - (pvfs_bufmap_size_query() - count); - count = 0; - break; - } - } - if (seg != nr_segs) - goto repeat; - else - sizes_count++; - - *new_nr_segs = tmpnew_nr_segs; - /* new_iovec is freed by the caller */ - *new_vec = new_iovec; - *seg_count = sizes_count; - /* seg_array is also freed by the caller */ - *seg_array = sizes; - kfree(orig_iovec); - return 0; -} - -static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs, - ssize_t *total_count) -{ - unsigned long i; - long max_nr_iovecs; - ssize_t total; - ssize_t count; - - total = 0; - count = 0; - max_nr_iovecs = 0; - for (i = 0; i < nr_segs; i++) { - const struct iovec *iv = &curr[i]; - - count += iv->iov_len; - if (unlikely((ssize_t) (count | iv->iov_len) < 0)) - return -EINVAL; - if (total + iv->iov_len < pvfs_bufmap_size_query()) { - total += iv->iov_len; - max_nr_iovecs++; - } else { - total = - (total + iv->iov_len - pvfs_bufmap_size_query()); - max_nr_iovecs += (total / pvfs_bufmap_size_query() + 2); - } - } - *total_count = count; - return max_nr_iovecs; -} - /* * Common entry point for read/write/readv/writev * This function will dispatch it to either the direct I/O @@ -431,25 +269,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, struct inode *inode = file->f_mapping->host; struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; - ssize_t ret; - ssize_t total_count; - unsigned int to_free; - size_t count; - unsigned long seg; - unsigned long new_nr_segs; - unsigned long max_new_nr_segs; - unsigned long seg_count; - unsigned long *seg_array; - struct iovec *iovecptr; - struct iovec *ptr; - - total_count = 0; - ret = -EINVAL; - count = 0; - to_free = 0; - - /* Compute total and max number of segments after split */ - max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count); + struct iov_iter iter; + size_t count = iov_length(iov, nr_segs); + ssize_t total_count = 0; + ssize_t ret = -EINVAL; gossip_debug(GOSSIP_FILE_DEBUG, "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", @@ -472,93 +295,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, goto out; } - /* - * if the total size of data transfer requested is greater than - * the kernel-set blocksize of PVFS2, then we split the iovecs - * such that no iovec description straddles a block size limit - */ - - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: pvfs_bufmap_size:%d\n", - __func__, - pvfs_bufmap_size_query()); - - if (count > pvfs_bufmap_size_query()) { - /* - * Split up the given iovec description such that - * no iovec descriptor straddles over the block-size limitation. - * This makes us our job easier to stage the I/O. - * In addition, this function will also compute an array - * with seg_count entries that will store the number of - * segments that straddle the block-size boundaries. - */ - ret = split_iovecs(max_new_nr_segs, /* IN */ - nr_segs, /* IN */ - iov, /* IN */ - &new_nr_segs, /* OUT */ - &iovecptr, /* OUT */ - &seg_count, /* OUT */ - &seg_array); /* OUT */ - if (ret < 0) { - gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n", - __func__, - ret); - goto out; - } - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: Splitting iovecs from %lu to %lu" - " [max_new %lu]\n", - __func__, - nr_segs, - new_nr_segs, - max_new_nr_segs); - /* We must free seg_array and iovecptr */ - to_free = 1; - } else { - new_nr_segs = nr_segs; - /* use the given iovec description */ - iovecptr = (struct iovec *)iov; - /* There is only 1 element in the seg_array */ - seg_count = 1; - /* and its value is the number of segments passed in */ - seg_array = &nr_segs; - /* We dont have to free up anything */ - to_free = 0; - } - ptr = iovecptr; + iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, + iov, nr_segs, count); - gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU) %zd@%llu\n", - __func__, - handle, - count, - llu(*offset)); - gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU): new_nr_segs: %lu, seg_count: %lu\n", - __func__, - handle, - new_nr_segs, seg_count); - -/* PVFS2_KERNEL_DEBUG is a CFLAGS define. */ -#ifdef PVFS2_KERNEL_DEBUG - for (seg = 0; seg < new_nr_segs; seg++) - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: %d) %p to %p [%d bytes]\n", - __func__, - (int)seg + 1, - iovecptr[seg].iov_base, - iovecptr[seg].iov_base + iovecptr[seg].iov_len, - (int)iovecptr[seg].iov_len); - for (seg = 0; seg < seg_count; seg++) - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: %zd) %lu\n", - __func__, - seg + 1, - seg_array[seg]); -#endif - seg = 0; while (total_count < count) { - struct iov_iter iter; size_t each_count; size_t amt_complete; @@ -579,9 +319,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, handle, (int)*offset); - iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, - ptr, seg_array[seg], each_count); - ret = wait_for_direct_io(type, inode, offset, &iter, each_count, 0); gossip_debug(GOSSIP_FILE_DEBUG, @@ -593,9 +330,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (ret < 0) goto out; - /* advance the iovec pointer */ - ptr += seg_array[seg]; - seg++; *offset += ret; total_count += ret; amt_complete = ret; @@ -617,10 +351,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (total_count > 0) ret = total_count; out: - if (to_free) { - kfree(iovecptr); - kfree(seg_array); - } if (ret > 0) { if (type == PVFS_IO_READ) { file_accessed(file);