Merge branch 'work.splice' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 May 2017 18:38:06 +0000 (11:38 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 May 2017 18:38:06 +0000 (11:38 -0700)
diff --combined fs/splice.c

index 006ba50f4ece671f48367b644641ab58b04b65d8,712bc902e4149e5b28912044ff2a38a0af727c24..540c4a44756c20cdd0e7e2f3eb3db98712858210
--- 1/fs/splice.c
--- 2/fs/splice.c
+++ b/fs/splice.c
@@@ -33,8 -33,6 +33,8 @@@
   #include <linux/gfp.h>
   #include <linux/socket.h>
   #include <linux/compat.h>
+ +#include <linux/sched/signal.h>
+ +
   #include "internal.h"
   
   /*
@@@ -206,7 -204,6 +206,7 @@@ ssize_t splice_to_pipe(struct pipe_inod
                 buf->len = spd->partial[page_nr].len;
                 buf->private = spd->partial[page_nr].private;
                 buf->ops = spd->ops;
+ +              buf->flags = 0;
   
                 pipe->nrbufs++;
                 page_nr++;
@@@ -247,11 -244,6 +247,6 @@@ ssize_t add_to_pipe(struct pipe_inode_i
   }
   EXPORT_SYMBOL(add_to_pipe);
   
- void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
- {
-       put_page(spd->pages[i]);
- }
- 
   /*
    * Check if we need to grow the arrays holding pages and partial page
    * descriptions.
@@@ -309,7 -301,7 +304,7 @@@ ssize_t generic_file_splice_read(struc
         idx = to.idx;
         init_sync_kiocb(&kiocb, in);
         kiocb.ki_pos = *ppos;
- -      ret = in->f_op->read_iter(&kiocb, &to);
+ +      ret = call_read_iter(in, &kiocb, &to);
         if (ret > 0) {
                 *ppos = kiocb.ki_pos;
                 file_accessed(in);
@@@ -393,7 -385,7 +388,7 @@@ static ssize_t default_file_splice_read
         struct iov_iter to;
         struct page **pages;
         unsigned int nr_pages;
-       size_t offset, dummy, copied = 0;
+       size_t offset, base, copied = 0;
         ssize_t res;
         int i;
   
@@@ -408,12 -400,11 +403,11 @@@
   
         iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
   
-       res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+       res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
         if (res <= 0)
                 return -ENOMEM;
   
-       BUG_ON(dummy);
-       nr_pages = DIV_ROUND_UP(res, PAGE_SIZE);
+       nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
   
         vec = __vec;
         if (nr_pages > PIPE_DEF_BUFFERS) {
@@@ -1359,6 -1350,8 +1353,8 @@@ SYSCALL_DEFINE4(vmsplice, int, fd, cons
         struct fd f;
         long error;
   
+       if (unlikely(flags & ~SPLICE_F_ALL))
+               return -EINVAL;
         if (unlikely(nr_segs > UIO_MAXIOV))
                 return -EINVAL;
         else if (unlikely(!nr_segs))
@@@ -1409,6 -1402,9 +1405,9 @@@ SYSCALL_DEFINE6(splice, int, fd_in, lof
         if (unlikely(!len))
                 return 0;
   
+       if (unlikely(flags & ~SPLICE_F_ALL))
+               return -EINVAL;
+ 
         error = -EBADF;
         in = fdget(fd_in);
         if (in.file) {
@@@ -1737,6 -1733,9 +1736,9 @@@ SYSCALL_DEFINE4(tee, int, fdin, int, fd
         struct fd in;
         int error;
   
+       if (unlikely(flags & ~SPLICE_F_ALL))
+               return -EINVAL;
+ 
         if (unlikely(!len))
                 return 0;
   
diff --combined kernel/relay.c

index 0e413d9eec8af484517300e1c142325356865771,9b48284eac56b0e033fea99046925e87c9ef4c67..39a9dfc69486b57a85d115dcefee75d35907a74d
--- 1/kernel/relay.c
--- 2/kernel/relay.c
+++ b/kernel/relay.c
@@@ -39,10 -39,10 +39,10 @@@ static void relay_file_mmap_close(struc
   /*
    * fault() vm_op implementation for relay file mapping.
    */
- -static int relay_buf_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ +static int relay_buf_fault(struct vm_fault *vmf)
   {
         struct page *page;
- -      struct rchan_buf *buf = vma->vm_private_data;
+ +      struct rchan_buf *buf = vmf->vma->vm_private_data;
         pgoff_t pgoff = vmf->pgoff;
   
         if (!buf)
@@@ -847,7 -847,7 +847,7 @@@ void relay_close(struct rchan *chan
   
         if (chan->last_toobig)
                 printk(KERN_WARNING "relay: one or more items not logged "
- -                     "[item size (%Zd) > sub-buffer size (%Zd)]\n",
+ +                     "[item size (%zd) > sub-buffer size (%zd)]\n",
                        chan->last_toobig, chan->subbuf_size);
   
         list_del(&chan->list);
@@@ -1212,7 -1212,6 +1212,6 @@@ static ssize_t subbuf_splice_actor(stru
                 .nr_pages = 0,
                 .nr_pages_max = PIPE_DEF_BUFFERS,
                 .partial = partial,
-               .flags = flags,
                 .ops = &relay_pipe_buf_ops,
                 .spd_release = relay_page_release,
         };
diff --combined kernel/trace/trace.c

index b253d59b9c518a4c71ad03d2e4940e782dd4e8a4,77c2d9bcb40f9d75d85a4caa7923906260216fa9..0029fe62b2450361923ebb93346206cded759a09
--- 1/kernel/trace/trace.c
--- 2/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@@ -260,8 -260,16 +260,8 @@@ unsigned long long ns2usecs(u64 nsec
         TRACE_ITER_EVENT_FORK
   
   /*
- - * The global_trace is the descriptor that holds the tracing
- - * buffers for the live tracing. For each CPU, it contains
- - * a link list of pages that will store trace entries. The
- - * page descriptor of the pages in the memory is used to hold
- - * the link list by linking the lru item in the page descriptor
- - * to each of the pages in the buffer per CPU.
- - *
- - * For each active CPU there is a data field that holds the
- - * pages for the buffer for that CPU. Each CPU has the same number
- - * of pages allocated for its buffer.
+ + * The global_trace is the descriptor that holds the top-level tracing
+ + * buffers for the live tracing.
    */
   static struct trace_array global_trace = {
         .trace_flags = TRACE_DEFAULT_FLAGS,
@@@ -1185,7 -1193,6 +1185,7 @@@ int trace_parser_get_init(struct trace_
   void trace_parser_put(struct trace_parser *parser)
   {
         kfree(parser->buffer);
+ +      parser->buffer = NULL;
   }
   
   /*
@@@ -4341,23 -4348,22 +4341,23 @@@ static const char readme_msg[] 
         "\t\t\t  traces\n"
   #endif
   #endif /* CONFIG_STACK_TRACER */
- -#ifdef CONFIG_KPROBE_EVENT
+ +#ifdef CONFIG_KPROBE_EVENTS
         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
         "\t\t\t  Write into this file to define/undefine new trace events.\n"
   #endif
- -#ifdef CONFIG_UPROBE_EVENT
+ +#ifdef CONFIG_UPROBE_EVENTS
         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
         "\t\t\t  Write into this file to define/undefine new trace events.\n"
   #endif
- -#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+ +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
         "\t  accepts: event-definitions (one definition per line)\n"
         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
         "\t           -:[<group>/]<event>\n"
- -#ifdef CONFIG_KPROBE_EVENT
+ +#ifdef CONFIG_KPROBE_EVENTS
         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+ +  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
   #endif
- -#ifdef CONFIG_UPROBE_EVENT
+ +#ifdef CONFIG_UPROBE_EVENTS
         "\t    place: <path>:<offset>\n"
   #endif
         "\t     args: <name>=fetcharg[:type]\n"
@@@ -5530,7 -5536,6 +5530,6 @@@ static ssize_t tracing_splice_read_pipe
                 .partial        = partial_def,
                 .nr_pages       = 0, /* This gets updated below. */
                 .nr_pages_max   = PIPE_DEF_BUFFERS,
-               .flags          = flags,
                 .ops            = &tracing_pipe_buf_ops,
                 .spd_release    = tracing_spd_release_pipe,
         };
@@@ -6428,7 -6433,6 +6427,6 @@@ tracing_buffers_splice_read(struct fil
                 .pages          = pages_def,
                 .partial        = partial_def,
                 .nr_pages_max   = PIPE_DEF_BUFFERS,
-               .flags          = flags,
                 .ops            = &buffer_pipe_buf_ops,
                 .spd_release    = buffer_spd_release,
         };
@@@ -6734,13 -6738,11 +6732,13 @@@ ftrace_trace_snapshot_callback(struct f
                 return ret;
   
    out_reg:
- -      ret = register_ftrace_function_probe(glob, ops, count);
+ +      ret = alloc_snapshot(&global_trace);
+ +      if (ret < 0)
+ +              goto out;
   
- -      if (ret >= 0)
- -              alloc_snapshot(&global_trace);
+ +      ret = register_ftrace_function_probe(glob, ops, count);
   
+ + out:
         return ret < 0 ? ret : 0;
   }
   
@@@ -7405,7 -7407,6 +7403,7 @@@ static int instance_rmdir(const char *n
   
         tracing_set_nop(tr);
         event_trace_del_tracer(tr);
+ +      ftrace_clear_pids(tr);
         ftrace_destroy_function_files(tr);
         tracefs_remove_recursive(tr->dir);
         free_trace_buffers(tr);
@@@ -7500,7 -7501,7 +7498,7 @@@ init_tracer_tracefs(struct trace_array 
         ftrace_init_tracefs(tr, d_tracer);
   }
   
- -static struct vfsmount *trace_automount(void *ingore)
+ +static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
   {
         struct vfsmount *mnt;
         struct file_system_type *type;
@@@ -7513,7 -7514,7 +7511,7 @@@
         type = get_fs_type("tracefs");
         if (!type)
                 return NULL;
- -      mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
+ +      mnt = vfs_submount(mntpt, type, "tracefs", NULL);
         put_filesystem(type);
         if (IS_ERR(mnt))
                 return NULL;
diff --combined net/core/skbuff.c

index f1d04592ace02f32efa6e05df89c9a5e0023157f,0835ac93a4b1a5eee99b228db896af0676a19300..b782b4593f8cbc005ebb8d8befc148f96cc325f5
--- 1/net/core/skbuff.c
--- 2/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@@ -271,6 -271,7 +271,6 @@@ struct sk_buff *__alloc_skb(unsigned in
                 atomic_set(&fclones->fclone_ref, 1);
   
                 fclones->skb2.fclone = SKB_FCLONE_CLONE;
- -              fclones->skb2.pfmemalloc = pfmemalloc;
         }
   out:
         return skb;
@@@ -368,7 -369,7 +368,7 @@@ static void *__netdev_alloc_frag(unsign
   
         local_irq_save(flags);
         nc = this_cpu_ptr(&netdev_alloc_cache);
- -      data = __alloc_page_frag(nc, fragsz, gfp_mask);
+ +      data = page_frag_alloc(nc, fragsz, gfp_mask);
         local_irq_restore(flags);
         return data;
   }
@@@ -390,7 -391,7 +390,7 @@@ static void *__napi_alloc_frag(unsigne
   {
         struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
   
- -      return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
+ +      return page_frag_alloc(&nc->page, fragsz, gfp_mask);
   }
   
   void *napi_alloc_frag(unsigned int fragsz)
@@@ -440,7 -441,7 +440,7 @@@ struct sk_buff *__netdev_alloc_skb(stru
         local_irq_save(flags);
   
         nc = this_cpu_ptr(&netdev_alloc_cache);
- -      data = __alloc_page_frag(nc, len, gfp_mask);
+ +      data = page_frag_alloc(nc, len, gfp_mask);
         pfmemalloc = nc->pfmemalloc;
   
         local_irq_restore(flags);
@@@ -504,7 -505,7 +504,7 @@@ struct sk_buff *__napi_alloc_skb(struc
         if (sk_memalloc_socks())
                 gfp_mask |= __GFP_MEMALLOC;
   
- -      data = __alloc_page_frag(&nc->page, len, gfp_mask);
+ +      data = page_frag_alloc(&nc->page, len, gfp_mask);
         if (unlikely(!data))
                 return NULL;
   
@@@ -654,7 -655,7 +654,7 @@@ static void skb_release_head_state(stru
                 skb->destructor(skb);
         }
   #if IS_ENABLED(CONFIG_NF_CONNTRACK)
- -      nf_conntrack_put(skb->nfct);
+ +      nf_conntrack_put(skb_nfct(skb));
   #endif
   #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         nf_bridge_put(skb->nf_bridge);
@@@ -877,6 -878,9 +877,6 @@@ static void __copy_skb_header(struct sk
   #endif
   #ifdef CONFIG_NET_SCHED
         CHECK_SKB_FIELD(tc_index);
- -#ifdef CONFIG_NET_CLS_ACT
- -      CHECK_SKB_FIELD(tc_verd);
- -#endif
   #endif
   
   }
@@@ -1191,10 -1195,10 +1191,10 @@@ EXPORT_SYMBOL(__pskb_copy_fclone)
   int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                      gfp_t gfp_mask)
   {
- -      int i;
- -      u8 *data;
- -      int size = nhead + skb_end_offset(skb) + ntail;
+ +      int i, osize = skb_end_offset(skb);
+ +      int size = osize + nhead + ntail;
         long off;
+ +      u8 *data;
   
         BUG_ON(nhead < 0);
   
@@@ -1256,14 -1260,6 +1256,14 @@@
         skb->hdr_len  = 0;
         skb->nohdr    = 0;
         atomic_set(&skb_shinfo(skb)->dataref, 1);
+ +
+ +      /* It is not generally safe to change skb->truesize.
+ +       * For the moment, we really care of rx path, or
+ +       * when skb is orphaned (not attached to a socket).
+ +       */
+ +      if (!skb->sk || skb->destructor == sock_edemux)
+ +              skb->truesize += size - osize;
+ +
         return 0;
   
   nofrags:
@@@ -1576,8 -1572,6 +1576,8 @@@ done
                 skb_set_tail_pointer(skb, len);
         }
   
+ +      if (!skb->sk || skb->destructor == sock_edemux)
+ +              skb_condense(skb);
         return 0;
   }
   EXPORT_SYMBOL(___pskb_trim);
@@@ -1982,7 -1976,6 +1982,6 @@@ int skb_splice_bits(struct sk_buff *skb
                 .pages = pages,
                 .partial = partial,
                 .nr_pages_max = MAX_SKB_FRAGS,
-               .flags = flags,
                 .ops = &nosteal_pipe_buf_ops,
                 .spd_release = sock_spd_release,
         };
@@@ -3084,32 -3077,22 +3083,32 @@@ struct sk_buff *skb_segment(struct sk_b
         if (sg && csum && (mss != GSO_BY_FRAGS))  {
                 if (!(features & NETIF_F_GSO_PARTIAL)) {
                         struct sk_buff *iter;
+ +                      unsigned int frag_len;
   
                         if (!list_skb ||
                             !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
                                 goto normal;
   
- -                      /* Split the buffer at the frag_list pointer.
- -                       * This is based on the assumption that all
- -                       * buffers in the chain excluding the last
- -                       * containing the same amount of data.
+ +                      /* If we get here then all the required
+ +                       * GSO features except frag_list are supported.
+ +                       * Try to split the SKB to multiple GSO SKBs
+ +                       * with no frag_list.
+ +                       * Currently we can do that only when the buffers don't
+ +                       * have a linear part and all the buffers except
+ +                       * the last are of the same length.
                          */
+ +                      frag_len = list_skb->len;
                         skb_walk_frags(head_skb, iter) {
+ +                              if (frag_len != iter->len && iter->next)
+ +                                      goto normal;
                                 if (skb_headlen(iter))
                                         goto normal;
   
                                 len -= iter->len;
                         }
+ +
+ +                      if (len != frag_len)
+ +                              goto normal;
                 }
   
                 /* GSO partial only requires that we trim off any excess that
@@@ -3706,15 -3689,6 +3705,15 @@@ static void sock_rmem_free(struct sk_bu
         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
   }
   
+ +static void skb_set_err_queue(struct sk_buff *skb)
+ +{
+ +      /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
+ +       * So, it is safe to (mis)use it to mark skbs on the error queue.
+ +       */
+ +      skb->pkt_type = PACKET_OUTGOING;
+ +      BUILD_BUG_ON(PACKET_OUTGOING == 0);
+ +}
+ +
   /*
    * Note: We dont mem charge error packets (no sk_forward_alloc changes)
    */
@@@ -3728,7 -3702,6 +3727,7 @@@ int sock_queue_err_skb(struct sock *sk
         skb->sk = sk;
         skb->destructor = sock_rmem_free;
         atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ +      skb_set_err_queue(skb);
   
         /* before exiting rcu section, make sure dst is refcounted */
         skb_dst_force(skb);
@@@ -3805,21 -3778,16 +3804,21 @@@ EXPORT_SYMBOL(skb_clone_sk)
   
   static void __skb_complete_tx_timestamp(struct sk_buff *skb,
                                         struct sock *sk,
- -                                      int tstype)
+ +                                      int tstype,
+ +                                      bool opt_stats)
   {
         struct sock_exterr_skb *serr;
         int err;
   
+ +      BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+ +
         serr = SKB_EXT_ERR(skb);
         memset(serr, 0, sizeof(*serr));
         serr->ee.ee_errno = ENOMSG;
         serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
         serr->ee.ee_info = tstype;
+ +      serr->opt_stats = opt_stats;
+ +      serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
         if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
                 serr->ee.ee_data = skb_shinfo(skb)->tskey;
                 if (sk->sk_protocol == IPPROTO_TCP &&
@@@ -3855,14 -3823,13 +3854,14 @@@ void skb_complete_tx_timestamp(struct s
         if (!skb_may_tx_timestamp(sk, false))
                 return;
   
- -      /* take a reference to prevent skb_orphan() from freeing the socket */
- -      sock_hold(sk);
- -
- -      *skb_hwtstamps(skb) = *hwtstamps;
- -      __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
- -
- -      sock_put(sk);
+ +      /* Take a reference to prevent skb_orphan() from freeing the socket,
+ +       * but only if the socket refcount is not zero.
+ +       */
+ +      if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ +              *skb_hwtstamps(skb) = *hwtstamps;
+ +              __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
+ +              sock_put(sk);
+ +      }
   }
   EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
   
@@@ -3871,7 -3838,7 +3870,7 @@@ void __skb_tstamp_tx(struct sk_buff *or
                      struct sock *sk, int tstype)
   {
         struct sk_buff *skb;
- -      bool tsonly;
+ +      bool tsonly, opt_stats = false;
   
         if (!sk)
                 return;
@@@ -3884,10 -3851,9 +3883,10 @@@
   #ifdef CONFIG_INET
                 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
                     sk->sk_protocol == IPPROTO_TCP &&
- -                  sk->sk_type == SOCK_STREAM)
+ +                  sk->sk_type == SOCK_STREAM) {
                         skb = tcp_get_timestamping_opt_stats(sk);
- -              else
+ +                      opt_stats = true;
+ +              } else
   #endif
                         skb = alloc_skb(0, GFP_ATOMIC);
         } else {
@@@ -3906,7 -3872,7 +3905,7 @@@
         else
                 skb->tstamp = ktime_get_real();
   
- -      __skb_complete_tx_timestamp(skb, sk, tstype);
+ +      __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
   }
   EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
   
@@@ -3922,7 -3888,7 +3921,7 @@@ void skb_complete_wifi_ack(struct sk_bu
   {
         struct sock *sk = skb->sk;
         struct sock_exterr_skb *serr;
- -      int err;
+ +      int err = 1;
   
         skb->wifi_acked_valid = 1;
         skb->wifi_acked = acked;
@@@ -3932,15 -3898,14 +3931,15 @@@
         serr->ee.ee_errno = ENOMSG;
         serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
   
- -      /* take a reference to prevent skb_orphan() from freeing the socket */
- -      sock_hold(sk);
- -
- -      err = sock_queue_err_skb(sk, skb);
+ +      /* Take a reference to prevent skb_orphan() from freeing the socket,
+ +       * but only if the socket refcount is not zero.
+ +       */
+ +      if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ +              err = sock_queue_err_skb(sk, skb);
+ +              sock_put(sk);
+ +      }
         if (err)
                 kfree_skb(skb);
- -
- -      sock_put(sk);
   }
   EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 2 May 2017 18:38:06 +0000 (11:38 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 2 May 2017 18:38:06 +0000 (11:38 -0700)
		1	2
fs/splice.c	patch | diff1 | diff2 | blob | history
kernel/relay.c	patch | diff1 | diff2 | blob | history
kernel/trace/trace.c	patch | diff1 | diff2 | blob | history
net/core/skbuff.c	patch | diff1 | diff2 | blob | history