#include <linux/gfp.h>
#include <linux/socket.h>
#include <linux/compat.h>
+#include <linux/sched/signal.h>
+
#include "internal.h"
/*
buf->len = spd->partial[page_nr].len;
buf->private = spd->partial[page_nr].private;
buf->ops = spd->ops;
+ buf->flags = 0;
pipe->nrbufs++;
page_nr++;
}
EXPORT_SYMBOL(add_to_pipe);
- void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
- {
- put_page(spd->pages[i]);
- }
-
/*
* Check if we need to grow the arrays holding pages and partial page
* descriptions.
idx = to.idx;
init_sync_kiocb(&kiocb, in);
kiocb.ki_pos = *ppos;
- ret = in->f_op->read_iter(&kiocb, &to);
+ ret = call_read_iter(in, &kiocb, &to);
if (ret > 0) {
*ppos = kiocb.ki_pos;
file_accessed(in);
struct iov_iter to;
struct page **pages;
unsigned int nr_pages;
- size_t offset, dummy, copied = 0;
+ size_t offset, base, copied = 0;
ssize_t res;
int i;
iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
- res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+ res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
if (res <= 0)
return -ENOMEM;
- BUG_ON(dummy);
- nr_pages = DIV_ROUND_UP(res, PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
vec = __vec;
if (nr_pages > PIPE_DEF_BUFFERS) {
struct fd f;
long error;
+ if (unlikely(flags & ~SPLICE_F_ALL))
+ return -EINVAL;
if (unlikely(nr_segs > UIO_MAXIOV))
return -EINVAL;
else if (unlikely(!nr_segs))
if (unlikely(!len))
return 0;
+ if (unlikely(flags & ~SPLICE_F_ALL))
+ return -EINVAL;
+
error = -EBADF;
in = fdget(fd_in);
if (in.file) {
struct fd in;
int error;
+ if (unlikely(flags & ~SPLICE_F_ALL))
+ return -EINVAL;
+
if (unlikely(!len))
return 0;
TRACE_ITER_EVENT_FORK
/*
- * The global_trace is the descriptor that holds the tracing
- * buffers for the live tracing. For each CPU, it contains
- * a link list of pages that will store trace entries. The
- * page descriptor of the pages in the memory is used to hold
- * the link list by linking the lru item in the page descriptor
- * to each of the pages in the buffer per CPU.
- *
- * For each active CPU there is a data field that holds the
- * pages for the buffer for that CPU. Each CPU has the same number
- * of pages allocated for its buffer.
+ * The global_trace is the descriptor that holds the top-level tracing
+ * buffers for the live tracing.
*/
static struct trace_array global_trace = {
.trace_flags = TRACE_DEFAULT_FLAGS,
void trace_parser_put(struct trace_parser *parser)
{
kfree(parser->buffer);
+ parser->buffer = NULL;
}
/*
"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
" kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
" uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
-#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
"\t accepts: event-definitions (one definition per line)\n"
"\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
"\t -:[<group>/]<event>\n"
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+ "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
"\t place: <path>:<offset>\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
.partial = partial_def,
.nr_pages = 0, /* This gets updated below. */
.nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
.ops = &tracing_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
};
.pages = pages_def,
.partial = partial_def,
.nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
.ops = &buffer_pipe_buf_ops,
.spd_release = buffer_spd_release,
};
return ret;
out_reg:
- ret = register_ftrace_function_probe(glob, ops, count);
+ ret = alloc_snapshot(&global_trace);
+ if (ret < 0)
+ goto out;
- if (ret >= 0)
- alloc_snapshot(&global_trace);
+ ret = register_ftrace_function_probe(glob, ops, count);
+ out:
return ret < 0 ? ret : 0;
}
tracing_set_nop(tr);
event_trace_del_tracer(tr);
+ ftrace_clear_pids(tr);
ftrace_destroy_function_files(tr);
tracefs_remove_recursive(tr->dir);
free_trace_buffers(tr);
ftrace_init_tracefs(tr, d_tracer);
}
-static struct vfsmount *trace_automount(void *ingore)
+static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
struct vfsmount *mnt;
struct file_system_type *type;
type = get_fs_type("tracefs");
if (!type)
return NULL;
- mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
+ mnt = vfs_submount(mntpt, type, "tracefs", NULL);
put_filesystem(type);
if (IS_ERR(mnt))
return NULL;
atomic_set(&fclones->fclone_ref, 1);
fclones->skb2.fclone = SKB_FCLONE_CLONE;
- fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
return skb;
local_irq_save(flags);
nc = this_cpu_ptr(&netdev_alloc_cache);
- data = __alloc_page_frag(nc, fragsz, gfp_mask);
+ data = page_frag_alloc(nc, fragsz, gfp_mask);
local_irq_restore(flags);
return data;
}
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
+ return page_frag_alloc(&nc->page, fragsz, gfp_mask);
}
void *napi_alloc_frag(unsigned int fragsz)
local_irq_save(flags);
nc = this_cpu_ptr(&netdev_alloc_cache);
- data = __alloc_page_frag(nc, len, gfp_mask);
+ data = page_frag_alloc(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
local_irq_restore(flags);
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- data = __alloc_page_frag(&nc->page, len, gfp_mask);
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
if (unlikely(!data))
return NULL;
skb->destructor(skb);
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- nf_conntrack_put(skb->nfct);
+ nf_conntrack_put(skb_nfct(skb));
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
#endif
#ifdef CONFIG_NET_SCHED
CHECK_SKB_FIELD(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
- CHECK_SKB_FIELD(tc_verd);
-#endif
#endif
}
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
gfp_t gfp_mask)
{
- int i;
- u8 *data;
- int size = nhead + skb_end_offset(skb) + ntail;
+ int i, osize = skb_end_offset(skb);
+ int size = osize + nhead + ntail;
long off;
+ u8 *data;
BUG_ON(nhead < 0);
skb->hdr_len = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+
+ /* It is not generally safe to change skb->truesize.
+ * For the moment, we really care of rx path, or
+ * when skb is orphaned (not attached to a socket).
+ */
+ if (!skb->sk || skb->destructor == sock_edemux)
+ skb->truesize += size - osize;
+
return 0;
nofrags:
skb_set_tail_pointer(skb, len);
}
+ if (!skb->sk || skb->destructor == sock_edemux)
+ skb_condense(skb);
return 0;
}
EXPORT_SYMBOL(___pskb_trim);
.pages = pages,
.partial = partial,
.nr_pages_max = MAX_SKB_FRAGS,
- .flags = flags,
.ops = &nosteal_pipe_buf_ops,
.spd_release = sock_spd_release,
};
if (sg && csum && (mss != GSO_BY_FRAGS)) {
if (!(features & NETIF_F_GSO_PARTIAL)) {
struct sk_buff *iter;
+ unsigned int frag_len;
if (!list_skb ||
!net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
goto normal;
- /* Split the buffer at the frag_list pointer.
- * This is based on the assumption that all
- * buffers in the chain excluding the last
- * containing the same amount of data.
+ /* If we get here then all the required
+ * GSO features except frag_list are supported.
+ * Try to split the SKB to multiple GSO SKBs
+ * with no frag_list.
+ * Currently we can do that only when the buffers don't
+ * have a linear part and all the buffers except
+ * the last are of the same length.
*/
+ frag_len = list_skb->len;
skb_walk_frags(head_skb, iter) {
+ if (frag_len != iter->len && iter->next)
+ goto normal;
if (skb_headlen(iter))
goto normal;
len -= iter->len;
}
+
+ if (len != frag_len)
+ goto normal;
}
/* GSO partial only requires that we trim off any excess that
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}
+static void skb_set_err_queue(struct sk_buff *skb)
+{
+ /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
+ * So, it is safe to (mis)use it to mark skbs on the error queue.
+ */
+ skb->pkt_type = PACKET_OUTGOING;
+ BUILD_BUG_ON(PACKET_OUTGOING == 0);
+}
+
/*
* Note: We dont mem charge error packets (no sk_forward_alloc changes)
*/
skb->sk = sk;
skb->destructor = sock_rmem_free;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ skb_set_err_queue(skb);
/* before exiting rcu section, make sure dst is refcounted */
skb_dst_force(skb);
static void __skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock *sk,
- int tstype)
+ int tstype,
+ bool opt_stats)
{
struct sock_exterr_skb *serr;
int err;
+ BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype;
+ serr->opt_stats = opt_stats;
+ serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP &&
if (!skb_may_tx_timestamp(sk, false))
return;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- *skb_hwtstamps(skb) = *hwtstamps;
- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-
- sock_put(sk);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ *skb_hwtstamps(skb) = *hwtstamps;
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
+ sock_put(sk);
+ }
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
struct sock *sk, int tstype)
{
struct sk_buff *skb;
- bool tsonly;
+ bool tsonly, opt_stats = false;
if (!sk)
return;
#ifdef CONFIG_INET
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_type == SOCK_STREAM)
+ sk->sk_type == SOCK_STREAM) {
skb = tcp_get_timestamping_opt_stats(sk);
- else
+ opt_stats = true;
+ } else
#endif
skb = alloc_skb(0, GFP_ATOMIC);
} else {
else
skb->tstamp = ktime_get_real();
- __skb_complete_tx_timestamp(skb, sk, tstype);
+ __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
{
struct sock *sk = skb->sk;
struct sock_exterr_skb *serr;
- int err;
+ int err = 1;
skb->wifi_acked_valid = 1;
skb->wifi_acked = acked;
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- err = sock_queue_err_skb(sk, skb);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ err = sock_queue_err_skb(sk, skb);
+ sock_put(sk);
+ }
if (err)
kfree_skb(skb);
-
- sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);