diff --git a/net/core/dev.c b/net/core/dev.c
index 904ff431d570e3cfc0e6255480bd25f3c9dec2f3..4ce07dc25573ed3d20f181f5b36327cb0f407fe3 100644
@@ -94,6 +94,7 @@
 #include <linux/ethtool.h>
 #include <linux/notifier.h>
 #include <linux/skbuff.h>
+#include <linux/bpf.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/busy_poll.h>
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
 #include <linux/sctp.h>
+#include <linux/crash_dump.h>
 
 #include "net-sysfs.h"
 
@@ -196,7 +198,7 @@ static inline void dev_base_seq_inc(struct net *net)
 
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
-       unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+       unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
 
        return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
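
The name hash is now salted with the netns pointer. A minimal sketch of the salted call, assuming the 4.8-era full_name_hash() prototype from include/linux/stringhash.h (sketch_name_bucket is illustrative):

static unsigned int sketch_name_bucket(struct net *net, const char *name)
{
        /* the net pointer is an opaque salt: equal names hash to
         * different buckets in different namespaces
         */
        unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));

        return hash_32(hash, NETDEV_HASHBITS);
}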
@@ -2249,11 +2251,12 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
  */
 int netif_get_num_default_rss_queues(void)
 {
-       return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+       return is_kdump_kernel() ?
+               1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
 }
 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
 
-static inline void __netif_reschedule(struct Qdisc *q)
+static void __netif_reschedule(struct Qdisc *q)
 {
        struct softnet_data *sd;
        unsigned long flags;
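
Driver-side sketch of the kdump change, assuming a hypothetical per-device queue cap (sketch_max_hw_queues): in a crash-dump kernel the helper now reports a single queue, keeping the crash kernel's memory footprint small.

static unsigned int sketch_num_rx_queues(unsigned int sketch_max_hw_queues)
{
        /* is_kdump_kernel() (<linux/crash_dump.h>, included above)
         * collapses the default to one queue in a kdump kernel
         */
        return min_t(unsigned int, sketch_max_hw_queues,
                     netif_get_num_default_rss_queues());
}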
@@ -2420,7 +2423,7 @@ EXPORT_SYMBOL(__skb_tx_hash);
 
 static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
-       static const netdev_features_t null_features = 0;
+       static const netdev_features_t null_features;
        struct net_device *dev = skb->dev;
        const char *name = "";
 
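
The dropped initializer is purely cosmetic: C zero-initializes objects with static storage duration, so both declarations below produce an all-zero feature mask.

static const netdev_features_t with_init = 0;   /* explicit zero */
static const netdev_features_t without_init;    /* zero-initialized by the language */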
@@ -3068,6 +3071,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                                 struct netdev_queue *txq)
 {
        spinlock_t *root_lock = qdisc_lock(q);
+       struct sk_buff *to_free = NULL;
        bool contended;
        int rc;
 
@@ -3075,7 +3079,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
        /*
         * Heuristic to force contended enqueues to serialize on a
         * separate lock before trying to get qdisc main lock.
-        * This permits __QDISC___STATE_RUNNING owner to get the lock more
+        * This permits qdisc->running owner to get the lock more
         * often and dequeue packets faster.
         */
        contended = qdisc_is_running(q);
@@ -3084,7 +3088,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
        spin_lock(root_lock);
        if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-               kfree_skb(skb);
+               __qdisc_drop(skb, &to_free);
                rc = NET_XMIT_DROP;
        } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
                   qdisc_run_begin(q)) {
@@ -3107,7 +3111,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
                rc = NET_XMIT_SUCCESS;
        } else {
-               rc = q->enqueue(skb, q) & NET_XMIT_MASK;
+               rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
                if (qdisc_run_begin(q)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
@@ -3117,6 +3121,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                }
        }
        spin_unlock(root_lock);
+       if (unlikely(to_free))
+               kfree_skb_list(to_free);
        if (unlikely(contended))
                spin_unlock(&q->busylock);
        return rc;
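
A minimal sketch of the deferred-free pattern introduced here, assuming the 4.8-era three-argument ->enqueue() and kfree_skb_list() (sketch_xmit is illustrative): enqueue paths chain dropped skbs onto to_free via __qdisc_drop() while the root lock is held, and the caller frees the whole chain only after unlocking, keeping kfree_skb() work out of the contended section.

static int sketch_xmit(struct sk_buff *skb, struct Qdisc *q,
                       spinlock_t *root_lock)
{
        struct sk_buff *to_free = NULL;
        int rc;

        spin_lock(root_lock);
        /* a full or deactivated qdisc chains skb onto to_free */
        rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
        spin_unlock(root_lock);

        if (unlikely(to_free))
                kfree_skb_list(to_free);        /* freed outside the lock */
        return rc;
}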
@@ -3142,8 +3148,6 @@ static void skb_update_prio(struct sk_buff *skb)
 DEFINE_PER_CPU(int, xmit_recursion);
 EXPORT_SYMBOL(xmit_recursion);
 
-#define RECURSION_LIMIT 10
-
 /**
  *     dev_loopback_xmit - loop back @skb
  *     @net: network namespace this loopback is happening in
@@ -3386,8 +3390,8 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
                int cpu = smp_processor_id(); /* ok because BHs are off */
 
                if (txq->xmit_lock_owner != cpu) {
-
-                       if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+                       if (unlikely(__this_cpu_read(xmit_recursion) >
+                                    XMIT_RECURSION_LIMIT))
                                goto recursion_alert;
 
                        skb = validate_xmit_skb(skb, dev);
@@ -3898,22 +3902,14 @@ static void net_tx_action(struct softirq_action *h)
                        head = head->next_sched;
 
                        root_lock = qdisc_lock(q);
-                       if (spin_trylock(root_lock)) {
-                               smp_mb__before_atomic();
-                               clear_bit(__QDISC_STATE_SCHED,
-                                         &q->state);
-                               qdisc_run(q);
-                               spin_unlock(root_lock);
-                       } else {
-                               if (!test_bit(__QDISC_STATE_DEACTIVATED,
-                                             &q->state)) {
-                                       __netif_reschedule(q);
-                               } else {
-                                       smp_mb__before_atomic();
-                                       clear_bit(__QDISC_STATE_SCHED,
-                                                 &q->state);
-                               }
-                       }
+                       spin_lock(root_lock);
+                       /* We need to make sure head->next_sched is read
+                        * before clearing __QDISC_STATE_SCHED
+                        */
+                       smp_mb__before_atomic();
+                       clear_bit(__QDISC_STATE_SCHED, &q->state);
+                       qdisc_run(q);
+                       spin_unlock(root_lock);
                }
        }
 }
@@ -4977,7 +4973,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 
                        if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
                                rc = napi->poll(napi, BUSY_POLL_BUDGET);
-                               trace_napi_poll(napi);
+                               trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
                                if (rc == BUSY_POLL_BUDGET) {
                                        napi_complete_done(napi, rc);
                                        napi_schedule(napi);
@@ -5133,7 +5129,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
        work = 0;
        if (test_bit(NAPI_STATE_SCHED, &n->state)) {
                work = n->poll(n, weight);
-               trace_napi_poll(n);
+               trace_napi_poll(n, work, weight);
        }
 
        WARN_ON_ONCE(work > weight);
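
Both call sites now pass the tracepoint the work completed and the budget it was given; a caller-side sketch (sketch_poll_one is illustrative, the tracepoint itself lives in include/trace/events/napi.h in mainline):

static int sketch_poll_one(struct napi_struct *n, int weight)
{
        int work = 0;

        if (test_bit(NAPI_STATE_SCHED, &n->state)) {
                work = n->poll(n, weight);
                /* records how much of the budget this poll consumed */
                trace_napi_poll(n, work, weight);
        }
        return work;
}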
@@ -5449,6 +5445,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 }
 EXPORT_SYMBOL(netdev_lower_get_next);
 
+/**
+ * netdev_all_lower_get_next - Get the next device from all lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the all lower neighbour
+ * list will remain unchanged.
+ */
+struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+       struct netdev_adjacent *lower;
+
+       lower = list_entry(*iter, struct netdev_adjacent, list);
+
+       if (&lower->list == &dev->all_adj_list.lower)
+               return NULL;
+
+       *iter = lower->list.next;
+
+       return lower->dev;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next);
+
+/**
+ * netdev_all_lower_get_next_rcu - Get the next device from all
+ *                                lower neighbour list, RCU variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+                                                struct list_head **iter)
+{
+       struct netdev_adjacent *lower;
+
+       lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
+                                      struct netdev_adjacent, list);
+
+       return lower ? lower->dev : NULL;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
 /**
  * netdev_lower_get_first_private_rcu - Get the first ->private from the
  *                                    lower neighbour list, RCU
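
Note that netdev_all_lower_get_next_rcu() as merged never advances @iter: list_first_or_null_rcu() always re-reads the head entry, so a walk over more than one lower device would loop forever. Mainline later reworked it to step from the current position; a corrected sketch under that assumption:

struct net_device *sketch_all_lower_get_next_rcu(struct net_device *dev,
                                                 struct list_head **iter)
{
        struct netdev_adjacent *lower;

        /* step from *iter instead of re-reading the list head */
        lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);

        if (&lower->list == &dev->all_adj_list.lower)
                return NULL;

        *iter = &lower->list;

        return lower->dev;
}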
@@ -5919,7 +5961,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
        struct net *net = dev_net(dev);
 
        list_for_each_entry(iter, &dev->adj_list.upper, list) {
-               if (!net_eq(net,dev_net(iter->dev)))
+               if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_add(iter->dev, dev,
                                          &iter->dev->adj_list.lower);
@@ -5928,7 +5970,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
        }
 
        list_for_each_entry(iter, &dev->adj_list.lower, list) {
-               if (!net_eq(net,dev_net(iter->dev)))
+               if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_add(iter->dev, dev,
                                          &iter->dev->adj_list.upper);
@@ -5944,7 +5986,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
        struct net *net = dev_net(dev);
 
        list_for_each_entry(iter, &dev->adj_list.upper, list) {
-               if (!net_eq(net,dev_net(iter->dev)))
+               if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, dev->name,
                                          &iter->dev->adj_list.lower);
@@ -5953,7 +5995,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
        }
 
        list_for_each_entry(iter, &dev->adj_list.lower, list) {
-               if (!net_eq(net,dev_net(iter->dev)))
+               if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, dev->name,
                                          &iter->dev->adj_list.upper);
@@ -5969,7 +6011,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
        struct net *net = dev_net(dev);
 
        list_for_each_entry(iter, &dev->adj_list.upper, list) {
-               if (!net_eq(net,dev_net(iter->dev)))
+               if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, oldname,
                                          &iter->dev->adj_list.lower);
@@ -5978,7 +6020,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
        }
 
        list_for_each_entry(iter, &dev->adj_list.lower, list) {
-               if (!net_eq(net,dev_net(iter->dev)))
+               if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, oldname,
                                          &iter->dev->adj_list.upper);
@@ -6046,6 +6088,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
 }
 EXPORT_SYMBOL(netdev_lower_state_changed);
 
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+                                          struct neighbour *n)
+{
+       struct net_device *lower_dev, *stop_dev;
+       struct list_head *iter;
+       int err;
+
+       netdev_for_each_lower_dev(dev, lower_dev, iter) {
+               if (!lower_dev->netdev_ops->ndo_neigh_construct)
+                       continue;
+               err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+               if (err) {
+                       stop_dev = lower_dev;
+                       goto rollback;
+               }
+       }
+       return 0;
+
+rollback:
+       netdev_for_each_lower_dev(dev, lower_dev, iter) {
+               if (lower_dev == stop_dev)
+                       break;
+               if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+                       continue;
+               lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+       }
+       return err;
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
+
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+                                         struct neighbour *n)
+{
+       struct net_device *lower_dev;
+       struct list_head *iter;
+
+       netdev_for_each_lower_dev(dev, lower_dev, iter) {
+               if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+                       continue;
+               lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+       }
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
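
Usage sketch for the new defaults: an L2 upper device (bridge- or team-like) can delegate neighbour setup and teardown straight to its lower devices by wiring the helpers into its ops (sketch_netdev_ops is illustrative):

static const struct net_device_ops sketch_netdev_ops = {
        /* propagate neigh construct/destroy down the lower-device chain */
        .ndo_neigh_construct    = netdev_default_l2upper_neigh_construct,
        .ndo_neigh_destroy      = netdev_default_l2upper_neigh_destroy,
};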
@@ -6529,6 +6615,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
+/**
+ *     dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ *     @dev: device
+ *     @fd: new program fd or negative value to clear
+ *
+ *     Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, int fd)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+       struct bpf_prog *prog = NULL;
+       struct netdev_xdp xdp = {};
+       int err;
+
+       if (!ops->ndo_xdp)
+               return -EOPNOTSUPP;
+       if (fd >= 0) {
+               prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+               if (IS_ERR(prog))
+                       return PTR_ERR(prog);
+       }
+
+       xdp.command = XDP_SETUP_PROG;
+       xdp.prog = prog;
+       err = ops->ndo_xdp(dev, &xdp);
+       if (err < 0 && prog)
+               bpf_prog_put(prog);
+
+       return err;
+}
+EXPORT_SYMBOL(dev_change_xdp_fd);
+
 /**
  *     dev_new_index   -       allocate an ifindex
  *     @net: the applicable net namespace
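
On the driver side, ndo_xdp receives XDP_SETUP_PROG with the new program (or NULL to clear). A minimal handler sketch under the netdev_xdp layout from this series; sketch_priv and its xdp_prog field are illustrative:

struct sketch_priv {
        struct bpf_prog *xdp_prog;
};

static int sketch_ndo_xdp(struct net_device *dev, struct netdev_xdp *xdp)
{
        struct sketch_priv *priv = netdev_priv(dev);
        struct bpf_prog *old;

        switch (xdp->command) {
        case XDP_SETUP_PROG:
                /* swap in the new program (may be NULL) and drop the old
                 * reference; dev_change_xdp_fd() already took ours
                 */
                old = xchg(&priv->xdp_prog, xdp->prog);
                if (old)
                        bpf_prog_put(old);
                return 0;
        default:
                return -EINVAL;
        }
}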