]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - net/ipv4/ip_gre.c
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[mv-sheeva.git] / net / ipv4 / ip_gre.c
index fc20e687e933ba2557ab36bd1a3c57d8a9374d7f..9d421f4cf3efbf41f52d1bd9d0a43c5b6aa2313d 100644 (file)
@@ -46,7 +46,7 @@
 #include <net/rtnetlink.h>
 #include <net/gre.h>
 
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
    We cannot track such dead loops during route installation,
    it is infeasible task. The most general solutions would be
    to keep skb->encapsulation counter (sort of local ttl),
-   and silently drop packet when it expires. It is the best
+   and silently drop packet when it expires. It is a good
    solution, but it supposes maintaing new variable in ALL
    skb, even if no tunneling is used.
 
-   Current solution: HARD_TX_LOCK lock breaks dead loops.
-
-
+   Current solution: xmit_recursion breaks dead loops. This is a percpu
+   counter, since when we enter the first ndo_xmit(), cpu migration is
+   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
 
    2. Networking dead loops would not kill routers, but would really
    kill network. IP hop limit plays role of "t->recursion" in this case,
@@ -165,6 +165,34 @@ struct ipgre_net {
 #define for_each_ip_tunnel_rcu(start) \
        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+       unsigned long   rx_packets;
+       unsigned long   rx_bytes;
+       unsigned long   tx_packets;
+       unsigned long   tx_bytes;
+};
+
+static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
+{
+       struct pcpu_tstats sum = { 0 };
+       int i;
+
+       for_each_possible_cpu(i) {
+               const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+
+               sum.rx_packets += tstats->rx_packets;
+               sum.rx_bytes   += tstats->rx_bytes;
+               sum.tx_packets += tstats->tx_packets;
+               sum.tx_bytes   += tstats->tx_bytes;
+       }
+       dev->stats.rx_packets = sum.rx_packets;
+       dev->stats.rx_bytes   = sum.rx_bytes;
+       dev->stats.tx_packets = sum.tx_packets;
+       dev->stats.tx_bytes   = sum.tx_bytes;
+       return &dev->stats;
+}
+
 /* Given src, dst and key, find appropriate for input tunnel. */
 
 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
@@ -584,7 +612,7 @@ static int ipgre_rcv(struct sk_buff *skb)
        if ((tunnel = ipgre_tunnel_lookup(skb->dev,
                                          iph->saddr, iph->daddr, key,
                                          gre_proto))) {
-               struct net_device_stats *stats = &tunnel->dev->stats;
+               struct pcpu_tstats *tstats;
 
                secpath_reset(skb);
 
@@ -608,22 +636,22 @@ static int ipgre_rcv(struct sk_buff *skb)
                        /* Looped back packet, drop it! */
                        if (skb_rtable(skb)->fl.iif == 0)
                                goto drop;
-                       stats->multicast++;
+                       tunnel->dev->stats.multicast++;
                        skb->pkt_type = PACKET_BROADCAST;
                }
 #endif
 
                if (((flags&GRE_CSUM) && csum) ||
                    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
-                       stats->rx_crc_errors++;
-                       stats->rx_errors++;
+                       tunnel->dev->stats.rx_crc_errors++;
+                       tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                if (tunnel->parms.i_flags&GRE_SEQ) {
                        if (!(flags&GRE_SEQ) ||
                            (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
-                               stats->rx_fifo_errors++;
-                               stats->rx_errors++;
+                               tunnel->dev->stats.rx_fifo_errors++;
+                               tunnel->dev->stats.rx_errors++;
                                goto drop;
                        }
                        tunnel->i_seqno = seqno + 1;
@@ -632,8 +660,8 @@ static int ipgre_rcv(struct sk_buff *skb)
                /* Warning: All skb pointers will be invalidated! */
                if (tunnel->dev->type == ARPHRD_ETHER) {
                        if (!pskb_may_pull(skb, ETH_HLEN)) {
-                               stats->rx_length_errors++;
-                               stats->rx_errors++;
+                               tunnel->dev->stats.rx_length_errors++;
+                               tunnel->dev->stats.rx_errors++;
                                goto drop;
                        }
 
@@ -642,14 +670,19 @@ static int ipgre_rcv(struct sk_buff *skb)
                        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
                }
 
-               skb_tunnel_rx(skb, tunnel->dev);
+               tstats = this_cpu_ptr(tunnel->dev->tstats);
+               tstats->rx_packets++;
+               tstats->rx_bytes += skb->len;
+
+               __skb_tunnel_rx(skb, tunnel->dev);
 
                skb_reset_network_header(skb);
                ipgre_ecn_decapsulate(iph, skb);
 
                netif_rx(skb);
+
                rcu_read_unlock();
-               return(0);
+               return 0;
        }
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 
@@ -657,14 +690,13 @@ drop:
        rcu_read_unlock();
 drop_nolock:
        kfree_skb(skb);
-       return(0);
+       return 0;
 }
 
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
-       struct net_device_stats *stats = &dev->stats;
-       struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+       struct pcpu_tstats *tstats;
        struct iphdr  *old_iph = ip_hdr(skb);
        struct iphdr  *tiph;
        u8     tos;
@@ -692,7 +724,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                /* NBMA tunnel */
 
                if (skb_dst(skb) == NULL) {
-                       stats->tx_fifo_errors++;
+                       dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }
 
@@ -701,7 +733,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                        if ((dst = rt->rt_gateway) == 0)
                                goto tx_error_icmp;
                }
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        struct in6_addr *addr6;
                        int addr_type;
@@ -738,14 +770,20 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        }
 
        {
-               struct flowi fl = { .oif = tunnel->parms.link,
-                                   .nl_u = { .ip4_u =
-                                             { .daddr = dst,
-                                               .saddr = tiph->saddr,
-                                               .tos = RT_TOS(tos) } },
-                                   .proto = IPPROTO_GRE };
+               struct flowi fl = {
+                       .oif = tunnel->parms.link,
+                       .nl_u = {
+                               .ip4_u = {
+                                       .daddr = dst,
+                                       .saddr = tiph->saddr,
+                                       .tos = RT_TOS(tos)
+                               }
+                       },
+                       .proto = IPPROTO_GRE
+               }
+;
                if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
-                       stats->tx_carrier_errors++;
+                       dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
        }
@@ -753,7 +791,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
        if (tdev == dev) {
                ip_rt_put(rt);
-               stats->collisions++;
+               dev->stats.collisions++;
                goto tx_error;
        }
 
@@ -776,7 +814,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                        goto tx_error;
                }
        }
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 
@@ -816,7 +854,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                        dev->needed_headroom = max_headroom;
                if (!new_skb) {
                        ip_rt_put(rt);
-                       txq->tx_dropped++;
+                       dev->stats.tx_dropped++;
                        dev_kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
@@ -852,7 +890,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        if ((iph->ttl = tiph->ttl) == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        iph->ttl = old_iph->ttl;
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
@@ -883,15 +921,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        }
 
        nf_reset(skb);
-
-       IPTUNNEL_XMIT();
+       tstats = this_cpu_ptr(dev->tstats);
+       __IPTUNNEL_XMIT(tstats, &dev->stats);
        return NETDEV_TX_OK;
 
 tx_error_icmp:
        dst_link_failure(skb);
 
 tx_error:
-       stats->tx_errors++;
+       dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
 }
@@ -911,13 +949,19 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
        /* Guess output device to choose reasonable mtu and needed_headroom */
 
        if (iph->daddr) {
-               struct flowi fl = { .oif = tunnel->parms.link,
-                                   .nl_u = { .ip4_u =
-                                             { .daddr = iph->daddr,
-                                               .saddr = iph->saddr,
-                                               .tos = RT_TOS(iph->tos) } },
-                                   .proto = IPPROTO_GRE };
+               struct flowi fl = {
+                       .oif = tunnel->parms.link,
+                       .nl_u = {
+                               .ip4_u = {
+                                       .daddr = iph->daddr,
+                                       .saddr = iph->saddr,
+                                       .tos = RT_TOS(iph->tos)
+                               }
+                       },
+                       .proto = IPPROTO_GRE
+               };
                struct rtable *rt;
+
                if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
                        tdev = rt->dst.dev;
                        ip_rt_put(rt);
@@ -1169,13 +1213,19 @@ static int ipgre_open(struct net_device *dev)
        struct ip_tunnel *t = netdev_priv(dev);
 
        if (ipv4_is_multicast(t->parms.iph.daddr)) {
-               struct flowi fl = { .oif = t->parms.link,
-                                   .nl_u = { .ip4_u =
-                                             { .daddr = t->parms.iph.daddr,
-                                               .saddr = t->parms.iph.saddr,
-                                               .tos = RT_TOS(t->parms.iph.tos) } },
-                                   .proto = IPPROTO_GRE };
+               struct flowi fl = {
+                       .oif = t->parms.link,
+                       .nl_u = {
+                               .ip4_u = {
+                                       .daddr = t->parms.iph.daddr,
+                                       .saddr = t->parms.iph.saddr,
+                                       .tos = RT_TOS(t->parms.iph.tos)
+                               }
+                       },
+                       .proto = IPPROTO_GRE
+               };
                struct rtable *rt;
+
                if (ip_route_output_key(dev_net(dev), &rt, &fl))
                        return -EADDRNOTAVAIL;
                dev = rt->dst.dev;
@@ -1215,12 +1265,19 @@ static const struct net_device_ops ipgre_netdev_ops = {
        .ndo_start_xmit         = ipgre_tunnel_xmit,
        .ndo_do_ioctl           = ipgre_tunnel_ioctl,
        .ndo_change_mtu         = ipgre_tunnel_change_mtu,
+       .ndo_get_stats          = ipgre_get_stats,
 };
 
+static void ipgre_dev_free(struct net_device *dev)
+{
+       free_percpu(dev->tstats);
+       free_netdev(dev);
+}
+
 static void ipgre_tunnel_setup(struct net_device *dev)
 {
        dev->netdev_ops         = &ipgre_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->destructor         = ipgre_dev_free;
 
        dev->type               = ARPHRD_IPGRE;
        dev->needed_headroom    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
@@ -1258,6 +1315,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
        } else
                dev->header_ops = &ipgre_header_ops;
 
+       dev->tstats = alloc_percpu(struct pcpu_tstats);
+       if (!dev->tstats)
+               return -ENOMEM;
+
        return 0;
 }
 
@@ -1444,6 +1505,10 @@ static int ipgre_tap_init(struct net_device *dev)
 
        ipgre_tunnel_bind_dev(dev);
 
+       dev->tstats = alloc_percpu(struct pcpu_tstats);
+       if (!dev->tstats)
+               return -ENOMEM;
+
        return 0;
 }
 
@@ -1454,6 +1519,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = ipgre_tunnel_change_mtu,
+       .ndo_get_stats          = ipgre_get_stats,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
@@ -1462,7 +1528,7 @@ static void ipgre_tap_setup(struct net_device *dev)
        ether_setup(dev);
 
        dev->netdev_ops         = &ipgre_tap_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->destructor         = ipgre_dev_free;
 
        dev->iflink             = 0;
        dev->features           |= NETIF_F_NETNS_LOCAL;
@@ -1490,6 +1556,10 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla
        if (!tb[IFLA_MTU])
                dev->mtu = mtu;
 
+       /* Can use a lockless transmit, unless we generate output sequences */
+       if (!(nt->parms.o_flags & GRE_SEQ))
+               dev->features |= NETIF_F_LLTX;
+
        err = register_netdevice(dev);
        if (err)
                goto out;