struct list_head remotes;
u8 eth_addr[ETH_ALEN];
u16 state; /* see ndm_state */
+ __be32 vni;
u8 flags; /* see ndm_flags */
};
send_eth = send_ip = true;
if (type == RTM_GETNEIGH) {
- ndm->ndm_family = AF_INET;
send_ip = !vxlan_addr_any(&rdst->remote_ip);
send_eth = !is_zero_ether_addr(fdb->eth_addr);
+ ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
} else
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_state = fdb->state;
if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
goto nla_put_failure;
+ if ((vxlan->flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
+ nla_put_u32(skb, NDA_SRC_VNI,
+ be32_to_cpu(fdb->vni)))
+ goto nla_put_failure;
if (rdst->remote_ifindex &&
nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
goto nla_put_failure;
return hash_64(value, FDB_HASH_BITS);
}
+/* Hash bucket index for a (MAC address, VNI) pair.
+ * Feeds four bytes of @addr (starting at offset 2) and @vni to
+ * jhash_2words() together with vxlan_salt, then masks the result with
+ * FDB_HASH_SIZE - 1.
+ */
+static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
+{
+ /* use 1 byte of OUI and 3 bytes of NIC */
+ u32 key = get_unaligned((const u32 *)(addr + 2));
+
+ /* vni is only hash input here, never interpreted as a number, so
+ * the __be32 -> u32 conversion is intentional; __force says so
+ * to sparse
+ */
+ return jhash_2words(key, (__force u32)vni, vxlan_salt) &
+ (FDB_HASH_SIZE - 1);
+}
+
/* Hash chain to use given mac address */
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
- const u8 *mac)
+ const u8 *mac, __be32 vni)
{
- return &vxlan->fdb_head[eth_hash(mac)];
+ /* without VXLAN_F_COLLECT_METADATA the bucket depends on the
+ * MAC address alone
+ */
+ if (!(vxlan->flags & VXLAN_F_COLLECT_METADATA))
+ return &vxlan->fdb_head[eth_hash(mac)];
+
+ /* otherwise the VNI takes part in bucket selection as well */
+ return &vxlan->fdb_head[eth_vni_hash(mac, vni)];
}
/* Look up Ethernet address in forwarding table */
static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
- const u8 *mac)
+ const u8 *mac, __be32 vni)
{
- struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
+ struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
struct vxlan_fdb *f;
hlist_for_each_entry_rcu(f, head, hlist) {
- if (ether_addr_equal(mac, f->eth_addr))
- return f;
+ /* skip entries that belong to another MAC address */
+ if (!ether_addr_equal(mac, f->eth_addr))
+ continue;
+
+ /* the VNI has to match too, but only on devices that have
+ * VXLAN_F_COLLECT_METADATA set
+ */
+ if (!(vxlan->flags & VXLAN_F_COLLECT_METADATA) ||
+ vni == f->vni)
+ return f;
}
return NULL;
}
static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
- const u8 *mac)
+ const u8 *mac, __be32 vni)
{
struct vxlan_fdb *f;
- f = __vxlan_find_mac(vxlan, mac);
+ f = __vxlan_find_mac(vxlan, mac, vni);
if (f)
f->used = jiffies;
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags,
- __be16 port, __be32 vni, __u32 ifindex,
- __u8 ndm_flags)
+ __be16 port, __be32 src_vni, __be32 vni,
+ __u32 ifindex, __u8 ndm_flags)
{
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
int notify = 0;
int rc;
- f = __vxlan_find_mac(vxlan, mac);
+ f = __vxlan_find_mac(vxlan, mac, src_vni);
if (f) {
if (flags & NLM_F_EXCL) {
netdev_dbg(vxlan->dev,
f->state = state;
f->flags = ndm_flags;
f->updated = f->used = jiffies;
+ f->vni = src_vni;
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
- vxlan_fdb_head(vxlan, mac));
+ vxlan_fdb_head(vxlan, mac, src_vni));
}
if (notify) {
}
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
- union vxlan_addr *ip, __be16 *port, __be32 *vni,
- u32 *ifindex)
+ union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
+ __be32 *vni, u32 *ifindex)
{
struct net *net = dev_net(vxlan->dev);
int err;
*vni = vxlan->default_dst.remote_vni;
}
+ if (tb[NDA_SRC_VNI]) {
+ if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32))
+ return -EINVAL;
+ *src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
+ } else {
+ *src_vni = vxlan->default_dst.remote_vni;
+ }
+
if (tb[NDA_IFINDEX]) {
struct net_device *tdev;
/* struct net *net = dev_net(vxlan->dev); */
union vxlan_addr ip;
__be16 port;
- __be32 vni;
+ __be32 src_vni, vni;
u32 ifindex;
int err;
if (tb[NDA_DST] == NULL)
return -EINVAL;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
if (err)
return err;
spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
- port, vni, ifindex, ndm->ndm_flags);
+ port, src_vni, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock);
return err;
}
-/* Delete entry (via netlink) */
-static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid)
+static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
+ const unsigned char *addr, union vxlan_addr ip,
+ __be16 port, __be32 src_vni, u32 vni, u32 ifindex,
+ u16 vid)
{
- struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
struct vxlan_rdst *rd = NULL;
- union vxlan_addr ip;
- __be16 port;
- __be32 vni;
- u32 ifindex;
- int err;
+ int err = -ENOENT;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
- if (err)
- return err;
-
- err = -ENOENT;
-
- spin_lock_bh(&vxlan->hash_lock);
- f = vxlan_find_mac(vxlan, addr);
+ f = vxlan_find_mac(vxlan, addr, src_vni);
if (!f)
- goto out;
+ return err;
if (!vxlan_addr_any(&ip)) {
rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
goto out;
}
- err = 0;
-
/* remove a destination if it's not the only one on the list,
* otherwise destroy the fdb entry
*/
vxlan_fdb_destroy(vxlan, f);
out:
+ return 0;
+}
+
+/* Delete entry (via netlink) */
+static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ union vxlan_addr ip;
+ __be32 src_vni, vni;
+ __be16 port;
+ u32 ifindex;
+ int err;
+
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
+ if (err)
+ return err;
+
+ spin_lock_bh(&vxlan->hash_lock);
+ err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
+ vid);
spin_unlock_bh(&vxlan->hash_lock);
return err;
* Return true if packet is bogus and should be dropped.
*/
static bool vxlan_snoop(struct net_device *dev,
- union vxlan_addr *src_ip, const u8 *src_mac)
+ union vxlan_addr *src_ip, const u8 *src_mac,
+ __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
- f = vxlan_find_mac(vxlan, src_mac);
+ f = vxlan_find_mac(vxlan, src_mac, vni);
if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f);
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
vxlan->cfg.dst_port,
+ vni,
vxlan->default_dst.remote_vni,
0, NTF_SELF);
spin_unlock(&vxlan->hash_lock);
static bool vxlan_set_mac(struct vxlan_dev *vxlan,
struct vxlan_sock *vs,
- struct sk_buff *skb)
+ struct sk_buff *skb, __be32 vni)
{
union vxlan_addr saddr;
}
if ((vxlan->flags & VXLAN_F_LEARN) &&
- vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
+ vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, vni))
return false;
return true;
__be16 protocol = htons(ETH_P_TEB);
bool raw_proto = false;
void *oiph;
+ __be32 vni = 0;
/* Need UDP and VXLAN header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
if (!vs)
goto drop;
- vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni));
+ vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
+
+ vxlan = vxlan_vs_find_vni(vs, vni);
if (!vxlan)
goto drop;
goto drop;
if (vxlan_collect_metadata(vs)) {
- __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
struct metadata_dst *tun_dst;
tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
}
if (!raw_proto) {
- if (!vxlan_set_mac(vxlan, vs, skb))
+ if (!vxlan_set_mac(vxlan, vs, skb, vni))
goto drop;
} else {
skb_reset_mac_header(skb);
return 0;
}
-static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
+static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct arphdr *parp;
goto out;
}
- f = vxlan_find_mac(vxlan, n->ha);
+ f = vxlan_find_mac(vxlan, n->ha, vni);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */
neigh_release(n);
int ns_olen;
int i, len;
- if (dev == NULL)
+ if (dev == NULL || !pskb_may_pull(request, request->len))
return NULL;
len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
skb_push(reply, sizeof(struct ethhdr));
skb_reset_mac_header(reply);
- ns = (struct nd_msg *)skb_transport_header(request);
+ ns = (struct nd_msg *)(ipv6_hdr(request) + 1);
daddr = eth_hdr(request)->h_source;
- ns_olen = request->len - skb_transport_offset(request) - sizeof(*ns);
+ ns_olen = request->len - skb_network_offset(request) -
+ sizeof(struct ipv6hdr) - sizeof(*ns);
for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
return reply;
}
-static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
+static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct nd_msg *msg;
const struct ipv6hdr *iphdr;
- const struct in6_addr *saddr, *daddr;
+ const struct in6_addr *daddr;
struct neighbour *n;
struct inet6_dev *in6_dev;
if (!in6_dev)
goto out;
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
+ goto out;
+
iphdr = ipv6_hdr(skb);
- saddr = &iphdr->saddr;
daddr = &iphdr->daddr;
- msg = (struct nd_msg *)skb_transport_header(skb);
+ msg = (struct nd_msg *)(iphdr + 1);
if (msg->icmph.icmp6_code != 0 ||
msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
goto out;
goto out;
}
- f = vxlan_find_mac(vxlan, n->ha);
+ f = vxlan_find_mac(vxlan, n->ha, vni);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */
neigh_release(n);
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
- struct vxlan_dev *dst_vxlan)
+ struct vxlan_dev *dst_vxlan, __be32 vni)
{
struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
}
if (dst_vxlan->flags & VXLAN_F_LEARN)
- vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source);
+ vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, vni);
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++;
static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *vxlan, union vxlan_addr *daddr,
- __be32 dst_port, __be32 vni, struct dst_entry *dst,
+ __be16 dst_port, __be32 vni, struct dst_entry *dst,
u32 rt_flags)
{
#if IS_ENABLED(CONFIG_IPV6)
return -ENOENT;
}
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
return 1;
}
}
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
- struct vxlan_rdst *rdst, bool did_rsc)
+ __be32 default_vni, struct vxlan_rdst *rdst,
+ bool did_rsc)
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
const struct iphdr *old_iph = ip_hdr(skb);
union vxlan_addr *dst;
union vxlan_addr remote_ip, local_ip;
- union vxlan_addr *src;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
if (vxlan_addr_any(dst)) {
if (did_rsc) {
/* short-circuited back to local bridge */
- vxlan_encap_bypass(skb, vxlan, vxlan);
+ vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
return;
}
goto drop;
}
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
- vni = rdst->remote_vni;
- src = &vxlan->cfg.saddr;
+ vni = (rdst->remote_vni) ? : default_vni;
+ local_ip = vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache;
md->gbp = skb->mark;
ttl = vxlan->cfg.ttl;
dst = &remote_ip;
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
vni = tunnel_id_to_key32(info->key.tun_id);
- src = &local_ip;
dst_cache = &info->dst_cache;
if (info->options_len)
md = ip_tunnel_info_opts(info);
src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
+ rcu_read_lock();
if (dst->sa.sa_family == AF_INET) {
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
struct rtable *rt;
rt = vxlan_get_route(vxlan, dev, sock4, skb,
rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr,
- &src->sin.sin_addr.s_addr,
+ &local_ip.sin.sin_addr.s_addr,
dst_port, src_port,
dst_cache, info);
if (IS_ERR(rt)) {
dst_port, vni, &rt->dst,
rt->rt_flags);
if (err)
- return;
+ goto out_unlock;
} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
df = htons(IP_DF);
}
if (err < 0)
goto tx_error;
- udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, src->sin.sin_addr.s_addr,
+ udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
ndst = vxlan6_get_route(vxlan, dev, sock6, skb,
rdst ? rdst->remote_ifindex : 0, tos,
label, &dst->sin6.sin6_addr,
- &src->sin6.sin6_addr,
+ &local_ip.sin6.sin6_addr,
dst_port, src_port,
dst_cache, info);
if (IS_ERR(ndst)) {
dst_port, vni, ndst,
rt6i_flags);
if (err)
- return;
+ goto out_unlock;
}
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
goto tx_error;
udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
- &src->sin6.sin6_addr,
+ &local_ip.sin6.sin6_addr,
&dst->sin6.sin6_addr, tos, ttl,
label, src_port, dst_port, !udp_sum);
#endif
}
+out_unlock:
+ rcu_read_unlock();
return;
drop:
return;
tx_error:
+ rcu_read_unlock();
if (err == -ELOOP)
dev->stats.collisions++;
else if (err == -ENETUNREACH)
bool did_rsc = false;
struct vxlan_rdst *rdst, *fdst = NULL;
struct vxlan_fdb *f;
+ __be32 vni = 0;
info = skb_tunnel_info(skb);
skb_reset_mac_header(skb);
if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
- if (info && info->mode & IP_TUNNEL_INFO_TX)
- vxlan_xmit_one(skb, dev, NULL, false);
- else
- kfree_skb(skb);
- return NETDEV_TX_OK;
+ if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
+ info->mode & IP_TUNNEL_INFO_TX) {
+ vni = tunnel_id_to_key32(info->key.tun_id);
+ } else {
+ if (info && info->mode & IP_TUNNEL_INFO_TX)
+ vxlan_xmit_one(skb, dev, vni, NULL, false);
+ else
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
}
if (vxlan->flags & VXLAN_F_PROXY) {
eth = eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_ARP)
- return arp_reduce(dev, skb);
+ return arp_reduce(dev, skb, vni);
#if IS_ENABLED(CONFIG_IPV6)
- else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
- pskb_may_pull(skb, sizeof(struct ipv6hdr)
- + sizeof(struct nd_msg)) &&
- ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
- struct nd_msg *msg;
-
- msg = (struct nd_msg *)skb_transport_header(skb);
- if (msg->icmph.icmp6_code == 0 &&
- msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
- return neigh_reduce(dev, skb);
+ else if (ntohs(eth->h_proto) == ETH_P_IPV6) {
+ struct ipv6hdr *hdr, _hdr;
+ if ((hdr = skb_header_pointer(skb,
+ skb_network_offset(skb),
+ sizeof(_hdr), &_hdr)) &&
+ hdr->nexthdr == IPPROTO_ICMPV6)
+ return neigh_reduce(dev, skb, vni);
}
#endif
}
eth = eth_hdr(skb);
- f = vxlan_find_mac(vxlan, eth->h_dest);
+ f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false;
if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
ntohs(eth->h_proto) == ETH_P_IPV6)) {
did_rsc = route_shortcircuit(dev, skb);
if (did_rsc)
- f = vxlan_find_mac(vxlan, eth->h_dest);
+ f = vxlan_find_mac(vxlan, eth->h_dest, vni);
}
if (f == NULL) {
- f = vxlan_find_mac(vxlan, all_zeros_mac);
+ f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f == NULL) {
if ((vxlan->flags & VXLAN_F_L2MISS) &&
!is_multicast_ether_addr(eth->h_dest))
}
skb1 = skb_clone(skb, GFP_ATOMIC);
if (skb1)
- vxlan_xmit_one(skb1, dev, rdst, did_rsc);
+ vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
}
if (fdst)
- vxlan_xmit_one(skb, dev, fdst, did_rsc);
+ vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
else
kfree_skb(skb);
return NETDEV_TX_OK;
if (f->state & (NUD_PERMANENT | NUD_NOARP))
continue;
+ if (f->flags & NTF_EXT_LEARNED)
+ continue;
+
timeout = f->used + vxlan->cfg.age_interval * HZ;
if (time_before_eq(timeout, jiffies)) {
netdev_dbg(vxlan->dev,
return 0;
}
-static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
+static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
{
struct vxlan_fdb *f;
spin_lock_bh(&vxlan->hash_lock);
- f = __vxlan_find_mac(vxlan, all_zeros_mac);
+ f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f)
vxlan_fdb_destroy(vxlan, f);
spin_unlock_bh(&vxlan->hash_lock);
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- vxlan_fdb_delete_default(vxlan);
+ vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
free_percpu(dev->tstats);
}
if (data[IFLA_VXLAN_ID]) {
__u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
- if (id >= VXLAN_VID_MASK)
+ if (id >= VXLAN_N_VID)
return -ERANGE;
}
sock = vxlan_create_sock(net, ipv6, port, flags);
if (IS_ERR(sock)) {
- pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
- PTR_ERR(sock));
kfree(vs);
return ERR_CAST(sock);
}
static int vxlan_sock_add(struct vxlan_dev *vxlan)
{
- bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
+ bool ipv6 = vxlan->flags & VXLAN_F_IPV6 || metadata;
+ bool ipv4 = !ipv6 || metadata;
int ret = 0;
RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
#if IS_ENABLED(CONFIG_IPV6)
RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
- if (ipv6 || metadata)
+ if (ipv6) {
ret = __vxlan_sock_add(vxlan, true);
+ if (ret < 0 && ret != -EAFNOSUPPORT)
+ ipv4 = false;
+ }
#endif
- if (!ret && (!ipv6 || metadata))
+ if (ipv4)
ret = __vxlan_sock_add(vxlan, false);
if (ret < 0)
vxlan_sock_release(vxlan);
}
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
- struct vxlan_config *conf)
+ struct vxlan_config *conf,
+ bool changelink)
{
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev), *tmp;
struct vxlan_rdst *dst = &vxlan->default_dst;
unsigned short needed_headroom = ETH_HLEN;
- int err;
bool use_ipv6 = false;
__be16 default_port = vxlan->cfg.dst_port;
struct net_device *lowerdev = NULL;
- if (conf->flags & VXLAN_F_GPE) {
- /* For now, allow GPE only together with COLLECT_METADATA.
- * This can be relaxed later; in such case, the other side
- * of the PtP link will have to be provided.
- */
- if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
- !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
- pr_info("unsupported combination of extensions\n");
- return -EINVAL;
+ if (!changelink) {
+ if (conf->flags & VXLAN_F_GPE) {
+ /* For now, allow GPE only together with
+ * COLLECT_METADATA. This can be relaxed later; in such
+ * case, the other side of the PtP link will have to be
+ * provided.
+ */
+ if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
+ !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+ pr_info("unsupported combination of extensions\n");
+ return -EINVAL;
+ }
+ vxlan_raw_setup(dev);
+ } else {
+ vxlan_ether_setup(dev);
}
- vxlan_raw_setup(dev);
- } else {
- vxlan_ether_setup(dev);
+ /* MTU range: 68 - 65535 */
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = ETH_MAX_MTU;
+ vxlan->net = src_net;
}
- /* MTU range: 68 - 65535 */
- dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = ETH_MAX_MTU;
-
- vxlan->net = src_net;
-
dst->remote_vni = conf->vni;
memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
return -EINVAL;
}
- if (conf->remote_ifindex) {
+ if (conf->remote_ifindex &&
+ conf->remote_ifindex != vxlan->cfg.remote_ifindex) {
lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
dst->remote_ifindex = conf->remote_ifindex;
if (!lowerdev) {
- pr_info("ifindex %d does not exist\n", dst->remote_ifindex);
+ pr_info("ifindex %d does not exist\n",
+ dst->remote_ifindex);
return -ENODEV;
}
(use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
needed_headroom = lowerdev->hard_header_len;
- } else if (vxlan_addr_multicast(&dst->remote_ip)) {
+ } else if (!conf->remote_ifindex &&
+ vxlan_addr_multicast(&dst->remote_ip)) {
pr_info("multicast destination requires interface to be specified\n");
return -EINVAL;
}
+ if (lowerdev) {
+ dev->gso_max_size = lowerdev->gso_max_size;
+ dev->gso_max_segs = lowerdev->gso_max_segs;
+ }
+
if (conf->mtu) {
int max_mtu = ETH_MAX_MTU;
if (!vxlan->cfg.age_interval)
vxlan->cfg.age_interval = FDB_AGE_DEFAULT;
+ if (changelink)
+ return 0;
+
list_for_each_entry(tmp, &vn->vxlan_list, next) {
if (tmp->cfg.vni == conf->vni &&
(tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 ||
}
}
+ return 0;
+}
+
+static int __vxlan_dev_create(struct net *net, struct net_device *dev,
+ struct vxlan_config *conf)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err;
+
+ err = vxlan_dev_configure(net, dev, conf, false);
+ if (err)
+ return err;
+
dev->ethtool_ops = &vxlan_ethtool_ops;
/* create an fdb entry for a valid default destination */
if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
err = vxlan_fdb_create(vxlan, all_zeros_mac,
&vxlan->default_dst.remote_ip,
- NUD_REACHABLE|NUD_PERMANENT,
- NLM_F_EXCL|NLM_F_CREATE,
+ NUD_REACHABLE | NUD_PERMANENT,
+ NLM_F_EXCL | NLM_F_CREATE,
vxlan->cfg.dst_port,
vxlan->default_dst.remote_vni,
+ vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_ifindex,
NTF_SELF);
if (err)
err = register_netdevice(dev);
if (err) {
- vxlan_fdb_delete_default(vxlan);
+ vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
return err;
}
list_add(&vxlan->next, &vn->vxlan_list);
-
return 0;
}
-static int vxlan_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
+/* Translate netlink link attributes into a vxlan_config.
+ * @tb: generic link attributes; only IFLA_MTU is read here
+ * @data: IFLA_VXLAN_* attributes
+ * @dev: device being created or changed
+ * @conf: output configuration; zeroed first, then seeded from the
+ * device's current cfg when @changelink is true
+ * @changelink: true when called for an already existing device
+ *
+ * Returns 0 on success, -EOPNOTSUPP when @changelink is true and an
+ * attribute that is rejected for existing devices is present (a
+ * differing VNI or any attribute guarded by a changelink check below),
+ * or -EPFNOSUPPORT for IPv6 addresses when CONFIG_IPV6 is disabled.
+ *
+ * Apart from @conf, the only state written is VXLAN_F_LEARN in
+ * vxlan->flags, and only after every attribute has been accepted.
+ */
+static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
+ struct net_device *dev, struct vxlan_config *conf,
+ bool changelink)
{
- struct vxlan_config conf;
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ bool clear_learn = false;
- memset(&conf, 0, sizeof(conf));
+ memset(conf, 0, sizeof(*conf));
- if (data[IFLA_VXLAN_ID])
- conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
+ /* if changelink operation, start with old existing cfg */
+ if (changelink)
+ memcpy(conf, &vxlan->cfg, sizeof(*conf));
+
+ if (data[IFLA_VXLAN_ID]) {
+ __be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
+
+ /* the VNI of an existing device cannot be changed */
+ if (changelink && (vni != conf->vni))
+ return -EOPNOTSUPP;
+ conf->vni = vni;
+ }
if (data[IFLA_VXLAN_GROUP]) {
- conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+ conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
} else if (data[IFLA_VXLAN_GROUP6]) {
if (!IS_ENABLED(CONFIG_IPV6))
return -EPFNOSUPPORT;
- conf.remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
- conf.remote_ip.sa.sa_family = AF_INET6;
+ conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
+ conf->remote_ip.sa.sa_family = AF_INET6;
}
if (data[IFLA_VXLAN_LOCAL]) {
- conf.saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
- conf.saddr.sa.sa_family = AF_INET;
+ conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
+ conf->saddr.sa.sa_family = AF_INET;
} else if (data[IFLA_VXLAN_LOCAL6]) {
if (!IS_ENABLED(CONFIG_IPV6))
return -EPFNOSUPPORT;
/* TODO: respect scope id */
- conf.saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
- conf.saddr.sa.sa_family = AF_INET6;
+ conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
+ conf->saddr.sa.sa_family = AF_INET6;
}
if (data[IFLA_VXLAN_LINK])
- conf.remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
+ conf->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
if (data[IFLA_VXLAN_TOS])
- conf.tos = nla_get_u8(data[IFLA_VXLAN_TOS]);
+ conf->tos = nla_get_u8(data[IFLA_VXLAN_TOS]);
if (data[IFLA_VXLAN_TTL])
- conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
+ conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
if (data[IFLA_VXLAN_LABEL])
- conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
+ conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
IPV6_FLOWLABEL_MASK;
- if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
- conf.flags |= VXLAN_F_LEARN;
+ if (data[IFLA_VXLAN_LEARNING]) {
+ if (nla_get_u8(data[IFLA_VXLAN_LEARNING])) {
+ conf->flags |= VXLAN_F_LEARN;
+ } else {
+ conf->flags &= ~VXLAN_F_LEARN;
+ /* the flag on the device itself is cleared at the
+ * end of this function, once nothing can fail
+ */
+ clear_learn = true;
+ }
+ } else if (!changelink) {
+ /* default to learn on a new device */
+ conf->flags |= VXLAN_F_LEARN;
+ }
- if (data[IFLA_VXLAN_AGEING])
- conf.age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
+ if (data[IFLA_VXLAN_AGEING]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
+ }
- if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY]))
- conf.flags |= VXLAN_F_PROXY;
+ if (data[IFLA_VXLAN_PROXY]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_PROXY]))
+ conf->flags |= VXLAN_F_PROXY;
+ }
- if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC]))
- conf.flags |= VXLAN_F_RSC;
+ if (data[IFLA_VXLAN_RSC]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_RSC]))
+ conf->flags |= VXLAN_F_RSC;
+ }
- if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS]))
- conf.flags |= VXLAN_F_L2MISS;
+ if (data[IFLA_VXLAN_L2MISS]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_L2MISS]))
+ conf->flags |= VXLAN_F_L2MISS;
+ }
- if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS]))
- conf.flags |= VXLAN_F_L3MISS;
+ if (data[IFLA_VXLAN_L3MISS]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_L3MISS]))
+ conf->flags |= VXLAN_F_L3MISS;
+ }
- if (data[IFLA_VXLAN_LIMIT])
- conf.addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
+ if (data[IFLA_VXLAN_LIMIT]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
+ }
- if (data[IFLA_VXLAN_COLLECT_METADATA] &&
- nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA]))
- conf.flags |= VXLAN_F_COLLECT_METADATA;
+ if (data[IFLA_VXLAN_COLLECT_METADATA]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA]))
+ conf->flags |= VXLAN_F_COLLECT_METADATA;
+ }
if (data[IFLA_VXLAN_PORT_RANGE]) {
- const struct ifla_vxlan_port_range *p
- = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
- conf.port_min = ntohs(p->low);
- conf.port_max = ntohs(p->high);
+ if (!changelink) {
+ const struct ifla_vxlan_port_range *p
+ = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
+ conf->port_min = ntohs(p->low);
+ conf->port_max = ntohs(p->high);
+ } else {
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (data[IFLA_VXLAN_PORT]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
+ }
+
+ if (data[IFLA_VXLAN_UDP_CSUM]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+ conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
+ }
+
+ if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
+ conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+ }
+
+ if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
+ conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
}
- if (data[IFLA_VXLAN_PORT])
- conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
+ if (data[IFLA_VXLAN_REMCSUM_TX]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
+ conf->flags |= VXLAN_F_REMCSUM_TX;
+ }
- if (data[IFLA_VXLAN_UDP_CSUM] &&
- !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
- conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
+ if (data[IFLA_VXLAN_REMCSUM_RX]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
+ conf->flags |= VXLAN_F_REMCSUM_RX;
+ }
- if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
- nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
- conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+ if (data[IFLA_VXLAN_GBP]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->flags |= VXLAN_F_GBP;
+ }
- if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
- nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
- conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+ if (data[IFLA_VXLAN_GPE]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->flags |= VXLAN_F_GPE;
+ }
- if (data[IFLA_VXLAN_REMCSUM_TX] &&
- nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
- conf.flags |= VXLAN_F_REMCSUM_TX;
+ if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+ }
- if (data[IFLA_VXLAN_REMCSUM_RX] &&
- nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
- conf.flags |= VXLAN_F_REMCSUM_RX;
+ if (tb[IFLA_MTU]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->mtu = nla_get_u32(tb[IFLA_MTU]);
+ }
- if (data[IFLA_VXLAN_GBP])
- conf.flags |= VXLAN_F_GBP;
+ /* Every attribute has been accepted. Learning is switched off on
+ * the device here rather than while parsing, so that a request
+ * rejected above leaves vxlan->flags untouched.
+ */
+ if (clear_learn)
+ vxlan->flags &= ~VXLAN_F_LEARN;
+
+ return 0;
+}
- if (data[IFLA_VXLAN_GPE])
- conf.flags |= VXLAN_F_GPE;
+/* Create a new vxlan device: parse the netlink attributes into a fresh
+ * config (not a changelink) and hand it to __vxlan_dev_create().
+ * Returns 0 on success or the first error reported by either step.
+ */
+static int vxlan_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct vxlan_config conf;
+ int rc;
- if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
- conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+ rc = vxlan_nl2conf(tb, data, dev, &conf, false);
- if (tb[IFLA_MTU])
- conf.mtu = nla_get_u32(tb[IFLA_MTU]);
+ /* only create the device when parsing succeeded */
+ if (!rc)
+ rc = __vxlan_dev_create(src_net, dev, &conf);
+
+ return rc;
+}
+
+/* Apply changed link attributes to an existing vxlan device.
+ * Parses @tb/@data on top of the current config, reconfigures the
+ * device, and when the default remote address changed replaces the
+ * all-zeros-MAC fdb entry under vxlan->hash_lock.
+ * Returns 0 on success or the error from parsing, configuring or
+ * creating the new default fdb entry.
+ */
+static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ struct vxlan_rdst old_dst;
+ struct vxlan_config conf;
+ int err;
+
+ err = vxlan_nl2conf(tb, data, dev, &conf, true);
+ if (err)
+ return err;
+
+ /* snapshot the default destination before it is reconfigured */
+ old_dst = *dst;
- return vxlan_dev_configure(src_net, dev, &conf);
+ err = vxlan_dev_configure(vxlan->net, dev, &conf, true);
+ if (err)
+ return err;
+
+ /* nothing else to do unless the default remote address changed */
+ if (vxlan_addr_equal(&dst->remote_ip, &old_dst.remote_ip))
+ return 0;
+
+ spin_lock_bh(&vxlan->hash_lock);
+
+ /* drop the default entry that pointed at the old remote; the
+ * result of the delete is not checked
+ */
+ if (!vxlan_addr_any(&old_dst.remote_ip))
+ __vxlan_fdb_delete(vxlan, all_zeros_mac,
+ old_dst.remote_ip,
+ vxlan->cfg.dst_port,
+ old_dst.remote_vni,
+ old_dst.remote_vni,
+ old_dst.remote_ifindex, 0);
+
+ /* install a default entry for the new remote, if one is set;
+ * err is still 0 here when no entry has to be created
+ */
+ if (!vxlan_addr_any(&dst->remote_ip))
+ err = vxlan_fdb_create(vxlan, all_zeros_mac,
+ &dst->remote_ip,
+ NUD_REACHABLE | NUD_PERMANENT,
+ NLM_F_CREATE | NLM_F_APPEND,
+ vxlan->cfg.dst_port,
+ dst->remote_vni,
+ dst->remote_vni,
+ dst->remote_ifindex,
+ NTF_SELF);
+
+ spin_unlock_bh(&vxlan->hash_lock);
+
+ return err;
}
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
.setup = vxlan_setup,
.validate = vxlan_validate,
.newlink = vxlan_newlink,
+ .changelink = vxlan_changelink,
.dellink = vxlan_dellink,
.get_size = vxlan_get_size,
.fill_info = vxlan_fill_info,
if (IS_ERR(dev))
return dev;
- err = vxlan_dev_configure(net, dev, conf);
+ err = __vxlan_dev_create(net, dev, conf);
if (err < 0) {
free_netdev(dev);
return ERR_PTR(err);