struct vxlan_net {
struct list_head vxlan_list;
struct hlist_head sock_list[PORT_HASH_SIZE];
+ spinlock_t sock_lock;
};
struct vxlan_rdst {
__u8 ttl;
u32 flags; /* VXLAN_F_* below */
+ struct work_struct sock_work;
struct work_struct igmp_work;
+
unsigned long age_interval;
struct timer_list age_timer;
spinlock_t hash_lock;
static u32 vxlan_salt __read_mostly;
static struct workqueue_struct *vxlan_wq;
+static void vxlan_sock_work(struct work_struct *work);
+
/* Virtual Network hash table head */
static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
{
atomic_inc(&vs->refcnt);
}
-static void vxlan_sock_release(struct vxlan_sock *vs)
+static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
{
if (!atomic_dec_and_test(&vs->refcnt))
return;
+ spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
+ spin_unlock(&vn->sock_lock);
+
queue_work(vxlan_wq, &vs->del_work);
}
ip_mc_leave_group(sk, &mreq);
release_sock(sk);
- vxlan_sock_release(vs);
+ vxlan_sock_release(vn, vs);
dev_put(vxlan->dev);
}
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+ struct vxlan_sock *vs;
+ __u32 vni = vxlan->default_dst.remote_vni;
+
dev->tstats = alloc_percpu(struct pcpu_tstats);
if (!dev->tstats)
return -ENOMEM;
+ spin_lock(&vn->sock_lock);
+ vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
+ if (vs) {
+ /* If we have a socket with same port already, reuse it */
+ atomic_inc(&vs->refcnt);
+ vxlan->vn_sock = vs;
+ hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+ } else {
+ /* otherwise make new socket outside of RTNL */
+ dev_hold(dev);
+ queue_work(vxlan_wq, &vxlan->sock_work);
+ }
+ spin_unlock(&vn->sock_lock);
+
return 0;
}
static int vxlan_open(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_sock *vs = vxlan->vn_sock;
+
+ /* socket hasn't been created */
+ if (!vs)
+ return -ENOTCONN;
if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
- vxlan_sock_hold(vxlan->vn_sock);
+ vxlan_sock_hold(vs);
dev_hold(dev);
queue_work(vxlan_wq, &vxlan->igmp_work);
}
static int vxlan_stop(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_sock *vs = vxlan->vn_sock;
- if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
- vxlan_sock_hold(vxlan->vn_sock);
+ if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
+ vxlan_sock_hold(vs);
dev_hold(dev);
queue_work(vxlan_wq, &vxlan->igmp_work);
}
INIT_LIST_HEAD(&vxlan->next);
spin_lock_init(&vxlan->hash_lock);
INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
+ INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
init_timer_deferrable(&vxlan->age_timer);
vxlan->age_timer.function = vxlan_cleanup;
kfree_rcu(vs, rcu);
}
-/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
{
struct vxlan_sock *vs;
return vs;
}
+/* Scheduled at device creation to bind to a socket */
+static void vxlan_sock_work(struct work_struct *work)
+{
+ struct vxlan_dev *vxlan
+ = container_of(work, struct vxlan_dev, sock_work);
+ struct net_device *dev = vxlan->dev;
+ struct net *net = dev_net(dev);
+ __u32 vni = vxlan->default_dst.remote_vni;
+ __be16 port = vxlan->dst_port;
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_sock *nvs, *ovs;
+
+ nvs = vxlan_socket_create(net, port);
+ if (IS_ERR(nvs)) {
+ netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
+ PTR_ERR(nvs));
+ goto out;
+ }
+
+ spin_lock(&vn->sock_lock);
+ /* Look again to see if can reuse socket */
+ ovs = vxlan_find_port(net, port);
+ if (ovs) {
+ atomic_inc(&ovs->refcnt);
+ vxlan->vn_sock = ovs;
+ hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
+ spin_unlock(&vn->sock_lock);
+
+ sk_release_kernel(nvs->sock->sk);
+ kfree(nvs);
+ } else {
+ vxlan->vn_sock = nvs;
+ hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
+ hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
+ spin_unlock(&vn->sock_lock);
+ }
+out:
+ dev_put(dev);
+}
+
static int vxlan_newlink(struct net *net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *dst = &vxlan->default_dst;
- struct vxlan_sock *vs;
__u32 vni;
int err;
return -EEXIST;
}
- vs = vxlan_find_port(net, vxlan->dst_port);
- if (vs)
- atomic_inc(&vs->refcnt);
- else {
- /* Drop lock because socket create acquires RTNL lock */
- rtnl_unlock();
- vs = vxlan_socket_create(net, vxlan->dst_port);
- rtnl_lock();
- if (IS_ERR(vs))
- return PTR_ERR(vs);
-
- hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port));
- }
- vxlan->vn_sock = vs;
-
SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
err = register_netdevice(dev);
- if (err) {
- vxlan_sock_release(vs);
+ if (err)
return err;
- }
list_add(&vxlan->next, &vn->vxlan_list);
- hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
return 0;
}
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock;
hlist_del_rcu(&vxlan->hlist);
list_del(&vxlan->next);
unregister_netdevice_queue(dev, head);
- vxlan_sock_release(vs);
+ if (vs)
+ vxlan_sock_release(vn, vs);
}
static size_t vxlan_get_size(const struct net_device *dev)
unsigned int h;
INIT_LIST_HEAD(&vn->vxlan_list);
+ spin_lock_init(&vn->sock_lock);
for (h = 0; h < PORT_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vn->sock_list[h]);