struct veth_msg *next;
struct VethFramesData data;
int token;
- unsigned long in_use;
+ int in_use;
struct sk_buff *skb;
struct device *dev;
};
int num_events;
struct VethCapData local_caps;
+ struct kobject kobject;
struct timer_list ack_timer;
+ struct timer_list reset_timer;
+ unsigned int reset_timeout;
+ unsigned long last_contact;
+ int outstanding_tx;
+
spinlock_t lock;
unsigned long state;
HvLpInstanceId src_inst;
struct VethCapData remote_caps;
u32 ack_timeout;
- spinlock_t msg_stack_lock;
struct veth_msg *msg_stack_head;
};
u64 mac_addr;
HvLpIndexMap lpar_map;
- spinlock_t pending_gate;
- struct sk_buff *pending_skb;
- HvLpIndexMap pending_lpmask;
+ /* queue_lock protects the stopped_map and dev's queue. */
+ spinlock_t queue_lock;
+ HvLpIndexMap stopped_map;
+ /* mcast_gate protects promiscuous, num_mcast & mcast_addr. */
rwlock_t mcast_gate;
int promiscuous;
- int all_mcast;
int num_mcast;
u64 mcast_addr[VETH_MAX_MCAST];
};
static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
-static void veth_flush_pending(struct veth_lpar_connection *cnx);
+static void veth_wake_queues(struct veth_lpar_connection *cnx);
+static void veth_stop_queues(struct veth_lpar_connection *cnx);
static void veth_receive(struct veth_lpar_connection *, struct VethLpEvent *);
-static void veth_timed_ack(unsigned long connectionPtr);
+static void veth_release_connection(struct kobject *kobject);
+static void veth_timed_ack(unsigned long ptr);
+static void veth_timed_reset(unsigned long ptr);
+
+static struct kobj_type veth_lpar_connection_ktype = {
+ .release = veth_release_connection /* Stops and then destroys the
+ * connection. Presumably invoked by the kobject core once the
+ * last reference is dropped -- confirm. */
+};
/*
* Utility functions
#define veth_debug(fmt, args...) do {} while (0)
#endif
+/* Push msg onto the head of the connection's message stack, which is
+ * singly linked through msg->next.
+ * You must hold the connection's lock when you call this function. */
static inline void veth_stack_push(struct veth_lpar_connection *cnx,
struct veth_msg *msg)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cnx->msg_stack_lock, flags);
msg->next = cnx->msg_stack_head;
cnx->msg_stack_head = msg;
- spin_unlock_irqrestore(&cnx->msg_stack_lock, flags);
}
+/* Remove and return the message at the head of the connection's message
+ * stack, or return NULL if the stack is empty.
+ * You must hold the connection's lock when you call this function. */
static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
{
- unsigned long flags;
struct veth_msg *msg;
- spin_lock_irqsave(&cnx->msg_stack_lock, flags);
msg = cnx->msg_stack_head;
if (msg)
cnx->msg_stack_head = cnx->msg_stack_head->next;
- spin_unlock_irqrestore(&cnx->msg_stack_lock, flags);
+
return msg;
}
+/*
+ * Report whether the connection's message stack has nothing on it.
+ * Returns 1 when the stack head is NULL, 0 otherwise.
+ * The caller must already hold the connection's lock.
+ */
+static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx)
+{
+	return !cnx->msg_stack_head;
+}
+
static inline HvLpEvent_Rc
veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
HvLpIndex rlp = event->base_event.xSourceLp;
struct veth_lpar_connection *cnx = veth_cnx[rlp];
unsigned long flags;
- int i;
+ int i, acked = 0;
BUG_ON(! cnx);
break;
case VethEventTypeFramesAck:
spin_lock_irqsave(&cnx->lock, flags);
+
for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
u16 msgnum = event->u.frames_ack_data.token[i];
- if (msgnum < VETH_NUMBUFFERS)
+ if (msgnum < VETH_NUMBUFFERS) {
veth_recycle_msg(cnx, cnx->msgs + msgnum);
+ cnx->outstanding_tx--;
+ acked++;
+ }
}
+
+ if (acked > 0) {
+ cnx->last_contact = jiffies;
+ veth_wake_queues(cnx);
+ }
+
spin_unlock_irqrestore(&cnx->lock, flags);
- veth_flush_pending(cnx);
break;
case VethEventTypeFrames:
veth_receive(cnx, event);
restart:
if (cnx->state & VETH_STATE_RESET) {
- int i;
-
if (cnx->state & VETH_STATE_OPEN)
HvCallEvent_closeLpEventPath(cnx->remote_lp,
HvLpEvent_Type_VirtualLan);
| VETH_STATE_SENTCAPACK | VETH_STATE_READY);
/* Clean up any leftover messages */
- if (cnx->msgs)
+ if (cnx->msgs) {
+ int i;
for (i = 0; i < VETH_NUMBUFFERS; ++i)
veth_recycle_msg(cnx, cnx->msgs + i);
+ }
+
+ cnx->outstanding_tx = 0;
+ veth_wake_queues(cnx);
/* Drop the lock so we can do stuff that might sleep or
* take other locks. */
spin_unlock_irq(&cnx->lock);
del_timer_sync(&cnx->ack_timer);
- veth_flush_pending(cnx);
+ del_timer_sync(&cnx->reset_timer);
spin_lock_irq(&cnx->lock);
{
struct veth_lpar_connection *cnx;
struct veth_msg *msgs;
- int i;
+ int i, rc;
if ( (rlp == this_lp)
|| ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
cnx->remote_lp = rlp;
spin_lock_init(&cnx->lock);
INIT_WORK(&cnx->statemachine_wq, veth_statemachine, cnx);
+
init_timer(&cnx->ack_timer);
cnx->ack_timer.function = veth_timed_ack;
cnx->ack_timer.data = (unsigned long) cnx;
+
+ init_timer(&cnx->reset_timer);
+ cnx->reset_timer.function = veth_timed_reset;
+ cnx->reset_timer.data = (unsigned long) cnx;
+ cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000);
+
memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
veth_cnx[rlp] = cnx;
+ /* This gets us 1 reference, which is held on behalf of the driver
+ * infrastructure. It's released at module unload. */
+ kobject_init(&cnx->kobject);
+ cnx->kobject.ktype = &veth_lpar_connection_ktype;
+ rc = kobject_set_name(&cnx->kobject, "cnx%.2d", rlp);
+ if (rc != 0)
+ return rc;
+
msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL);
if (! msgs) {
veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
cnx->msgs = msgs;
memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg));
- spin_lock_init(&cnx->msg_stack_lock);
for (i = 0; i < VETH_NUMBUFFERS; i++) {
msgs[i].token = i;
return 0;
}
-static void veth_stop_connection(u8 rlp)
+static void veth_stop_connection(struct veth_lpar_connection *cnx)
{
- struct veth_lpar_connection *cnx = veth_cnx[rlp];
-
- if (! cnx)
+ if (!cnx)
return;
spin_lock_irq(&cnx->lock);
/* Wait for the state machine to run. */
flush_scheduled_work();
+}
+
+static void veth_destroy_connection(struct veth_lpar_connection *cnx)
+{
+ if (!cnx)
+ return;
if (cnx->num_events > 0)
mf_deallocate_lp_events(cnx->remote_lp,
HvLpEvent_Type_VirtualLan,
cnx->num_ack_events,
NULL, NULL);
-}
-
-static void veth_destroy_connection(u8 rlp)
-{
- struct veth_lpar_connection *cnx = veth_cnx[rlp];
-
- if (! cnx)
- return;
kfree(cnx->msgs);
+ veth_cnx[cnx->remote_lp] = NULL;
kfree(cnx);
- veth_cnx[rlp] = NULL;
+}
+
+/*
+ * Release callback wired into veth_lpar_connection_ktype.
+ * Recovers the veth_lpar_connection that embeds kobj, then stops the
+ * connection and destroys it, in that order.
+ */
+static void veth_release_connection(struct kobject *kobj)
+{
+	struct veth_lpar_connection *cnx =
+		container_of(kobj, struct veth_lpar_connection, kobject);
+
+	veth_stop_connection(cnx);
+	veth_destroy_connection(cnx);
}
/*
write_lock_irqsave(&port->mcast_gate, flags);
- if (dev->flags & IFF_PROMISC) { /* set promiscuous mode */
- printk(KERN_INFO "%s: Promiscuous mode enabled.\n",
- dev->name);
+ if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
+ (dev->mc_count > VETH_MAX_MCAST)) {
port->promiscuous = 1;
- } else if ( (dev->flags & IFF_ALLMULTI)
- || (dev->mc_count > VETH_MAX_MCAST) ) {
- port->all_mcast = 1;
} else {
struct dev_mc_list *dmi = dev->mc_list;
int i;
+ port->promiscuous = 0;
+
/* Update table */
port->num_mcast = 0;
.get_link = veth_get_link,
};
-static void veth_tx_timeout(struct net_device *dev)
-{
- struct veth_port *port = (struct veth_port *)dev->priv;
- struct net_device_stats *stats = &port->stats;
- unsigned long flags;
- int i;
-
- stats->tx_errors++;
-
- spin_lock_irqsave(&port->pending_gate, flags);
-
- if (!port->pending_lpmask) {
- spin_unlock_irqrestore(&port->pending_gate, flags);
- return;
- }
-
- printk(KERN_WARNING "%s: Tx timeout! Resetting lp connections: %08x\n",
- dev->name, port->pending_lpmask);
-
- for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
- struct veth_lpar_connection *cnx = veth_cnx[i];
-
- if (! (port->pending_lpmask & (1<<i)))
- continue;
-
- /* If we're pending on it, we must be connected to it,
- * so we should certainly have a structure for it. */
- BUG_ON(! cnx);
-
- /* Theoretically we could be kicking a connection
- * which doesn't deserve it, but in practice if we've
- * had a Tx timeout, the pending_lpmask will have
- * exactly one bit set - the connection causing the
- * problem. */
- spin_lock(&cnx->lock);
- cnx->state |= VETH_STATE_RESET;
- veth_kick_statemachine(cnx);
- spin_unlock(&cnx->lock);
- }
-
- spin_unlock_irqrestore(&port->pending_gate, flags);
-}
-
static struct net_device * __init veth_probe_one(int vlan, struct device *vdev)
{
struct net_device *dev;
port = (struct veth_port *) dev->priv;
- spin_lock_init(&port->pending_gate);
+ spin_lock_init(&port->queue_lock);
rwlock_init(&port->mcast_gate);
+ port->stopped_map = 0;
for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
HvLpVirtualLanIndexMap map;
dev->set_multicast_list = veth_set_multicast_list;
SET_ETHTOOL_OPS(dev, &ops);
- dev->watchdog_timeo = 2 * (VETH_ACKTIMEOUT * HZ / 1000000);
- dev->tx_timeout = veth_tx_timeout;
-
SET_NETDEV_DEV(dev, vdev);
rc = register_netdev(dev);
struct veth_lpar_connection *cnx = veth_cnx[rlp];
struct veth_port *port = (struct veth_port *) dev->priv;
HvLpEvent_Rc rc;
- u32 dma_address, dma_length;
struct veth_msg *msg = NULL;
int err = 0;
unsigned long flags;
goto drop;
}
- dma_length = skb->len;
- dma_address = dma_map_single(port->dev, skb->data,
- dma_length, DMA_TO_DEVICE);
+ msg->in_use = 1;
- if (dma_mapping_error(dma_address))
+ msg->data.addr[0] = dma_map_single(port->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
+
+ if (dma_mapping_error(msg->data.addr[0]))
goto recycle_and_drop;
/* Is it really necessary to check the length and address
* fields of the first entry here? */
msg->skb = skb;
msg->dev = port->dev;
- msg->data.addr[0] = dma_address;
- msg->data.len[0] = dma_length;
+ msg->data.len[0] = skb->len;
msg->data.eofmask = 1 << VETH_EOF_SHIFT;
- set_bit(0, &(msg->in_use));
+
rc = veth_signaldata(cnx, VethEventTypeFrames, msg->token, &msg->data);
if (rc != HvLpEvent_Rc_Good)
goto recycle_and_drop;
+ /* If the timer's not already running, start it now. */
+ if (0 == cnx->outstanding_tx)
+ mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout);
+
+ cnx->last_contact = jiffies;
+ cnx->outstanding_tx++;
+
+ if (veth_stack_is_empty(cnx))
+ veth_stop_queues(cnx);
+
spin_unlock_irqrestore(&cnx->lock, flags);
return 0;
recycle_and_drop:
+ /* we free the skb below, so tell veth_recycle_msg() not to. */
msg->skb = NULL;
- /* need to set in use to make veth_recycle_msg in case this
- * was a mapping failure */
- set_bit(0, &msg->in_use);
veth_recycle_msg(cnx, msg);
drop:
port->stats.tx_errors++;
{
unsigned char *frame = skb->data;
struct veth_port *port = (struct veth_port *) dev->priv;
- unsigned long flags;
HvLpIndexMap lpmask;
if (! (frame[0] & 0x01)) {
lpmask = port->lpar_map;
}
- spin_lock_irqsave(&port->pending_gate, flags);
-
- lpmask = veth_transmit_to_many(skb, lpmask, dev);
-
- dev->trans_start = jiffies;
-
- if (! lpmask) {
- dev_kfree_skb(skb);
- } else {
- if (port->pending_skb) {
- veth_error("%s: TX while skb was pending!\n",
- dev->name);
- dev_kfree_skb(skb);
- spin_unlock_irqrestore(&port->pending_gate, flags);
- return 1;
- }
-
- port->pending_skb = skb;
- port->pending_lpmask = lpmask;
- netif_stop_queue(dev);
- }
+ veth_transmit_to_many(skb, lpmask, dev);
- spin_unlock_irqrestore(&port->pending_gate, flags);
+ dev_kfree_skb(skb);
return 0;
}
+/* You must hold the connection's lock when you call this function. */
static void veth_recycle_msg(struct veth_lpar_connection *cnx,
struct veth_msg *msg)
{
u32 dma_address, dma_length;
- if (test_and_clear_bit(0, &msg->in_use)) {
+ if (msg->in_use) {
+ msg->in_use = 0;
dma_address = msg->data.addr[0];
dma_length = msg->data.len[0];
- dma_unmap_single(msg->dev, dma_address, dma_length,
- DMA_TO_DEVICE);
+ if (!dma_mapping_error(dma_address))
+ dma_unmap_single(msg->dev, dma_address, dma_length,
+ DMA_TO_DEVICE);
if (msg->skb) {
dev_kfree_skb_any(msg->skb);
}
}
-static void veth_flush_pending(struct veth_lpar_connection *cnx)
+static void veth_wake_queues(struct veth_lpar_connection *cnx)
{
int i;
+
for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
struct net_device *dev = veth_dev[i];
struct veth_port *port;
if (! (port->lpar_map & (1<<cnx->remote_lp)))
continue;
- spin_lock_irqsave(&port->pending_gate, flags);
- if (port->pending_skb) {
- port->pending_lpmask =
- veth_transmit_to_many(port->pending_skb,
- port->pending_lpmask,
- dev);
- if (! port->pending_lpmask) {
- dev_kfree_skb_any(port->pending_skb);
- port->pending_skb = NULL;
- netif_wake_queue(dev);
- }
+ spin_lock_irqsave(&port->queue_lock, flags);
+
+ port->stopped_map &= ~(1 << cnx->remote_lp);
+
+ if (0 == port->stopped_map && netif_queue_stopped(dev)) {
+ veth_debug("cnx %d: woke queue for %s.\n",
+ cnx->remote_lp, dev->name);
+ netif_wake_queue(dev);
+ }
+ spin_unlock_irqrestore(&port->queue_lock, flags);
+ }
+}
+
+/*
+ * Stop the transmit queue of every registered net device whose lpar_map
+ * includes this connection's remote LPAR, and record that LPAR in the
+ * port's stopped_map.
+ *
+ * The port's queue_lock is taken with plain spin_lock(); presumably the
+ * caller already runs with interrupts disabled -- confirm.
+ */
+static void veth_stop_queues(struct veth_lpar_connection *cnx)
+{
+	int lp_bit = 1 << cnx->remote_lp;
+	int vlan;
+
+	for (vlan = 0; vlan < HVMAXARCHITECTEDVIRTUALLANS; vlan++) {
+		struct net_device *netdev = veth_dev[vlan];
+		struct veth_port *port;
+
+		/* No device registered in this slot. */
+		if (netdev == NULL)
+			continue;
+
+		port = (struct veth_port *)netdev->priv;
+
+		/* Skip ports whose VLAN does not reach this connection. */
+		if ((port->lpar_map & lp_bit) == 0)
+			continue;
+
+		spin_lock(&port->queue_lock);
+
+		netif_stop_queue(netdev);
+		port->stopped_map |= lp_bit;
+
+		veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n",
+			   cnx->remote_lp, netdev->name, port->stopped_map);
+
+		spin_unlock(&port->queue_lock);
+	}
+}
+
+/*
+ * Reset-timer callback for a connection.
+ *
+ * ptr is the struct veth_lpar_connection pointer stored in
+ * reset_timer.data. While frames are still outstanding:
+ *  - if more than reset_timeout jiffies have passed since last_contact,
+ *    flag the connection for reset and kick its state machine;
+ *  - otherwise re-arm the timer for another reset_timeout.
+ * With nothing outstanding the timer is not re-armed here.
+ */
+static void veth_timed_reset(unsigned long ptr)
+{
+	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr;
+	unsigned long trigger_time, flags;
+
+	/* FIXME is it possible this fires after veth_stop_connection()?
+	 * That would reschedule the statemachine for 5 seconds and probably
+	 * execute it after the module's been unloaded. Hmm. */
+
+	spin_lock_irqsave(&cnx->lock, flags);
+
+	if (cnx->outstanding_tx > 0) {
+		trigger_time = cnx->last_contact + cnx->reset_timeout;
+
+		/* Use time_after() rather than a plain '<' so the deadline
+		 * check stays correct when jiffies wraps around. */
+		if (time_after(jiffies, trigger_time)) {
+			cnx->state |= VETH_STATE_RESET;
+			veth_kick_statemachine(cnx);
+			veth_error("%d packets not acked by LPAR %d within %d "
+					"seconds, resetting.\n",
+					cnx->outstanding_tx, cnx->remote_lp,
+					cnx->reset_timeout / HZ);
+		} else {
+			/* Reschedule the timer */
+			trigger_time = jiffies + cnx->reset_timeout;
+			mod_timer(&cnx->reset_timer, trigger_time);
}
- spin_unlock_irqrestore(&port->pending_gate, flags);
}
+
+	spin_unlock_irqrestore(&cnx->lock, flags);
}
/*
if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
return 1;
- if (! (((char *) &mac_addr)[0] & 0x01))
- return 0;
-
read_lock_irqsave(&port->mcast_gate, flags);
- if (port->promiscuous || port->all_mcast) {
+ if (port->promiscuous) {
wanted = 1;
goto out;
}
static int veth_remove(struct vio_dev *vdev)
{
- int i = vdev->unit_address;
+ struct veth_lpar_connection *cnx;
struct net_device *dev;
+ struct veth_port *port;
+ int i;
- dev = veth_dev[i];
- if (dev != NULL) {
- veth_dev[i] = NULL;
- unregister_netdev(dev);
- free_netdev(dev);
+ dev = veth_dev[vdev->unit_address];
+
+ if (! dev)
+ return 0; /* no net device is recorded for this unit address */
+
+ port = netdev_priv(dev);
+
+ for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
+ cnx = veth_cnx[i];
+
+ if (cnx && (port->lpar_map & (1 << i))) {
+ /* Drop our reference to connections on our VLAN.
+ * This pairs with the kobject_get() done per connection
+ * in the probe path.
+ * NOTE(review): the references are dropped before
+ * unregister_netdev() below -- confirm nothing can still
+ * transmit through these connections in between. */
+ kobject_put(&cnx->kobject);
+ }
}
+
+ veth_dev[vdev->unit_address] = NULL; /* clear the slot before the device is freed */
+ unregister_netdev(dev);
+ free_netdev(dev);
+
return 0;
}
{
int i = vdev->unit_address;
struct net_device *dev;
+ struct veth_port *port;
dev = veth_probe_one(i, &vdev->dev);
if (dev == NULL) {
}
veth_dev[i] = dev;
- /* Start the state machine on each connection, to commence
- * link negotiation */
- for (i = 0; i < HVMAXARCHITECTEDLPS; i++)
- if (veth_cnx[i])
- veth_kick_statemachine(veth_cnx[i]);
+ port = (struct veth_port*)netdev_priv(dev);
+
+ /* Start the state machine on each connection on this vlan. If we're
+ * the first dev to do so this will commence link negotiation */
+ for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
+ struct veth_lpar_connection *cnx;
+
+ if (! (port->lpar_map & (1 << i)))
+ continue;
+
+ cnx = veth_cnx[i];
+ if (!cnx)
+ continue;
+
+ kobject_get(&cnx->kobject);
+ veth_kick_statemachine(cnx);
+ }
return 0;
}
void __exit veth_module_cleanup(void)
{
int i;
+ struct veth_lpar_connection *cnx;
- /* Stop the queues first to stop any new packets being sent. */
- for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++)
- if (veth_dev[i])
- netif_stop_queue(veth_dev[i]);
-
- /* Stop the connections before we unregister the driver. This
- * ensures there's no skbs lying around holding the device open. */
- for (i = 0; i < HVMAXARCHITECTEDLPS; ++i)
- veth_stop_connection(i);
-
+ /* Disconnect our "irq" to stop events coming from the Hypervisor. */
HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);
- /* Hypervisor callbacks may have scheduled more work while we
- * were stoping connections. Now that we've disconnected from
- * the hypervisor make sure everything's finished. */
+ /* Make sure any work queued from Hypervisor callbacks is finished. */
flush_scheduled_work();
- vio_unregister_driver(&veth_driver);
+ for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
+ cnx = veth_cnx[i];
+
+ if (!cnx)
+ continue;
- for (i = 0; i < HVMAXARCHITECTEDLPS; ++i)
- veth_destroy_connection(i);
+ /* Drop the driver's reference to the connection, i.e. the
+ * initial one obtained from kobject_init() when the connection
+ * was set up. References taken on behalf of probed devices
+ * are not touched here. */
+ kobject_put(&cnx->kobject);
+ }
+ /* Unregister the driver, which will close all the netdevs and stop
+ * the connections when they're no longer referenced. */
+ vio_unregister_driver(&veth_driver);
}
module_exit(veth_module_cleanup);
for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
rc = veth_init_connection(i);
- if (rc != 0) {
- veth_module_cleanup();
- return rc;
- }
+ if (rc != 0)
+ goto error;
}
HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
&veth_handle_event);
- return vio_register_driver(&veth_driver);
+ rc = vio_register_driver(&veth_driver);
+ if (rc != 0)
+ goto error;
+
+ return 0;
+
+error:
+ for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
+ veth_destroy_connection(veth_cnx[i]);
+ }
+
+ return rc;
}
module_init(veth_module_init);