/* Last packet in the request */
#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
+
+/* Last packet that uses a particular io vector */
#define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0)
#define SDMA_REQ_IN_USE 0
#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
struct user_sdma_iovec {
+ struct list_head list;
struct iovec iov;
/* number of pages in this vector */
unsigned npages;
* to 0.
*/
u8 omfactor;
- /*
- * pointer to the user's mm_struct. We are going to
- * get a reference to it so it doesn't get freed
- * since we might not be in process context when we
- * are processing the iov's.
- * Using this mm_struct, we can get vma based on the
- * iov's address (find_vma()).
- */
- struct mm_struct *user_mm;
/*
* We copy the iovs for this request (based on
* info.iovcnt). These are only the data vectors
u16 tididx;
u32 sent;
u64 seqnum;
+ u64 seqcomp;
struct list_head txps;
spinlock_t txcmp_lock; /* protect txcmp list */
struct list_head txcmp;
unsigned long flags;
/* status of the last txreq completed */
int status;
- struct work_struct worker;
};
/*
static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
static int num_user_pages(const struct iovec *);
static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
-static void user_sdma_delayed_completion(struct work_struct *);
-static void user_sdma_free_request(struct user_sdma_request *);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
+static void user_sdma_free_request(struct user_sdma_request *, bool);
static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *);
-static void unpin_vector_pages(struct user_sdma_request *,
- struct user_sdma_iovec *);
+static void unpin_vector_pages(struct user_sdma_iovec *);
static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *,
struct user_sdma_txreq *, u32);
static int set_txreq_header_ahg(struct user_sdma_request *,
struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct user_sdma_request *,
- enum hfi1_sdma_comp_state, int);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
+ struct hfi1_user_sdma_comp_q *,
+ u16, enum hfi1_sdma_comp_state, int);
static inline u32 set_pkt_bth_psn(__be32, u8, u32);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
goto pq_nomem;
memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kmalloc(memsize, GFP_KERNEL);
+ pq->reqs = kzalloc(memsize, GFP_KERNEL);
if (!pq->reqs)
goto pq_reqs_nomem;
INIT_LIST_HEAD(&pq->list);
+ INIT_LIST_HEAD(&pq->iovec_list);
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
pq->n_max_reqs = hfi1_sdma_comp_ring_size;
pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
+ spin_lock_init(&pq->iovec_lock);
init_waitqueue_head(&pq->wait);
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq;
+ struct user_sdma_iovec *iov;
unsigned long flags;
hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
wait_event_interruptible(
pq->wait,
(ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
+ /* Unpin any left over buffers. */
+ while (!list_empty(&pq->iovec_list)) {
+ spin_lock_irqsave(&pq->iovec_lock, flags);
+ iov = list_first_entry(&pq->iovec_list,
+ struct user_sdma_iovec, list);
+ list_del_init(&iov->list);
+ spin_unlock_irqrestore(&pq->iovec_lock, flags);
+ unpin_vector_pages(iov);
+ }
kfree(pq->reqs);
kmem_cache_destroy(pq->txreq_cache);
kfree(pq);
int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
unsigned long dim, unsigned long *count)
{
- int ret = 0, i = 0, sent;
+ int ret = 0, i = 0;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
struct hfi1_devdata *dd = pq->dd;
- unsigned long idx = 0;
+ unsigned long idx = 0, flags;
u8 pcount = initial_pkt_count;
struct sdma_req_info info;
struct user_sdma_request *req;
+ struct user_sdma_iovec *ioptr;
u8 opcode, sc, vl;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
dd->unit, uctxt->ctxt, fd->subctxt, ret);
return -EFAULT;
}
+
+ /* Process any completed vectors */
+ while (!list_empty(&pq->iovec_list)) {
+ spin_lock_irqsave(&pq->iovec_lock, flags);
+ ioptr = list_first_entry(&pq->iovec_list,
+ struct user_sdma_iovec, list);
+ list_del_init(&ioptr->list);
+ spin_unlock_irqrestore(&pq->iovec_lock, flags);
+ unpin_vector_pages(ioptr);
+ }
+
trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
(u16 *)&info);
- if (cq->comps[info.comp_idx].status == QUEUED) {
+ if (cq->comps[info.comp_idx].status == QUEUED ||
+ test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
dd->unit, uctxt->ctxt, fd->subctxt,
info.comp_idx);
req->cq = cq;
req->status = -1;
INIT_LIST_HEAD(&req->txps);
- INIT_LIST_HEAD(&req->txcmp);
- INIT_WORK(&req->worker, user_sdma_delayed_completion);
- spin_lock_init(&req->txcmp_lock);
memcpy(&req->info, &info, sizeof(info));
if (req_opcode(info.ctrl) == EXPECTED)
/* Save all the IO vector structures */
while (i < req->data_iovs) {
+ INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
req->iovs[i].offset = 0;
req->data_len += req->iovs[i++].iov.iov_len;
}
}
- set_comp_state(req, QUEUED, 0);
+ set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+ atomic_inc(&pq->n_reqs);
/* Send the first N packets in the request to buy us some time */
- sent = user_sdma_send_pkts(req, pcount);
- if (unlikely(sent < 0)) {
- if (sent != -EBUSY) {
- req->status = sent;
- set_comp_state(req, ERROR, req->status);
- return sent;
- } else
- sent = 0;
+ ret = user_sdma_send_pkts(req, pcount);
+ if (unlikely(ret < 0 && ret != -EBUSY)) {
+ req->status = ret;
+ atomic_dec(&pq->n_reqs);
+ goto free_req;
}
- atomic_inc(&pq->n_reqs);
- xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
- if (sent < req->info.npkts) {
- /*
- * This is a somewhat blocking send implementation.
- * The driver will block the caller until all packets of the
- * request have been submitted to the SDMA engine. However, it
- * will not wait for send completions.
- */
- while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
- ret = user_sdma_send_pkts(req, pcount);
- if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- return ret;
- }
- wait_event_interruptible_timeout(
- pq->busy.wait_dma,
- (pq->state == SDMA_PKT_Q_ACTIVE),
- msecs_to_jiffies(
- SDMA_IOWAIT_TIMEOUT));
+ /*
+ * It is possible that the SDMA engine would have processed all the
+ * submitted packets by the time we get here. Therefore, only set
+ * packet queue state to ACTIVE if there are still uncompleted
+ * requests.
+ */
+ if (atomic_read(&pq->n_reqs))
+ xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+
+ /*
+ * This is a somewhat blocking send implementation.
+ * The driver will block the caller until all packets of the
+ * request have been submitted to the SDMA engine. However, it
+ * will not wait for send completions.
+ */
+ while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+ ret = user_sdma_send_pkts(req, pcount);
+ if (ret < 0) {
+ if (ret != -EBUSY) {
+ req->status = ret;
+ set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ return ret;
}
+ wait_event_interruptible_timeout(
+ pq->busy.wait_dma,
+ (pq->state == SDMA_PKT_Q_ACTIVE),
+ msecs_to_jiffies(
+ SDMA_IOWAIT_TIMEOUT));
}
}
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req);
+ user_sdma_free_request(req, true);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
return ret;
}
iovec->pages[pageidx],
offset, len);
if (ret) {
- int i;
-
SDMA_DBG(req, "SDMA txreq add page failed %d\n",
ret);
- /* Mark all assigned vectors as complete so they
- * are unpinned in the callback. */
- for (i = tx->idx; i >= 0; i--) {
- tx->iovecs[i].flags |=
- TXREQ_FLAGS_IOVEC_LAST_PKT;
- }
goto free_txreq;
}
iov_offset += len;
return -ENOMEM;
}
- /*
- * Get a reference to the process's mm so we can use it when
- * unpinning the io vectors.
- */
- req->pq->user_mm = get_task_mm(current);
-
pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
npages, 0, iovec->pages);
iovec->npages = pinned;
if (pinned != npages) {
SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
- unpin_vector_pages(req, iovec);
+ unpin_vector_pages(iovec);
return -EFAULT;
}
+ /*
+ * Get a reference to the process's mm so we can use it when
+ * unpinning the io vectors.
+ */
return 0;
}
-static void unpin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec)
+static void unpin_vector_pages(struct user_sdma_iovec *iovec)
{
- /*
- * Unpinning is done through the workqueue so use the
- * process's mm if we have a reference to it.
- */
- if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
- use_mm(req->pq->user_mm);
-
hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
- /*
- * Unuse the user's mm (see above) and release the
- * reference to it.
- */
- if (req->pq->user_mm) {
- if (current->flags & PF_KTHREAD)
- unuse_mm(req->pq->user_mm);
- mmput(req->pq->user_mm);
- }
-
kfree(iovec->pages);
iovec->pages = NULL;
iovec->npages = 0;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
struct user_sdma_request *req;
- bool defer;
+ struct hfi1_user_sdma_pkt_q *pq;
+ struct hfi1_user_sdma_comp_q *cq;
+ u16 idx;
int i;
if (!tx->req)
return;
req = tx->req;
- /*
- * If this is the callback for the last packet of the request,
- * queue up the request for clean up.
- */
- defer = (tx->seqnum == req->info.npkts - 1);
+ pq = req->pq;
+ cq = req->cq;
/*
* If we have any io vectors associated with this txreq,
*/
for (i = tx->idx; i >= 0; i--) {
if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
- defer = true;
- break;
+ spin_lock(&pq->iovec_lock);
+ list_add_tail(&tx->iovecs[i].vec->list,
+ &pq->iovec_list);
+ spin_unlock(&pq->iovec_lock);
}
}
- req->status = status;
if (status != SDMA_TXREQ_S_OK) {
SDMA_DBG(req, "SDMA completion with error %d",
status);
set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
- defer = true;
}
- /*
- * Defer the clean up of the iovectors and the request until later
- * so it can be done outside of interrupt context.
- */
- if (defer) {
- spin_lock(&req->txcmp_lock);
- list_add_tail(&tx->list, &req->txcmp);
- spin_unlock(&req->txcmp_lock);
- schedule_work(&req->worker);
+ req->seqcomp = tx->seqnum;
+ kmem_cache_free(pq->txreq_cache, tx);
+ tx = NULL;
+
+ idx = req->info.comp_idx;
+ if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+ if (req->seqcomp == req->info.npkts - 1) {
+ req->status = 0;
+ user_sdma_free_request(req, false);
+ pq_update(pq);
+ set_comp_state(pq, cq, idx, COMPLETE, 0);
+ }
} else {
- kmem_cache_free(req->pq->txreq_cache, tx);
+ if (status != SDMA_TXREQ_S_OK)
+ req->status = status;
+ if (req->seqcomp == ACCESS_ONCE(req->seqnum) &&
+ test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
+ user_sdma_free_request(req, false);
+ pq_update(pq);
+ set_comp_state(pq, cq, idx, ERROR, req->status);
+ }
}
}
-static void user_sdma_delayed_completion(struct work_struct *work)
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
- struct user_sdma_request *req =
- container_of(work, struct user_sdma_request, worker);
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- struct user_sdma_txreq *tx = NULL;
- unsigned long flags;
- u64 seqnum;
- int i;
-
- while (1) {
- spin_lock_irqsave(&req->txcmp_lock, flags);
- if (!list_empty(&req->txcmp)) {
- tx = list_first_entry(&req->txcmp,
- struct user_sdma_txreq, list);
- list_del(&tx->list);
- }
- spin_unlock_irqrestore(&req->txcmp_lock, flags);
- if (!tx)
- break;
-
- for (i = tx->idx; i >= 0; i--)
- if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
- unpin_vector_pages(req, tx->iovecs[i].vec);
-
- seqnum = tx->seqnum;
- kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- if (req->status != SDMA_TXREQ_S_OK) {
- if (seqnum == ACCESS_ONCE(req->seqnum) &&
- test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
- atomic_dec(&pq->n_reqs);
- set_comp_state(req, ERROR, req->status);
- user_sdma_free_request(req);
- break;
- }
- } else {
- if (seqnum == req->info.npkts - 1) {
- atomic_dec(&pq->n_reqs);
- set_comp_state(req, COMPLETE, 0);
- user_sdma_free_request(req);
- break;
- }
- }
- }
-
- if (!atomic_read(&pq->n_reqs)) {
+ if (atomic_dec_and_test(&pq->n_reqs)) {
xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
wake_up(&pq->wait);
}
}
-static void user_sdma_free_request(struct user_sdma_request *req)
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
{
if (!list_empty(&req->txps)) {
struct sdma_txreq *t, *p;
kmem_cache_free(req->pq->txreq_cache, tx);
}
}
- if (req->data_iovs) {
+ if (req->data_iovs && unpin) {
int i;
for (i = 0; i < req->data_iovs; i++)
if (req->iovs[i].npages && req->iovs[i].pages)
- unpin_vector_pages(req, &req->iovs[i]);
+ unpin_vector_pages(&req->iovs[i]);
}
kfree(req->tids);
clear_bit(SDMA_REQ_IN_USE, &req->flags);
}
-static inline void set_comp_state(struct user_sdma_request *req,
- enum hfi1_sdma_comp_state state,
- int ret)
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+ struct hfi1_user_sdma_comp_q *cq,
+ u16 idx, enum hfi1_sdma_comp_state state,
+ int ret)
{
- SDMA_DBG(req, "Setting completion status %u %d", state, ret);
- req->cq->comps[req->info.comp_idx].status = state;
+ hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
+ pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
+ cq->comps[idx].status = state;
if (state == ERROR)
- req->cq->comps[req->info.comp_idx].errcode = -ret;
- trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt,
- req->pq->subctxt, req->info.comp_idx,
- state, ret);
+ cq->comps[idx].errcode = -ret;
+ trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
+ idx, state, ret);
}