223 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			223 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
 | |
| #include <linux/init.h>
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/module.h>
 | |
| #include <linux/pci.h>
 | |
| #include <uapi/linux/idxd.h>
 | |
| #include "idxd.h"
 | |
| #include "registers.h"
 | |
| 
 | |
| static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 | |
| {
 | |
| 	struct idxd_desc *desc;
 | |
| 	struct idxd_device *idxd = wq->idxd;
 | |
| 
 | |
| 	desc = wq->descs[idx];
 | |
| 	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
 | |
| 	memset(desc->completion, 0, idxd->data->compl_size);
 | |
| 	desc->cpu = cpu;
 | |
| 
 | |
| 	if (device_pasid_enabled(idxd))
 | |
| 		desc->hw->pasid = idxd->pasid;
 | |
| 
 | |
| 	return desc;
 | |
| }
 | |
| 
 | |
| struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
 | |
| {
 | |
| 	int cpu, idx;
 | |
| 	struct idxd_device *idxd = wq->idxd;
 | |
| 	DEFINE_SBQ_WAIT(wait);
 | |
| 	struct sbq_wait_state *ws;
 | |
| 	struct sbitmap_queue *sbq;
 | |
| 
 | |
| 	if (idxd->state != IDXD_DEV_ENABLED)
 | |
| 		return ERR_PTR(-EIO);
 | |
| 
 | |
| 	sbq = &wq->sbq;
 | |
| 	idx = sbitmap_queue_get(sbq, &cpu);
 | |
| 	if (idx < 0) {
 | |
| 		if (optype == IDXD_OP_NONBLOCK)
 | |
| 			return ERR_PTR(-EAGAIN);
 | |
| 	} else {
 | |
| 		return __get_desc(wq, idx, cpu);
 | |
| 	}
 | |
| 
 | |
| 	ws = &sbq->ws[0];
 | |
| 	for (;;) {
 | |
| 		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
 | |
| 		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
 | |
| 			break;
 | |
| 		idx = sbitmap_queue_get(sbq, &cpu);
 | |
| 		if (idx >= 0)
 | |
| 			break;
 | |
| 		schedule();
 | |
| 	}
 | |
| 
 | |
| 	sbitmap_finish_wait(sbq, ws, &wait);
 | |
| 	if (idx < 0)
 | |
| 		return ERR_PTR(-EAGAIN);
 | |
| 
 | |
| 	return __get_desc(wq, idx, cpu);
 | |
| }
 | |
| EXPORT_SYMBOL_NS_GPL(idxd_alloc_desc, IDXD);
 | |
| 
 | |
| void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 | |
| {
 | |
| 	int cpu = desc->cpu;
 | |
| 
 | |
| 	desc->cpu = -1;
 | |
| 	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
 | |
| }
 | |
| EXPORT_SYMBOL_NS_GPL(idxd_free_desc, IDXD);
 | |
| 
 | |
| static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
 | |
| 					 struct idxd_desc *desc)
 | |
| {
 | |
| 	struct idxd_desc *d, *n;
 | |
| 
 | |
| 	lockdep_assert_held(&ie->list_lock);
 | |
| 	list_for_each_entry_safe(d, n, &ie->work_list, list) {
 | |
| 		if (d == desc) {
 | |
| 			list_del(&d->list);
 | |
| 			return d;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * At this point, the desc needs to be aborted is held by the completion
 | |
| 	 * handler where it has taken it off the pending list but has not added to the
 | |
| 	 * work list. It will be cleaned up by the interrupt handler when it sees the
 | |
| 	 * IDXD_COMP_DESC_ABORT for completion status.
 | |
| 	 */
 | |
| 	return NULL;
 | |
| }
 | |
| 
 | |
| static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
 | |
| 			     struct idxd_desc *desc)
 | |
| {
 | |
| 	struct idxd_desc *d, *t, *found = NULL;
 | |
| 	struct llist_node *head;
 | |
| 	LIST_HEAD(flist);
 | |
| 
 | |
| 	desc->completion->status = IDXD_COMP_DESC_ABORT;
 | |
| 	/*
 | |
| 	 * Grab the list lock so it will block the irq thread handler. This allows the
 | |
| 	 * abort code to locate the descriptor need to be aborted.
 | |
| 	 */
 | |
| 	spin_lock(&ie->list_lock);
 | |
| 	head = llist_del_all(&ie->pending_llist);
 | |
| 	if (head) {
 | |
| 		llist_for_each_entry_safe(d, t, head, llnode) {
 | |
| 			if (d == desc) {
 | |
| 				found = desc;
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			if (d->completion->status)
 | |
| 				list_add_tail(&d->list, &flist);
 | |
| 			else
 | |
| 				list_add_tail(&d->list, &ie->work_list);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (!found)
 | |
| 		found = list_abort_desc(wq, ie, desc);
 | |
| 	spin_unlock(&ie->list_lock);
 | |
| 
 | |
| 	if (found)
 | |
| 		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false,
 | |
| 				      NULL, NULL);
 | |
| 
 | |
| 	/*
 | |
| 	 * completing the descriptor will return desc to allocator and
 | |
| 	 * the desc can be acquired by a different process and the
 | |
| 	 * desc->list can be modified.  Delete desc from list so the
 | |
| 	 * list traversing does not get corrupted by the other process.
 | |
| 	 */
 | |
| 	list_for_each_entry_safe(d, t, &flist, list) {
 | |
| 		list_del_init(&d->list);
 | |
| 		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true,
 | |
| 				      NULL, NULL);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
 | |
|  * has better control of number of descriptors being submitted to a shared wq by limiting
 | |
|  * the number of driver allocated descriptors to the wq size. However, when the swq is
 | |
|  * exported to a guest kernel, it may be shared with multiple guest kernels. This means
 | |
|  * the likelihood of getting busy returned on the swq when submitting goes significantly up.
 | |
|  * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
 | |
|  * up. The sysfs knob can be tuned by the system administrator.
 | |
|  */
 | |
| int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
 | |
| {
 | |
| 	unsigned int retries = wq->enqcmds_retries;
 | |
| 	int rc;
 | |
| 
 | |
| 	do {
 | |
| 		rc = enqcmds(portal, desc);
 | |
| 		if (rc == 0)
 | |
| 			break;
 | |
| 		cpu_relax();
 | |
| 	} while (retries--);
 | |
| 
 | |
| 	return rc;
 | |
| }
 | |
| 
 | |
| int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 | |
| {
 | |
| 	struct idxd_device *idxd = wq->idxd;
 | |
| 	struct idxd_irq_entry *ie = NULL;
 | |
| 	u32 desc_flags = desc->hw->flags;
 | |
| 	void __iomem *portal;
 | |
| 	int rc;
 | |
| 
 | |
| 	if (idxd->state != IDXD_DEV_ENABLED)
 | |
| 		return -EIO;
 | |
| 
 | |
| 	if (!percpu_ref_tryget_live(&wq->wq_active)) {
 | |
| 		wait_for_completion(&wq->wq_resurrect);
 | |
| 		if (!percpu_ref_tryget_live(&wq->wq_active))
 | |
| 			return -ENXIO;
 | |
| 	}
 | |
| 
 | |
| 	portal = idxd_wq_portal_addr(wq);
 | |
| 
 | |
| 	/*
 | |
| 	 * Pending the descriptor to the lockless list for the irq_entry
 | |
| 	 * that we designated the descriptor to.
 | |
| 	 */
 | |
| 	if (desc_flags & IDXD_OP_FLAG_RCI) {
 | |
| 		ie = &wq->ie;
 | |
| 		desc->hw->int_handle = ie->int_handle;
 | |
| 		llist_add(&desc->llnode, &ie->pending_llist);
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * The wmb() flushes writes to coherent DMA data before
 | |
| 	 * possibly triggering a DMA read. The wmb() is necessary
 | |
| 	 * even on UP because the recipient is a device.
 | |
| 	 */
 | |
| 	wmb();
 | |
| 
 | |
| 	if (wq_dedicated(wq)) {
 | |
| 		iosubmit_cmds512(portal, desc->hw, 1);
 | |
| 	} else {
 | |
| 		rc = idxd_enqcmds(wq, portal, desc->hw);
 | |
| 		if (rc < 0) {
 | |
| 			percpu_ref_put(&wq->wq_active);
 | |
| 			/* abort operation frees the descriptor */
 | |
| 			if (ie)
 | |
| 				llist_abort_desc(wq, ie, desc);
 | |
| 			return rc;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	percpu_ref_put(&wq->wq_active);
 | |
| 	return 0;
 | |
| }
 | |
| EXPORT_SYMBOL_NS_GPL(idxd_submit_desc, IDXD);
 |