// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <uapi/linux/idxd.h>
#include "idxd.h"
#include "registers.h"

static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
{
	struct idxd_desc *desc;
	struct idxd_device *idxd = wq->idxd;

	desc = wq->descs[idx];
	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
	memset(desc->completion, 0, idxd->data->compl_size);
	desc->cpu = cpu;

	if (device_pasid_enabled(idxd))
		desc->hw->pasid = idxd->pasid;

	return desc;
}

struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
{
	int cpu, idx;
	struct idxd_device *idxd = wq->idxd;
	DEFINE_SBQ_WAIT(wait);
	struct sbq_wait_state *ws;
	struct sbitmap_queue *sbq;

	if (idxd->state != IDXD_DEV_ENABLED)
		return ERR_PTR(-EIO);

	sbq = &wq->sbq;
	idx = sbitmap_queue_get(sbq, &cpu);
	if (idx < 0) {
		if (optype == IDXD_OP_NONBLOCK)
			return ERR_PTR(-EAGAIN);
	} else {
		return __get_desc(wq, idx, cpu);
	}

	ws = &sbq->ws[0];
	for (;;) {
		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
			break;
		idx = sbitmap_queue_get(sbq, &cpu);
		if (idx >= 0)
			break;
		schedule();
	}

	sbitmap_finish_wait(sbq, ws, &wait);
	if (idx < 0)
		return ERR_PTR(-EAGAIN);

	return __get_desc(wq, idx, cpu);
}

void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
	int cpu = desc->cpu;

	desc->cpu = -1;
	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
}

static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
					 struct idxd_desc *desc)
{
	struct idxd_desc *d, *n;

	lockdep_assert_held(&ie->list_lock);
	list_for_each_entry_safe(d, n, &ie->work_list, list) {
		if (d == desc) {
			list_del(&d->list);
			return d;
		}
	}

	/*
	 * At this point, the desc that needs to be aborted is held by the completion
	 * handler, which has taken it off the pending list but has not yet added it
	 * to the work list. It will be cleaned up by the interrupt handler when it
	 * sees the IDXD_COMP_DESC_ABORT completion status.
	 */
	return NULL;
}

static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
			     struct idxd_desc *desc)
{
	struct idxd_desc *d, *t, *found = NULL;
	struct llist_node *head;
	LIST_HEAD(flist);

	desc->completion->status = IDXD_COMP_DESC_ABORT;
	/*
	 * Grab the list lock so it will block the irq thread handler. This allows the
	 * abort code to locate the descriptor that needs to be aborted.
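	 *
	 * Descriptors pulled off the pending llist below are re-sorted: entries that
	 * already carry a completion status are collected on a local flush list and
	 * completed once the lock is dropped, while still-pending entries are moved
	 * to the irq_entry's work_list for the interrupt thread to process.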
	 */
	spin_lock(&ie->list_lock);
	head = llist_del_all(&ie->pending_llist);
	if (head) {
		llist_for_each_entry_safe(d, t, head, llnode) {
			if (d == desc) {
				found = desc;
				continue;
			}

			if (d->completion->status)
				list_add_tail(&d->list, &flist);
			else
				list_add_tail(&d->list, &ie->work_list);
		}
	}

	if (!found)
		found = list_abort_desc(wq, ie, desc);
	spin_unlock(&ie->list_lock);

	if (found)
		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);

	/*
	 * Completing the descriptor will return the desc to the allocator, where it
	 * can be acquired by a different process that may then modify desc->list.
	 * Delete desc from the list so the list traversal does not get corrupted by
	 * the other process.
	 */
	list_for_each_entry_safe(d, t, &flist, list) {
		list_del_init(&d->list);
		idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
	}
}

/*
 * ENQCMDS typically fails when the WQ is inactive or busy. On host submission, the
 * driver has better control of the number of descriptors being submitted to a shared
 * wq by limiting the number of driver-allocated descriptors to the wq size. However,
 * when the swq is exported to a guest kernel, it may be shared with multiple guest
 * kernels, so the likelihood of the swq returning busy on submission goes up
 * significantly. Having a tunable retry mechanism allows the driver to keep trying
 * for a while before giving up. The sysfs knob can be tuned by the system
 * administrator.
 */
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
{
	unsigned int retries = wq->enqcmds_retries;
	int rc;

	do {
		rc = enqcmds(portal, desc);
		if (rc == 0)
			break;
		cpu_relax();
	} while (retries--);

	return rc;
}

int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_irq_entry *ie = NULL;
	u32 desc_flags = desc->hw->flags;
	void __iomem *portal;
	int rc;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -EIO;

	if (!percpu_ref_tryget_live(&wq->wq_active)) {
		wait_for_completion(&wq->wq_resurrect);
		if (!percpu_ref_tryget_live(&wq->wq_active))
			return -ENXIO;
	}

	portal = idxd_wq_portal_addr(wq);

	/*
	 * The wmb() flushes writes to coherent DMA data before
	 * possibly triggering a DMA read. The wmb() is necessary
	 * even on UP because the recipient is a device.
	 */
	wmb();

	/*
	 * Add the descriptor to the lockless pending list of the irq_entry
	 * that we designated the descriptor to.
	 */
	if (desc_flags & IDXD_OP_FLAG_RCI) {
		ie = &wq->ie;
		desc->hw->int_handle = ie->int_handle;
		llist_add(&desc->llnode, &ie->pending_llist);
	}

	if (wq_dedicated(wq)) {
		iosubmit_cmds512(portal, desc->hw, 1);
	} else {
		rc = idxd_enqcmds(wq, portal, desc->hw);
		if (rc < 0) {
			percpu_ref_put(&wq->wq_active);
			/* abort operation frees the descriptor */
			if (ie)
				llist_abort_desc(wq, ie, desc);
			return rc;
		}
	}

	percpu_ref_put(&wq->wq_active);
	return 0;
}
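
/*
 * Hypothetical usage sketch, only to illustrate how the helpers above fit
 * together; the actual callers live elsewhere in the driver (e.g. the
 * dmaengine glue). Assumes a caller that already holds an enabled wq:
 *
 *	struct idxd_desc *desc;
 *	int rc;
 *
 *	desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
 *	if (IS_ERR(desc))
 *		return PTR_ERR(desc);
 *
 *	// fill in desc->hw: opcode, operands, flags, completion record address
 *
 *	rc = idxd_submit_desc(wq, desc);
 *	if (rc < 0)
 *		return rc;	// error cleanup elided; see the abort path above
 */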