Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | // SPDX-License-Identifier: BSD-3-Clause /* * Copyright (c) 2009-2012,2016,2023 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. */ #include <errno.h> #include <fcntl.h> #include <emmintrin.h> #include <linux/limits.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <string.h> #include <sys/mman.h> #include <sys/uio.h> #include <unistd.h> #include "vmbus_bufring.h" /** * Compiler barrier. * * Guarantees that operation reordering does not occur at compile time * for operations directly before and after the barrier. */ #define rte_compiler_barrier() ({ asm volatile ("" : : : "memory"); }) #define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFF #define ALIGN(val, align) ((typeof(val))((val) & (~((typeof(val))((align) - 1))))) void *vmbus_uio_map(int *fd, int size) { void *map; map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0); if (map == MAP_FAILED) return NULL; return map; } /* Increase bufring index by inc with wraparound */ static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz) { idx += inc; if (idx >= sz) idx -= sz; return idx; } void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen) { br->vbr = buf; br->windex = br->vbr->windex; br->dsize = blen - sizeof(struct vmbus_bufring); } static inline __always_inline void rte_smp_mb(void) { asm volatile("lock addl $0, -128(%%rsp); " ::: "memory"); } static inline int rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) { uint8_t res; asm volatile("lock ; " "cmpxchgl %[src], %[dst];" "sete %[res];" : [res] "=a" (res), /* output */ [dst] "=m" (*dst) : [src] "r" (src), /* input */ "a" (exp), "m" (*dst) : "memory"); /* no-clobber list */ return res; } static inline uint32_t vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex, const void *src0, uint32_t cplen) { uint8_t *br_data = tbr->vbr->data; uint32_t br_dsize = tbr->dsize; const uint8_t *src = src0; /* XXX use double mapping like Linux kernel? */ if (cplen > br_dsize - windex) { uint32_t fraglen = br_dsize - windex; /* Wrap-around detected */ memcpy(br_data + windex, src, fraglen); memcpy(br_data, src + fraglen, cplen - fraglen); } else { memcpy(br_data + windex, src, cplen); } return vmbus_br_idxinc(windex, cplen, br_dsize); } /* * Write scattered channel packet to TX bufring. * * The offset of this channel packet is written as a 64bits value * immediately after this channel packet. * * The write goes through three stages: * 1. Reserve space in ring buffer for the new data. * Writer atomically moves priv_write_index. * 2. Copy the new data into the ring. * 3. Update the tail of the ring (visible to host) that indicates * next read location. Writer updates write_index */ static int vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen) { struct vmbus_bufring *vbr = tbr->vbr; uint32_t ring_size = tbr->dsize; uint32_t old_windex, next_windex, windex, total; uint64_t save_windex; int i; total = 0; for (i = 0; i < iovlen; i++) total += iov[i].iov_len; total += sizeof(save_windex); /* Reserve space in ring */ do { uint32_t avail; /* Get current free location */ old_windex = tbr->windex; /* Prevent compiler reordering this with calculation */ rte_compiler_barrier(); avail = vmbus_br_availwrite(tbr, old_windex); /* If not enough space in ring, then tell caller. */ if (avail <= total) return -EAGAIN; next_windex = vmbus_br_idxinc(old_windex, total, ring_size); /* Atomic update of next write_index for other threads */ } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex)); /* Space from old..new is now reserved */ windex = old_windex; for (i = 0; i < iovlen; i++) windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len); /* Set the offset of the current channel packet. */ save_windex = ((uint64_t)old_windex) << 32; windex = vmbus_txbr_copyto(tbr, windex, &save_windex, sizeof(save_windex)); /* The region reserved should match region used */ if (windex != next_windex) return -EINVAL; /* Ensure that data is available before updating host index */ rte_compiler_barrier(); /* Checkin for our reservation. wait for our turn to update host */ while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex)) _mm_pause(); return 0; } int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data, uint32_t dlen, uint32_t flags) { struct vmbus_chanpkt pkt; unsigned int pktlen, pad_pktlen; const uint32_t hlen = sizeof(pkt); uint64_t pad = 0; struct iovec iov[3]; int error; pktlen = hlen + dlen; pad_pktlen = ALIGN(pktlen, sizeof(uint64_t)); pkt.hdr.type = type; pkt.hdr.flags = flags; pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT; pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT; pkt.hdr.xactid = VMBUS_RQST_ERROR; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(txbr, iov, 3); return error; } static inline uint32_t vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex, void *dst0, size_t cplen) { const uint8_t *br_data = rbr->vbr->data; uint32_t br_dsize = rbr->dsize; uint8_t *dst = dst0; if (cplen > br_dsize - rindex) { uint32_t fraglen = br_dsize - rindex; /* Wrap-around detected. */ memcpy(dst, br_data + rindex, fraglen); memcpy(dst + fraglen, br_data, cplen - fraglen); } else { memcpy(dst, br_data + rindex, cplen); } return vmbus_br_idxinc(rindex, cplen, br_dsize); } /* Copy data from receive ring but don't change index */ static int vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen) { uint32_t avail; /* * The requested data and the 64bits channel packet * offset should be there at least. */ avail = vmbus_br_availread(rbr); if (avail < dlen + sizeof(uint64_t)) return -EAGAIN; vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen); return 0; } /* * Copy data from receive ring and change index * NOTE: * We assume (dlen + skip) == sizeof(channel packet). */ static int vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip) { struct vmbus_bufring *vbr = rbr->vbr; uint32_t br_dsize = rbr->dsize; uint32_t rindex; if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t)) return -EAGAIN; /* Record where host was when we started read (for debug) */ rbr->windex = rbr->vbr->windex; /* * Copy channel packet from RX bufring. */ rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize); rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen); /* * Discard this channel packet's 64bits offset, which is useless to us. */ rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize); /* Update the read index _after_ the channel packet is fetched. */ rte_compiler_barrier(); vbr->rindex = rindex; return 0; } int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr, void *data, uint32_t *len) { struct vmbus_chanpkt_hdr pkt; uint32_t dlen, bufferlen = *len; int error; error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt)); if (error) return error; if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) /* XXX this channel is dead actually. */ return -EIO; if (unlikely(pkt.hlen > pkt.tlen)) return -EIO; /* Length are in quad words */ dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT; *len = dlen; /* If caller buffer is not large enough */ if (unlikely(dlen > bufferlen)) return -ENOBUFS; /* Read data and skip packet header */ error = vmbus_rxbr_read(rxbr, data, dlen, 0); if (error) return error; /* Return the number of bytes read */ return dlen + sizeof(uint64_t); } |