Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 | /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* * Copyright(c) 2018 Intel Corporation. * */ #ifndef HFI1_TID_RDMA_H #define HFI1_TID_RDMA_H #include <linux/circ_buf.h> #include "common.h" /* Add a convenience helper */ #define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1)) #define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size) #define CIRC_PREV(val, size) CIRC_ADD(val, -1, size) #define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT) /* * Bit definitions for priv->s_flags. * These bit flags overload the bit flags defined for the QP's s_flags. * Due to the fact that these bit fields are used only for the QP priv * s_flags, there are no collisions. * * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock */ #define HFI1_S_TID_BUSY_SET BIT(0) /* BIT(1) reserved for RVT_S_BUSY. */ #define HFI1_R_TID_RSC_TIMER BIT(2) /* BIT(3) reserved for RVT_S_RESP_PENDING. */ /* BIT(4) reserved for RVT_S_ACK_PENDING. */ #define HFI1_S_TID_WAIT_INTERLCK BIT(5) #define HFI1_R_TID_WAIT_INTERLCK BIT(6) /* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */ /* BIT(16) reserved for RVT_S_SEND_ONE */ #define HFI1_S_TID_RETRY_TIMER BIT(17) /* BIT(18) reserved for RVT_S_ECN. */ #define HFI1_R_TID_SW_PSN BIT(19) /* BIT(26) reserved for HFI1_S_WAIT_HALT */ /* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */ /* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */ /* * Unlike regular IB RDMA VERBS, which do not require an entry * in the s_ack_queue, TID RDMA WRITE requests do because they * generate responses. * Therefore, the s_ack_queue needs to be extended by a certain * amount. The key point is that the queue needs to be extended * without letting the "user" know so they user doesn't end up * using these extra entries. */ #define HFI1_TID_RDMA_WRITE_CNT 8 struct tid_rdma_params { struct rcu_head rcu_head; u32 qp; u32 max_len; u16 jkey; u8 max_read; u8 max_write; u8 timeout; u8 urg; u8 version; }; struct tid_rdma_qp_params { struct work_struct trigger_work; struct tid_rdma_params local; struct tid_rdma_params __rcu *remote; }; /* Track state for each hardware flow */ struct tid_flow_state { u32 generation; u32 psn; u32 r_next_psn; /* next PSN to be received (in TID space) */ u8 index; u8 last_index; u8 flags; }; enum tid_rdma_req_state { TID_REQUEST_INACTIVE = 0, TID_REQUEST_INIT, TID_REQUEST_INIT_RESEND, TID_REQUEST_ACTIVE, TID_REQUEST_RESEND, TID_REQUEST_RESEND_ACTIVE, TID_REQUEST_QUEUED, TID_REQUEST_SYNC, TID_REQUEST_RNR_NAK, TID_REQUEST_COMPLETE, }; struct tid_rdma_request { struct rvt_qp *qp; struct hfi1_ctxtdata *rcd; union { struct rvt_swqe *swqe; struct rvt_ack_entry *ack; } e; struct tid_rdma_flow *flows; /* array of tid flows */ struct rvt_sge_state ss; /* SGE state for TID RDMA requests */ u16 n_flows; /* size of the flow buffer window */ u16 setup_head; /* flow index we are setting up */ u16 clear_tail; /* flow index we are clearing */ u16 flow_idx; /* flow index most recently set up */ u16 acked_tail; u32 seg_len; u32 total_len; u32 r_ack_psn; /* next expected ack PSN */ u32 r_flow_psn; /* IB PSN of next segment start */ u32 r_last_acked; /* IB PSN of last ACK'ed packet */ u32 s_next_psn; /* IB PSN of next segment start for read */ u32 total_segs; /* segments required to complete a request */ u32 cur_seg; /* index of current segment */ u32 comp_seg; /* index of last completed segment */ u32 ack_seg; /* index of last ack'ed segment */ u32 alloc_seg; /* index of next segment to be allocated */ u32 isge; /* index of "current" sge */ u32 ack_pending; /* num acks pending for this request */ enum tid_rdma_req_state state; }; /* * When header suppression is used, PSNs associated with a "flow" are * relevant (and not the PSNs maintained by verbs). Track per-flow * PSNs here for a TID RDMA segment. * */ struct flow_state { u32 flags; u32 resp_ib_psn; /* The IB PSN of the response for this flow */ u32 generation; /* generation of flow */ u32 spsn; /* starting PSN in TID space */ u32 lpsn; /* last PSN in TID space */ u32 r_next_psn; /* next PSN to be received (in TID space) */ /* For tid rdma read */ u32 ib_spsn; /* starting PSN in Verbs space */ u32 ib_lpsn; /* last PSn in Verbs space */ }; struct tid_rdma_pageset { dma_addr_t addr : 48; /* Only needed for the first page */ u8 idx: 8; u8 count : 7; u8 mapped: 1; }; /** * kern_tid_node - used for managing TID's in TID groups * * @grp_idx: rcd relative index to tid_group * @map: grp->map captured prior to programming this TID group in HW * @cnt: Only @cnt of available group entries are actually programmed */ struct kern_tid_node { struct tid_group *grp; u8 map; u8 cnt; }; /* Overall info for a TID RDMA segment */ struct tid_rdma_flow { /* * While a TID RDMA segment is being transferred, it uses a QP number * from the "KDETH section of QP numbers" (which is different from the * QP number that originated the request). Bits 11-15 of these QP * numbers identify the "TID flow" for the segment. */ struct flow_state flow_state; struct tid_rdma_request *req; u32 tid_qpn; u32 tid_offset; u32 length; u32 sent; u8 tnode_cnt; u8 tidcnt; u8 tid_idx; u8 idx; u8 npagesets; u8 npkts; u8 pkt; u8 resync_npkts; struct kern_tid_node tnode[TID_RDMA_MAX_PAGES]; struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES]; u32 tid_entry[TID_RDMA_MAX_PAGES]; }; enum tid_rnr_nak_state { TID_RNR_NAK_INIT = 0, TID_RNR_NAK_SEND, TID_RNR_NAK_SENT, }; bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data); bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data); bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data); void tid_rdma_conn_error(struct rvt_qp *qp); void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p); int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit); int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req, struct rvt_sge_state *ss, bool *last); int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req); void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req); void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe); /** * trdma_clean_swqe - clean flows for swqe if large send queue * @qp: the qp * @wqe: the send wqe */ static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe) { if (!wqe->priv) return; __trdma_clean_swqe(qp, wqe); } void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp); int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_init_attr *init_attr); void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp); int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp); void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp); void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd); struct cntr_entry; u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data); u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet); u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, struct ib_other_headers *ohdr, u32 *bth0, u32 *bth1, u32 *bth2, u32 *len, bool *last); void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet); bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, struct hfi1_packet *packet); void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 *bth2); void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp); bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe); void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe) { if (wqe->priv && (wqe->wr.opcode == IB_WR_RDMA_READ || wqe->wr.opcode == IB_WR_RDMA_WRITE) && wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE) setup_tid_rdma_wqe(qp, wqe); } u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); void hfi1_compute_tid_rdma_flow_wt(void); void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet); u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, struct ib_other_headers *ohdr, u32 *bth1, u32 bth2, u32 *len, struct rvt_sge_state **ss); void hfi1_del_tid_reap_timer(struct rvt_qp *qp); void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet); bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet); u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e, struct ib_other_headers *ohdr, u16 iflow, u32 *bth1, u32 *bth2); void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet); void hfi1_add_tid_retry_timer(struct rvt_qp *qp); void hfi1_del_tid_retry_timer(struct rvt_qp *qp); u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u16 fidx); void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet); struct hfi1_pkt_state; int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps); void _hfi1_do_tid_send(struct work_struct *work); bool hfi1_schedule_tid_send(struct rvt_qp *qp); bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e); #endif /* HFI1_TID_RDMA_H */ |