xref: /linux/include/rdma/rdma_vt.h (revision d4b996f9ef1fe83d9ce9ad5c1ca0bd8231638ce5)
1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
2 /*
3  * Copyright(c) 2016 - 2019 Intel Corporation.
4  */
5 
6 #ifndef DEF_RDMA_VT_H
7 #define DEF_RDMA_VT_H
8 
9 /*
10  * Structure that low level drivers will populate in order to register with the
11  * rdmavt layer.
12  */
13 
14 #include <linux/spinlock.h>
15 #include <linux/list.h>
16 #include <linux/hash.h>
17 #include <rdma/ib_verbs.h>
18 #include <rdma/ib_mad.h>
19 #include <rdma/rdmavt_mr.h>
20 
21 #define RVT_MAX_PKEY_VALUES 16
22 
23 #define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */
24 #define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/
25 #define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */
26 
27 struct trap_list {
28 	u32 list_len;
29 	struct list_head list;
30 };
31 
32 struct rvt_qp;
33 struct rvt_qpn_table;
34 struct rvt_ibport {
35 	struct rvt_qp __rcu *qp[2];
36 	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
37 	struct rb_root mcast_tree;
38 	spinlock_t lock;		/* protect changes in this struct */
39 
40 	/* non-zero when timer is set */
41 	unsigned long mkey_lease_timeout;
42 	unsigned long trap_timeout;
43 	__be64 gid_prefix;      /* in network order */
44 	__be64 mkey;
45 	u64 tid;
46 	u32 port_cap_flags;
47 	u16 port_cap3_flags;
48 	u32 pma_sample_start;
49 	u32 pma_sample_interval;
50 	__be16 pma_counter_select[5];
51 	u16 pma_tag;
52 	u16 mkey_lease_period;
53 	u32 sm_lid;
54 	u8 sm_sl;
55 	u8 mkeyprot;
56 	u8 subnet_timeout;
57 	u8 vl_high_limit;
58 
59 	/*
60 	 * Driver is expected to keep these up to date. These
61 	 * counters are informational only and not required to be
62 	 * completely accurate.
63 	 */
64 	u64 n_rc_resends;
65 	u64 n_seq_naks;
66 	u64 n_rdma_seq;
67 	u64 n_rnr_naks;
68 	u64 n_other_naks;
69 	u64 n_loop_pkts;
70 	u64 n_pkt_drops;
71 	u64 n_vl15_dropped;
72 	u64 n_rc_timeouts;
73 	u64 n_dmawait;
74 	u64 n_unaligned;
75 	u64 n_rc_dupreq;
76 	u64 n_rc_seqnak;
77 	u64 n_rc_crwaits;
78 	u16 pkey_violations;
79 	u16 qkey_violations;
80 	u16 mkey_violations;
81 
82 	/* Hot-path per CPU counters to avoid cacheline trading to update */
83 	u64 z_rc_acks;
84 	u64 z_rc_qacks;
85 	u64 z_rc_delayed_comp;
86 	u64 __percpu *rc_acks;
87 	u64 __percpu *rc_qacks;
88 	u64 __percpu *rc_delayed_comp;
89 
90 	void *priv; /* driver private data */
91 
92 	/*
93 	 * The pkey table is allocated and maintained by the driver. Drivers
94 	 * need to have access to this before registering with rdmav. However
95 	 * rdmavt will need access to it so drivers need to provide this during
96 	 * the attach port API call.
97 	 */
98 	u16 *pkey_table;
99 
100 	struct rvt_ah *sm_ah;
101 
102 	/*
103 	 * Keep a list of traps that have not been repressed.  They will be
104 	 * resent based on trap_timer.
105 	 */
106 	struct trap_list trap_lists[RVT_MAX_TRAP_LISTS];
107 	struct timer_list trap_timer;
108 };
109 
110 #define RVT_CQN_MAX 16 /* maximum length of cq name */
111 
112 #define RVT_SGE_COPY_MEMCPY	0
113 #define RVT_SGE_COPY_CACHELESS	1
114 #define RVT_SGE_COPY_ADAPTIVE	2
115 
116 /*
117  * Things that are driver specific, module parameters in hfi1 and qib
118  */
119 struct rvt_driver_params {
120 	struct ib_device_attr props;
121 
122 	/*
123 	 * Anything driver specific that is not covered by props
124 	 * For instance special module parameters. Goes here.
125 	 */
126 	unsigned int lkey_table_size;
127 	unsigned int qp_table_size;
128 	unsigned int sge_copy_mode;
129 	unsigned int wss_threshold;
130 	unsigned int wss_clean_period;
131 	int qpn_start;
132 	int qpn_inc;
133 	int qpn_res_start;
134 	int qpn_res_end;
135 	int nports;
136 	int npkeys;
137 	int node;
138 	int psn_mask;
139 	int psn_shift;
140 	int psn_modify_mask;
141 	u32 core_cap_flags;
142 	u32 max_mad_size;
143 	u8 qos_shift;
144 	u8 max_rdma_atomic;
145 	u8 extra_rdma_atomic;
146 	u8 reserved_operations;
147 };
148 
149 /* User context */
150 struct rvt_ucontext {
151 	struct ib_ucontext ibucontext;
152 };
153 
154 /* Protection domain */
155 struct rvt_pd {
156 	struct ib_pd ibpd;
157 	bool user;
158 };
159 
160 /* Address handle */
161 struct rvt_ah {
162 	struct ib_ah ibah;
163 	struct rdma_ah_attr attr;
164 	u8 vl;
165 	u8 log_pmtu;
166 };
167 
168 /*
169  * This structure is used by rvt_mmap() to validate an offset
170  * when an mmap() request is made.  The vm_area_struct then uses
171  * this as its vm_private_data.
172  */
173 struct rvt_mmap_info {
174 	struct list_head pending_mmaps;
175 	struct ib_ucontext *context;
176 	void *obj;
177 	__u64 offset;
178 	struct kref ref;
179 	u32 size;
180 };
181 
182 /* memory working set size */
183 struct rvt_wss {
184 	unsigned long *entries;
185 	atomic_t total_count;
186 	atomic_t clean_counter;
187 	atomic_t clean_entry;
188 
189 	int threshold;
190 	int num_entries;
191 	long pages_mask;
192 	unsigned int clean_period;
193 };
194 
195 struct rvt_dev_info;
196 struct rvt_swqe;
197 struct rvt_driver_provided {
198 	/*
199 	 * Which functions are required depends on which verbs rdmavt is
200 	 * providing and which verbs the driver is overriding. See
201 	 * check_support() for details.
202 	 */
203 
204 	/* hot path calldowns in a single cacheline */
205 
206 	/*
207 	 * Give the driver a notice that there is send work to do. It is up to
208 	 * the driver to generally push the packets out, this just queues the
209 	 * work with the driver. There are two variants here. The no_lock
210 	 * version requires the s_lock not to be held. The other assumes the
211 	 * s_lock is held.
212 	 */
213 	bool (*schedule_send)(struct rvt_qp *qp);
214 	bool (*schedule_send_no_lock)(struct rvt_qp *qp);
215 
216 	/*
217 	 * Driver specific work request setup and checking.
218 	 * This function is allowed to perform any setup, checks, or
219 	 * adjustments required to the SWQE in order to be usable by
220 	 * underlying protocols. This includes private data structure
221 	 * allocations.
222 	 */
223 	int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
224 			 bool *call_send);
225 
226 	/*
227 	 * Sometimes rdmavt needs to kick the driver's send progress. That is
228 	 * done by this call back.
229 	 */
230 	void (*do_send)(struct rvt_qp *qp);
231 
232 	/*
233 	 * Returns a pointer to the underlying hardware's PCI device. This is
234 	 * used to display information as to what hardware is being referenced
235 	 * in an output message
236 	 */
237 	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
238 
239 	/*
240 	 * Allocate a private queue pair data structure for driver specific
241 	 * information which is opaque to rdmavt.  Errors are returned via
242 	 * ERR_PTR(err).  The driver is free to return NULL or a valid
243 	 * pointer.
244 	 */
245 	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
246 
247 	/*
248 	 * Init a structure allocated with qp_priv_alloc(). This should be
249 	 * called after all qp fields have been initialized in rdmavt.
250 	 */
251 	int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
252 			    struct ib_qp_init_attr *init_attr);
253 
254 	/*
255 	 * Free the driver's private qp structure.
256 	 */
257 	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
258 
259 	/*
260 	 * Inform the driver the particular qp in question has been reset so
261 	 * that it can clean up anything it needs to.
262 	 */
263 	void (*notify_qp_reset)(struct rvt_qp *qp);
264 
265 	/*
266 	 * Get a path mtu from the driver based on qp attributes.
267 	 */
268 	int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
269 				  struct ib_qp_attr *attr);
270 
271 	/*
272 	 * Notify driver that it needs to flush any outstanding IO requests that
273 	 * are waiting on a qp.
274 	 */
275 	void (*flush_qp_waiters)(struct rvt_qp *qp);
276 
277 	/*
278 	 * Notify driver to stop its queue of sending packets. Nothing else
279 	 * should be posted to the queue pair after this has been called.
280 	 */
281 	void (*stop_send_queue)(struct rvt_qp *qp);
282 
283 	/*
284 	 * Have the driver drain any in progress operations
285 	 */
286 	void (*quiesce_qp)(struct rvt_qp *qp);
287 
288 	/*
289 	 * Inform the driver a qp has went to error state.
290 	 */
291 	void (*notify_error_qp)(struct rvt_qp *qp);
292 
293 	/*
294 	 * Get an MTU for a qp.
295 	 */
296 	u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
297 			   u32 pmtu);
298 	/*
299 	 * Convert an mtu to a path mtu
300 	 */
301 	int (*mtu_to_path_mtu)(u32 mtu);
302 
303 	/*
304 	 * Get the guid of a port in big endian byte order
305 	 */
306 	int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
307 			   int guid_index, __be64 *guid);
308 
309 	/*
310 	 * Query driver for the state of the port.
311 	 */
312 	int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num,
313 				struct ib_port_attr *props);
314 
315 	/*
316 	 * Tell driver to shutdown a port
317 	 */
318 	int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num);
319 
320 	/* Tell driver to send a trap for changed  port capabilities */
321 	void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num);
322 
323 	/*
324 	 * The following functions can be safely ignored completely. Any use of
325 	 * these is checked for NULL before blindly calling. Rdmavt should also
326 	 * be functional if drivers omit these.
327 	 */
328 
329 	/* Called to inform the driver that all qps should now be freed. */
330 	unsigned (*free_all_qps)(struct rvt_dev_info *rdi);
331 
332 	/* Driver specific AH validation */
333 	int (*check_ah)(struct ib_device *, struct rdma_ah_attr *);
334 
335 	/* Inform the driver a new AH has been created */
336 	void (*notify_new_ah)(struct ib_device *, struct rdma_ah_attr *,
337 			      struct rvt_ah *);
338 
339 	/* Let the driver pick the next queue pair number*/
340 	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
341 			 enum ib_qp_type type, u32 port_num);
342 
343 	/* Determine if its safe or allowed to modify the qp */
344 	int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
345 			       int attr_mask, struct ib_udata *udata);
346 
347 	/* Driver specific QP modification/notification-of */
348 	void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
349 			  int attr_mask, struct ib_udata *udata);
350 
351 	/* Notify driver a mad agent has been created */
352 	void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
353 
354 	/* Notify driver a mad agent has been removed */
355 	void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
356 
357 	/* Notify driver to restart rc */
358 	void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait);
359 
360 	/* Get and return CPU to pin CQ processing thread */
361 	int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
362 };
363 
364 struct rvt_dev_info {
365 	struct ib_device ibdev; /* Keep this first. Nothing above here */
366 
367 	/*
368 	 * Prior to calling for registration the driver will be responsible for
369 	 * allocating space for this structure.
370 	 *
371 	 * The driver will also be responsible for filling in certain members of
372 	 * dparms.props. The driver needs to fill in dparms exactly as it would
373 	 * want values reported to a ULP. This will be returned to the caller
374 	 * in rdmavt's device. The driver should also therefore refrain from
375 	 * modifying this directly after registration with rdmavt.
376 	 */
377 
378 	/* Driver specific properties */
379 	struct rvt_driver_params dparms;
380 
381 	/* post send table */
382 	const struct rvt_operation_params *post_parms;
383 
384 	/* opcode translation table */
385 	const enum ib_wc_opcode *wc_opcode;
386 
387 	/* Driver specific helper functions */
388 	struct rvt_driver_provided driver_f;
389 
390 	struct rvt_mregion __rcu *dma_mr;
391 	struct rvt_lkey_table lkey_table;
392 
393 	/* Internal use */
394 	int n_pds_allocated;
395 	spinlock_t n_pds_lock; /* Protect pd allocated count */
396 
397 	int n_ahs_allocated;
398 	spinlock_t n_ahs_lock; /* Protect ah allocated count */
399 
400 	u32 n_srqs_allocated;
401 	spinlock_t n_srqs_lock; /* Protect srqs allocated count */
402 
403 	int flags;
404 	struct rvt_ibport **ports;
405 
406 	/* QP */
407 	struct rvt_qp_ibdev *qp_dev;
408 	u32 n_qps_allocated;    /* number of QPs allocated for device */
409 	u32 n_rc_qps;		/* number of RC QPs allocated for device */
410 	u32 busy_jiffies;	/* timeout scaling based on RC QP count */
411 	spinlock_t n_qps_lock;	/* protect qps, rc qps and busy jiffy counts */
412 
413 	/* memory maps */
414 	struct list_head pending_mmaps;
415 	spinlock_t mmap_offset_lock; /* protect mmap_offset */
416 	u32 mmap_offset;
417 	spinlock_t pending_lock; /* protect pending mmap list */
418 
419 	/* CQ */
420 	u32 n_cqs_allocated;    /* number of CQs allocated for device */
421 	spinlock_t n_cqs_lock; /* protect count of in use cqs */
422 
423 	/* Multicast */
424 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
425 	spinlock_t n_mcast_grps_lock;
426 
427 	/* Memory Working Set Size */
428 	struct rvt_wss *wss;
429 };
430 
431 /**
432  * rvt_set_ibdev_name - Craft an IB device name from client info
433  * @rdi: pointer to the client rvt_dev_info structure
434  * @name: client specific name
435  * @unit: client specific unit number.
436  */
437 static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
438 				      const char *fmt, const char *name,
439 				      const int unit)
440 {
441 	/*
442 	 * FIXME: rvt and its users want to touch the ibdev before
443 	 * registration and have things like the name work. We don't have the
444 	 * infrastructure in the core to support this directly today, hack it
445 	 * to work by setting the name manually here.
446 	 */
447 	dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
448 	strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
449 }
450 
451 /**
452  * rvt_get_ibdev_name - return the IB name
453  * @rdi: rdmavt device
454  *
455  * Return the registered name of the device.
456  */
457 static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
458 {
459 	return dev_name(&rdi->ibdev.dev);
460 }
461 
462 static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
463 {
464 	return container_of(ibpd, struct rvt_pd, ibpd);
465 }
466 
467 static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah)
468 {
469 	return container_of(ibah, struct rvt_ah, ibah);
470 }
471 
472 static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
473 {
474 	return  container_of(ibdev, struct rvt_dev_info, ibdev);
475 }
476 
477 static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
478 {
479 	/*
480 	 * All ports have same number of pkeys.
481 	 */
482 	return rdi->dparms.npkeys;
483 }
484 
485 /*
486  * Return the max atomic suitable for determining
487  * the size of the ack ring buffer in a QP.
488  */
489 static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
490 {
491 	return rdi->dparms.max_rdma_atomic +
492 		rdi->dparms.extra_rdma_atomic + 1;
493 }
494 
495 static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
496 {
497 	return rdi->dparms.max_rdma_atomic +
498 		rdi->dparms.extra_rdma_atomic;
499 }
500 
501 /*
502  * Return the indexed PKEY from the port PKEY table.
503  */
504 static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
505 			       int port_index,
506 			       unsigned index)
507 {
508 	if (index >= rvt_get_npkeys(rdi))
509 		return 0;
510 	else
511 		return rdi->ports[port_index]->pkey_table[index];
512 }
513 
514 struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
515 void rvt_dealloc_device(struct rvt_dev_info *rdi);
516 int rvt_register_device(struct rvt_dev_info *rvd);
517 void rvt_unregister_device(struct rvt_dev_info *rvd);
518 int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
519 int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
520 		  int port_index, u16 *pkey_table);
521 int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
522 		    int access);
523 int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
524 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
525 		u32 len, u64 vaddr, u32 rkey, int acc);
526 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
527 		struct rvt_sge *isge, struct rvt_sge *last_sge,
528 		struct ib_sge *sge, int acc);
529 struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
530 				 u16 lid);
531 
532 #endif          /* DEF_RDMA_VT_H */
533