xref: /linux/drivers/vfio/pci/mlx5/cmd.c (revision 8f426582e0e0c9bbd58e170e1b209334eb5df79e)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
4  */
5 
6 #include "cmd.h"
7 
8 static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
9 				  u16 *vhca_id);
10 
11 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
12 {
13 	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
14 	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
15 
16 	lockdep_assert_held(&mvdev->state_mutex);
17 	if (mvdev->mdev_detach)
18 		return -ENOTCONN;
19 
20 	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
21 	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
22 	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
23 
24 	return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
25 }
26 
27 int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
28 {
29 	u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
30 	u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
31 
32 	lockdep_assert_held(&mvdev->state_mutex);
33 	if (mvdev->mdev_detach)
34 		return -ENOTCONN;
35 
36 	MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
37 	MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
38 	MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
39 
40 	return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
41 }
42 
43 int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
44 					  size_t *state_size)
45 {
46 	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
47 	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
48 	int ret;
49 
50 	lockdep_assert_held(&mvdev->state_mutex);
51 	if (mvdev->mdev_detach)
52 		return -ENOTCONN;
53 
54 	MLX5_SET(query_vhca_migration_state_in, in, opcode,
55 		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
56 	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
57 	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
58 
59 	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
60 				  out);
61 	if (ret)
62 		return ret;
63 
64 	*state_size = MLX5_GET(query_vhca_migration_state_out, out,
65 			       required_umem_size);
66 	return 0;
67 }
68 
69 static int mlx5fv_vf_event(struct notifier_block *nb,
70 			   unsigned long event, void *data)
71 {
72 	struct mlx5vf_pci_core_device *mvdev =
73 		container_of(nb, struct mlx5vf_pci_core_device, nb);
74 
75 	mutex_lock(&mvdev->state_mutex);
76 	switch (event) {
77 	case MLX5_PF_NOTIFY_ENABLE_VF:
78 		mvdev->mdev_detach = false;
79 		break;
80 	case MLX5_PF_NOTIFY_DISABLE_VF:
81 		mlx5vf_disable_fds(mvdev);
82 		mvdev->mdev_detach = true;
83 		break;
84 	default:
85 		break;
86 	}
87 	mlx5vf_state_mutex_unlock(mvdev);
88 	return 0;
89 }
90 
91 void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
92 {
93 	if (!mvdev->migrate_cap)
94 		return;
95 
96 	mutex_lock(&mvdev->state_mutex);
97 	mlx5vf_disable_fds(mvdev);
98 	mlx5vf_state_mutex_unlock(mvdev);
99 }
100 
101 void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
102 {
103 	if (!mvdev->migrate_cap)
104 		return;
105 
106 	mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
107 						&mvdev->nb);
108 	destroy_workqueue(mvdev->cb_wq);
109 }
110 
111 void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
112 			       const struct vfio_migration_ops *mig_ops)
113 {
114 	struct pci_dev *pdev = mvdev->core_device.pdev;
115 	int ret;
116 
117 	if (!pdev->is_virtfn)
118 		return;
119 
120 	mvdev->mdev = mlx5_vf_get_core_dev(pdev);
121 	if (!mvdev->mdev)
122 		return;
123 
124 	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
125 		goto end;
126 
127 	mvdev->vf_id = pci_iov_vf_id(pdev);
128 	if (mvdev->vf_id < 0)
129 		goto end;
130 
131 	if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
132 				   &mvdev->vhca_id))
133 		goto end;
134 
135 	mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
136 	if (!mvdev->cb_wq)
137 		goto end;
138 
139 	mutex_init(&mvdev->state_mutex);
140 	spin_lock_init(&mvdev->reset_lock);
141 	mvdev->nb.notifier_call = mlx5fv_vf_event;
142 	ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
143 						    &mvdev->nb);
144 	if (ret) {
145 		destroy_workqueue(mvdev->cb_wq);
146 		goto end;
147 	}
148 
149 	mvdev->migrate_cap = 1;
150 	mvdev->core_device.vdev.migration_flags =
151 		VFIO_MIGRATION_STOP_COPY |
152 		VFIO_MIGRATION_P2P;
153 	mvdev->core_device.vdev.mig_ops = mig_ops;
154 
155 end:
156 	mlx5_vf_put_core_dev(mvdev->mdev);
157 }
158 
159 static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
160 				  u16 *vhca_id)
161 {
162 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
163 	int out_size;
164 	void *out;
165 	int ret;
166 
167 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
168 	out = kzalloc(out_size, GFP_KERNEL);
169 	if (!out)
170 		return -ENOMEM;
171 
172 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
173 	MLX5_SET(query_hca_cap_in, in, other_function, 1);
174 	MLX5_SET(query_hca_cap_in, in, function_id, function_id);
175 	MLX5_SET(query_hca_cap_in, in, op_mod,
176 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
177 		 HCA_CAP_OPMOD_GET_CUR);
178 
179 	ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
180 	if (ret)
181 		goto err_exec;
182 
183 	*vhca_id = MLX5_GET(query_hca_cap_out, out,
184 			    capability.cmd_hca_cap.vhca_id);
185 
186 err_exec:
187 	kfree(out);
188 	return ret;
189 }
190 
191 static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
192 			      struct mlx5_vf_migration_file *migf, u32 *mkey)
193 {
194 	size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
195 	struct sg_dma_page_iter dma_iter;
196 	int err = 0, inlen;
197 	__be64 *mtt;
198 	void *mkc;
199 	u32 *in;
200 
201 	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
202 		sizeof(*mtt) * round_up(npages, 2);
203 
204 	in = kvzalloc(inlen, GFP_KERNEL);
205 	if (!in)
206 		return -ENOMEM;
207 
208 	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
209 		 DIV_ROUND_UP(npages, 2));
210 	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
211 
212 	for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
213 		*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
214 
215 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
216 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
217 	MLX5_SET(mkc, mkc, lr, 1);
218 	MLX5_SET(mkc, mkc, lw, 1);
219 	MLX5_SET(mkc, mkc, rr, 1);
220 	MLX5_SET(mkc, mkc, rw, 1);
221 	MLX5_SET(mkc, mkc, pd, pdn);
222 	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
223 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
224 	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
225 	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
226 	MLX5_SET64(mkc, mkc, len, migf->total_length);
227 	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
228 	kvfree(in);
229 	return err;
230 }
231 
232 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
233 {
234 	struct mlx5vf_async_data *async_data = container_of(_work,
235 		struct mlx5vf_async_data, work);
236 	struct mlx5_vf_migration_file *migf = container_of(async_data,
237 		struct mlx5_vf_migration_file, async_data);
238 	struct mlx5_core_dev *mdev = migf->mvdev->mdev;
239 
240 	mutex_lock(&migf->lock);
241 	if (async_data->status) {
242 		migf->is_err = true;
243 		wake_up_interruptible(&migf->poll_wait);
244 	}
245 	mutex_unlock(&migf->lock);
246 
247 	mlx5_core_destroy_mkey(mdev, async_data->mkey);
248 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
249 	mlx5_core_dealloc_pd(mdev, async_data->pdn);
250 	kvfree(async_data->out);
251 	fput(migf->filp);
252 }
253 
254 static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
255 {
256 	struct mlx5vf_async_data *async_data = container_of(context,
257 			struct mlx5vf_async_data, cb_work);
258 	struct mlx5_vf_migration_file *migf = container_of(async_data,
259 			struct mlx5_vf_migration_file, async_data);
260 
261 	if (!status) {
262 		WRITE_ONCE(migf->total_length,
263 			   MLX5_GET(save_vhca_state_out, async_data->out,
264 				    actual_image_size));
265 		wake_up_interruptible(&migf->poll_wait);
266 	}
267 
268 	/*
269 	 * The error and the cleanup flows can't run from an
270 	 * interrupt context
271 	 */
272 	async_data->status = status;
273 	queue_work(migf->mvdev->cb_wq, &async_data->work);
274 }
275 
276 int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
277 			       struct mlx5_vf_migration_file *migf)
278 {
279 	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
280 	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
281 	struct mlx5vf_async_data *async_data;
282 	struct mlx5_core_dev *mdev;
283 	u32 pdn, mkey;
284 	int err;
285 
286 	lockdep_assert_held(&mvdev->state_mutex);
287 	if (mvdev->mdev_detach)
288 		return -ENOTCONN;
289 
290 	mdev = mvdev->mdev;
291 	err = mlx5_core_alloc_pd(mdev, &pdn);
292 	if (err)
293 		return err;
294 
295 	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
296 			      0);
297 	if (err)
298 		goto err_dma_map;
299 
300 	err = _create_state_mkey(mdev, pdn, migf, &mkey);
301 	if (err)
302 		goto err_create_mkey;
303 
304 	MLX5_SET(save_vhca_state_in, in, opcode,
305 		 MLX5_CMD_OP_SAVE_VHCA_STATE);
306 	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
307 	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
308 	MLX5_SET(save_vhca_state_in, in, mkey, mkey);
309 	MLX5_SET(save_vhca_state_in, in, size, migf->total_length);
310 
311 	async_data = &migf->async_data;
312 	async_data->out = kvzalloc(out_size, GFP_KERNEL);
313 	if (!async_data->out) {
314 		err = -ENOMEM;
315 		goto err_out;
316 	}
317 
318 	/* no data exists till the callback comes back */
319 	migf->total_length = 0;
320 	get_file(migf->filp);
321 	async_data->mkey = mkey;
322 	async_data->pdn = pdn;
323 	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
324 			       async_data->out,
325 			       out_size, mlx5vf_save_callback,
326 			       &async_data->cb_work);
327 	if (err)
328 		goto err_exec;
329 
330 	return 0;
331 
332 err_exec:
333 	fput(migf->filp);
334 	kvfree(async_data->out);
335 err_out:
336 	mlx5_core_destroy_mkey(mdev, mkey);
337 err_create_mkey:
338 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
339 err_dma_map:
340 	mlx5_core_dealloc_pd(mdev, pdn);
341 	return err;
342 }
343 
344 int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
345 			       struct mlx5_vf_migration_file *migf)
346 {
347 	struct mlx5_core_dev *mdev;
348 	u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
349 	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
350 	u32 pdn, mkey;
351 	int err;
352 
353 	lockdep_assert_held(&mvdev->state_mutex);
354 	if (mvdev->mdev_detach)
355 		return -ENOTCONN;
356 
357 	mutex_lock(&migf->lock);
358 	if (!migf->total_length) {
359 		err = -EINVAL;
360 		goto end;
361 	}
362 
363 	mdev = mvdev->mdev;
364 	err = mlx5_core_alloc_pd(mdev, &pdn);
365 	if (err)
366 		goto end;
367 
368 	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
369 	if (err)
370 		goto err_reg;
371 
372 	err = _create_state_mkey(mdev, pdn, migf, &mkey);
373 	if (err)
374 		goto err_mkey;
375 
376 	MLX5_SET(load_vhca_state_in, in, opcode,
377 		 MLX5_CMD_OP_LOAD_VHCA_STATE);
378 	MLX5_SET(load_vhca_state_in, in, op_mod, 0);
379 	MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
380 	MLX5_SET(load_vhca_state_in, in, mkey, mkey);
381 	MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
382 
383 	err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);
384 
385 	mlx5_core_destroy_mkey(mdev, mkey);
386 err_mkey:
387 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
388 err_reg:
389 	mlx5_core_dealloc_pd(mdev, pdn);
390 end:
391 	mutex_unlock(&migf->lock);
392 	return err;
393 }
394