// SPDX-License-Identifier: GPL-2.0 OR MIT /* * Copyright 2014-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * */ #include #include #include "kfd_device_queue_manager.h" #include "kfd_priv.h" #include "kfd_kernel_queue.h" #include "amdgpu_amdkfd.h" #include "amdgpu_reset.h" static inline struct process_queue_node *get_queue_by_qid( struct process_queue_manager *pqm, unsigned int qid) { struct process_queue_node *pqn; list_for_each_entry(pqn, &pqm->queues, process_queue_list) { if ((pqn->q && pqn->q->properties.queue_id == qid) || (pqn->kq && pqn->kq->queue->properties.queue_id == qid)) return pqn; } return NULL; } static int assign_queue_slot_by_qid(struct process_queue_manager *pqm, unsigned int qid) { if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return -EINVAL; if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) { pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid); return -ENOSPC; } return 0; } static int find_available_queue_slot(struct process_queue_manager *pqm, unsigned int *qid) { unsigned long found; found = find_first_zero_bit(pqm->queue_slot_bitmap, KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); pr_debug("The new slot id %lu\n", found); if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { pr_info("Cannot open more queues for process with pasid 0x%x\n", pqm->process->pasid); return -ENOMEM; } set_bit(found, pqm->queue_slot_bitmap); *qid = found; return 0; } void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) { struct kfd_node *dev = pdd->dev; if (pdd->already_dequeued) return; dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); if (dev->kfd->shared_resources.enable_mes && down_read_trylock(&dev->adev->reset_domain->sem)) { amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); up_read(&dev->adev->reset_domain->sem); } pdd->already_dequeued = true; } int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, void *gws) { struct mqd_update_info minfo = {0}; struct kfd_node *dev = NULL; struct process_queue_node *pqn; struct kfd_process_device *pdd; struct kgd_mem *mem = NULL; int ret; pqn = get_queue_by_qid(pqm, qid); if (!pqn) { pr_err("Queue id does not match any known queue\n"); return -EINVAL; } if (pqn->q) dev = pqn->q->device; if (WARN_ON(!dev)) return -ENODEV; pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) { pr_err("Process device data doesn't exist\n"); return -EINVAL; } /* Only allow one queue per process can have GWS assigned */ if (gws && pdd->qpd.num_gws) return -EBUSY; if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && !dev->kfd->shared_resources.enable_mes) { if (gws) ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, gws, &mem); else ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, pqn->q->gws); if (unlikely(ret)) return ret; pqn->q->gws = mem; } else { /* * Intentionally set GWS to a non-NULL value * for devices that do not use GWS for global wave * synchronization but require the formality * of setting GWS for cooperative groups. */ pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL; } pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, pqn->q, &minfo); } void kfd_process_dequeue_from_all_devices(struct kfd_process *p) { int i; for (i = 0; i < p->n_pdds; i++) kfd_process_dequeue_from_device(p->pdds[i]); } int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, GFP_KERNEL); if (!pqm->queue_slot_bitmap) return -ENOMEM; pqm->process = p; return 0; } static void pqm_clean_queue_resource(struct process_queue_manager *pqm, struct process_queue_node *pqn) { struct kfd_node *dev; struct kfd_process_device *pdd; dev = pqn->q->device; pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) { pr_err("Process device data doesn't exist\n"); return; } if (pqn->q->gws) { if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) && !dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_remove_gws_from_process( pqm->process->kgd_process_info, pqn->q->gws); pdd->qpd.num_gws = 0; } if (dev->kfd->shared_resources.enable_mes) { amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart); } } void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q) { struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); if (pdd) { kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); kfd_queue_release_buffers(pdd, &pqn->q->properties); } else { WARN_ON(!pdd); } pqm_clean_queue_resource(pqm, pqn); } kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); list_del(&pqn->process_queue_list); kfree(pqn); } bitmap_free(pqm->queue_slot_bitmap); pqm->queue_slot_bitmap = NULL; } static int init_user_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue **q, struct queue_properties *q_properties, unsigned int qid) { int retval; /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW); /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; retval = init_queue(q, q_properties); if (retval != 0) return retval; (*q)->device = dev; (*q)->process = pqm->process; if (dev->kfd->shared_resources.enable_mes) { retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, AMDGPU_MES_GANG_CTX_SIZE, &(*q)->gang_ctx_bo, &(*q)->gang_ctx_gpu_addr, &(*q)->gang_ctx_cpu_ptr, false); if (retval) { pr_err("failed to allocate gang context bo\n"); goto cleanup; } memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) */ if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) { pr_err("Queue memory allocated to wrong device\n"); retval = -EINVAL; goto free_gang_ctx_bo; } retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo, &(*q)->wptr_bo_gart); if (retval) { pr_err("Failed to map wptr bo to GART\n"); goto free_gang_ctx_bo; } } } pr_debug("PQM After init queue"); return 0; free_gang_ctx_bo: amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; return retval; } int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue_properties *properties, unsigned int *qid, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, uint32_t *p_doorbell_offset_in_process) { int retval; struct kfd_process_device *pdd; struct queue *q; struct process_queue_node *pqn; struct kernel_queue *kq; enum kfd_queue_type type = properties->type; unsigned int max_queues = 127; /* HWS limit */ /* * On GFX 9.4.3, increase the number of queues that * can be created to 255. No HWS limit on GFX 9.4.3. */ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) max_queues = 255; q = NULL; kq = NULL; pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) { pr_err("Process device data doesn't exist\n"); return -1; } /* * for debug process, verify that it is within the static queues limit * currently limit is set to half of the total avail HQD slots * If we are just about to create DIQ, the is_debug flag is not set yet * Hence we also check the type as well */ if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) max_queues = dev->kfd->device_info.max_no_of_hqd/2; if (pdd->qpd.queue_count >= max_queues) return -ENOSPC; if (q_data) { retval = assign_queue_slot_by_qid(pqm, q_data->q_id); *qid = q_data->q_id; } else retval = find_available_queue_slot(pqm, qid); if (retval != 0) return retval; if (list_empty(&pdd->qpd.queues_list) && list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); if (!pqn) { retval = -ENOMEM; goto err_allocate_pqn; } switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because * allocate_sdma_queue() in create_queue() has the * corresponding check logic. */ retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data, restore_mqd, restore_ctl_stack); print_queue(q); break; case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((dev->dqm->sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) { pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n"); retval = -EPERM; goto err_create_queue; } retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data, restore_mqd, restore_ctl_stack); print_queue(q); break; case KFD_QUEUE_TYPE_DIQ: kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); if (!kq) { retval = -ENOMEM; goto err_create_queue; } kq->queue->properties.queue_id = *qid; pqn->kq = kq; pqn->q = NULL; retval = kfd_process_drain_interrupts(pdd); if (retval) break; retval = dev->dqm->ops.create_kernel_queue(dev->dqm, kq, &pdd->qpd); break; default: WARN(1, "Invalid queue type %d", type); retval = -EINVAL; } if (retval != 0) { pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n", pqm->process->pasid, type, retval); goto err_create_queue; } if (q && p_doorbell_offset_in_process) { /* Return the doorbell offset within the doorbell page * to the caller so it can be passed up to user mode * (in bytes). * relative doorbell index = Absolute doorbell index - * absolute index of first doorbell in the page. */ uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev, pdd->qpd.proc_doorbells, 0, pdd->dev->kfd->device_info.doorbell_size); *p_doorbell_offset_in_process = (q->properties.doorbell_off - first_db_index) * sizeof(uint32_t); } pr_debug("PQM After DQM create queue\n"); list_add(&pqn->process_queue_list, &pqm->queues); if (q) { pr_debug("PQM done creating queue\n"); kfd_procfs_add_queue(q); print_queue_properties(&q->properties); } return retval; err_create_queue: uninit_queue(q); if (kq) kernel_queue_uninit(kq); kfree(pqn); err_allocate_pqn: /* check if queues list is empty unregister process from device */ clear_bit(*qid, pqm->queue_slot_bitmap); if (list_empty(&pdd->qpd.queues_list) && list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); return retval; } int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) { struct process_queue_node *pqn; struct kfd_process_device *pdd; struct device_queue_manager *dqm; struct kfd_node *dev; int retval; dqm = NULL; retval = 0; pqn = get_queue_by_qid(pqm, qid); if (!pqn) { pr_err("Queue id does not match any known queue\n"); return -EINVAL; } dev = NULL; if (pqn->kq) dev = pqn->kq->dev; if (pqn->q) dev = pqn->q->device; if (WARN_ON(!dev)) return -ENODEV; pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) { pr_err("Process device data doesn't exist\n"); return -1; } if (pqn->kq) { /* destroy kernel queue (DIQ) */ dqm = pqn->kq->dev->dqm; dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); kernel_queue_uninit(pqn->kq); } if (pqn->q) { retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); if (retval) goto err_destroy_queue; dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n", pqm->process->pasid, pqn->q->properties.queue_id, retval); if (retval != -ETIME) goto err_destroy_queue; } kfd_procfs_del_queue(pqn->q); kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } list_del(&pqn->process_queue_list); kfree(pqn); clear_bit(qid, pqm->queue_slot_bitmap); if (list_empty(&pdd->qpd.queues_list) && list_empty(&pdd->qpd.priv_queue_list)) dqm->ops.unregister_process(dqm, &pdd->qpd); err_destroy_queue: return retval; } int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p) { int retval; struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); if (!pqn || !pqn->q) { pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } /* * Update with NULL ring address is used to disable the queue */ if (p->queue_address && p->queue_size) { struct kfd_process_device *pdd; struct amdgpu_vm *vm; struct queue *q = pqn->q; int err; pdd = kfd_get_process_device_data(q->device, q->process); if (!pdd) return -ENODEV; vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); if (err) return err; if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, p->queue_size)) { pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", p->queue_address, p->queue_size); return -EFAULT; } kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo); kfd_queue_buffer_put(&pqn->q->properties.ring_bo); amdgpu_bo_unreserve(vm->root.bo); pqn->q->properties.ring_bo = p->ring_bo; } pqn->q->properties.queue_address = p->queue_address; pqn->q->properties.queue_size = p->queue_size; pqn->q->properties.queue_percent = p->queue_percent; pqn->q->properties.priority = p->priority; pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc; retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, pqn->q, NULL); if (retval != 0) return retval; return 0; } int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid, struct mqd_update_info *minfo) { int retval; struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); if (!pqn) { pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } /* CUs are masked for debugger requirements so deny user mask */ if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr) return -EBUSY; /* ASICs that have WGPs must enforce pairwise enabled mask checks. */ if (minfo && minfo->cu_mask.ptr && KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) { int i; for (i = 0; i < minfo->cu_mask.count; i += 2) { uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3; if (cu_pair && cu_pair != 0x3) { pr_debug("CUs must be adjacent pairwise enabled.\n"); return -EINVAL; } } } retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, pqn->q, minfo); if (retval != 0) return retval; if (minfo && minfo->cu_mask.ptr) pqn->q->properties.is_user_cu_masked = true; return 0; } struct kernel_queue *pqm_get_kernel_queue( struct process_queue_manager *pqm, unsigned int qid) { struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); if (pqn && pqn->kq) return pqn->kq; return NULL; } struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, unsigned int qid) { struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); return pqn ? pqn->q : NULL; } int pqm_get_wave_state(struct process_queue_manager *pqm, unsigned int qid, void __user *ctl_stack, u32 *ctl_stack_used_size, u32 *save_area_used_size) { struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); if (!pqn) { pr_debug("amdkfd: No queue %d exists for operation\n", qid); return -EFAULT; } return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm, pqn->q, ctl_stack, ctl_stack_used_size, save_area_used_size); } int pqm_get_queue_snapshot(struct process_queue_manager *pqm, uint64_t exception_clear_mask, void __user *buf, int *num_qss_entries, uint32_t *entry_size) { struct process_queue_node *pqn; struct kfd_queue_snapshot_entry src; uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries; int r = 0; *num_qss_entries = 0; if (!(*entry_size)) return -EINVAL; *entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry)); mutex_lock(&pqm->process->event_mutex); memset(&src, 0, sizeof(src)); list_for_each_entry(pqn, &pqm->queues, process_queue_list) { if (!pqn->q) continue; if (*num_qss_entries < tmp_qss_entries) { set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src); if (copy_to_user(buf, &src, *entry_size)) { r = -EFAULT; break; } buf += tmp_entry_size; } *num_qss_entries += 1; } mutex_unlock(&pqm->process->event_mutex); return r; } static int get_queue_data_sizes(struct kfd_process_device *pdd, struct queue *q, uint32_t *mqd_size, uint32_t *ctl_stack_size) { int ret; ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm, q->properties.queue_id, mqd_size, ctl_stack_size); if (ret) pr_err("Failed to get queue dump info (%d)\n", ret); return ret; } int kfd_process_get_queue_info(struct kfd_process *p, uint32_t *num_queues, uint64_t *priv_data_sizes) { uint32_t extra_data_sizes = 0; struct queue *q; int i; int ret; *num_queues = 0; /* Run over all PDDs of the process */ for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; list_for_each_entry(q, &pdd->qpd.queues_list, list) { if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { uint32_t mqd_size, ctl_stack_size; *num_queues = *num_queues + 1; ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size); if (ret) return ret; extra_data_sizes += mqd_size + ctl_stack_size; } else { pr_err("Unsupported queue type (%d)\n", q->properties.type); return -EOPNOTSUPP; } } } *priv_data_sizes = extra_data_sizes + (*num_queues * sizeof(struct kfd_criu_queue_priv_data)); return 0; } static int pqm_checkpoint_mqd(struct process_queue_manager *pqm, unsigned int qid, void *mqd, void *ctl_stack) { struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); if (!pqn) { pr_debug("amdkfd: No queue %d exists for operation\n", qid); return -EFAULT; } if (!pqn->q->device->dqm->ops.checkpoint_mqd) { pr_err("amdkfd: queue dumping not supported on this device\n"); return -EOPNOTSUPP; } return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm, pqn->q, mqd, ctl_stack); } static int criu_checkpoint_queue(struct kfd_process_device *pdd, struct queue *q, struct kfd_criu_queue_priv_data *q_data) { uint8_t *mqd, *ctl_stack; int ret; mqd = (void *)(q_data + 1); ctl_stack = mqd + q_data->mqd_size; q_data->gpu_id = pdd->user_gpu_id; q_data->type = q->properties.type; q_data->format = q->properties.format; q_data->q_id = q->properties.queue_id; q_data->q_address = q->properties.queue_address; q_data->q_size = q->properties.queue_size; q_data->priority = q->properties.priority; q_data->q_percent = q->properties.queue_percent; q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr; q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr; q_data->doorbell_id = q->doorbell_id; q_data->sdma_id = q->sdma_id; q_data->eop_ring_buffer_address = q->properties.eop_ring_buffer_address; q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size; q_data->ctx_save_restore_area_address = q->properties.ctx_save_restore_area_address; q_data->ctx_save_restore_area_size = q->properties.ctx_save_restore_area_size; q_data->gws = !!q->gws; ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack); if (ret) { pr_err("Failed checkpoint queue_mqd (%d)\n", ret); return ret; } pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id); return ret; } static int criu_checkpoint_queues_device(struct kfd_process_device *pdd, uint8_t __user *user_priv, unsigned int *q_index, uint64_t *queues_priv_data_offset) { unsigned int q_private_data_size = 0; uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */ struct queue *q; int ret = 0; list_for_each_entry(q, &pdd->qpd.queues_list, list) { struct kfd_criu_queue_priv_data *q_data; uint64_t q_data_size; uint32_t mqd_size; uint32_t ctl_stack_size; if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE && q->properties.type != KFD_QUEUE_TYPE_SDMA && q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) { pr_err("Unsupported queue type (%d)\n", q->properties.type); ret = -EOPNOTSUPP; break; } ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size); if (ret) break; q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size; /* Increase local buffer space if needed */ if (q_private_data_size < q_data_size) { kfree(q_private_data); q_private_data = kzalloc(q_data_size, GFP_KERNEL); if (!q_private_data) { ret = -ENOMEM; break; } q_private_data_size = q_data_size; } q_data = (struct kfd_criu_queue_priv_data *)q_private_data; /* data stored in this order: priv_data, mqd, ctl_stack */ q_data->mqd_size = mqd_size; q_data->ctl_stack_size = ctl_stack_size; ret = criu_checkpoint_queue(pdd, q, q_data); if (ret) break; q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE; ret = copy_to_user(user_priv + *queues_priv_data_offset, q_data, q_data_size); if (ret) { ret = -EFAULT; break; } *queues_priv_data_offset += q_data_size; *q_index = *q_index + 1; } kfree(q_private_data); return ret; } int kfd_criu_checkpoint_queues(struct kfd_process *p, uint8_t __user *user_priv_data, uint64_t *priv_data_offset) { int ret = 0, pdd_index, q_index = 0; for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) { struct kfd_process_device *pdd = p->pdds[pdd_index]; /* * criu_checkpoint_queues_device will copy data to user and update q_index and * queues_priv_data_offset */ ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index, priv_data_offset); if (ret) break; } return ret; } static void set_queue_properties_from_criu(struct queue_properties *qp, struct kfd_criu_queue_priv_data *q_data) { qp->is_interop = false; qp->queue_percent = q_data->q_percent; qp->priority = q_data->priority; qp->queue_address = q_data->q_address; qp->queue_size = q_data->q_size; qp->read_ptr = (uint32_t *) q_data->read_ptr_addr; qp->write_ptr = (uint32_t *) q_data->write_ptr_addr; qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address; qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size; qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address; qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size; qp->ctl_stack_size = q_data->ctl_stack_size; qp->type = q_data->type; qp->format = q_data->format; } int kfd_criu_restore_queue(struct kfd_process *p, uint8_t __user *user_priv_ptr, uint64_t *priv_data_offset, uint64_t max_priv_data_size) { uint8_t *mqd, *ctl_stack, *q_extra_data = NULL; struct kfd_criu_queue_priv_data *q_data; struct kfd_process_device *pdd; uint64_t q_extra_data_size; struct queue_properties qp; unsigned int queue_id; int ret = 0; if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size) return -EINVAL; q_data = kmalloc(sizeof(*q_data), GFP_KERNEL); if (!q_data) return -ENOMEM; ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data)); if (ret) { ret = -EFAULT; goto exit; } *priv_data_offset += sizeof(*q_data); q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size; if (*priv_data_offset + q_extra_data_size > max_priv_data_size) { ret = -EINVAL; goto exit; } q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL); if (!q_extra_data) { ret = -ENOMEM; goto exit; } ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size); if (ret) { ret = -EFAULT; goto exit; } *priv_data_offset += q_extra_data_size; pdd = kfd_process_device_data_by_id(p, q_data->gpu_id); if (!pdd) { pr_err("Failed to get pdd\n"); ret = -EINVAL; goto exit; } /* data stored in this order: mqd, ctl_stack */ mqd = q_extra_data; ctl_stack = mqd + q_data->mqd_size; memset(&qp, 0, sizeof(qp)); set_queue_properties_from_criu(&qp, q_data); print_queue_properties(&qp); ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); goto exit; } if (q_data->gws) ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws); exit: if (ret) pr_err("Failed to restore queue (%d)\n", ret); else pr_debug("Queue id %d was restored successfully\n", queue_id); kfree(q_data); kfree(q_extra_data); return ret; } int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm, unsigned int qid, uint32_t *mqd_size, uint32_t *ctl_stack_size) { struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); if (!pqn) { pr_debug("amdkfd: No queue %d exists for operation\n", qid); return -EFAULT; } if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) { pr_err("amdkfd: queue dumping not supported on this device\n"); return -EOPNOTSUPP; } pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm, pqn->q, mqd_size, ctl_stack_size); return 0; } #if defined(CONFIG_DEBUG_FS) int pqm_debugfs_mqds(struct seq_file *m, void *data) { struct process_queue_manager *pqm = data; struct process_queue_node *pqn; struct queue *q; enum KFD_MQD_TYPE mqd_type; struct mqd_manager *mqd_mgr; int r = 0, xcc, num_xccs = 1; void *mqd; uint64_t size = 0; list_for_each_entry(pqn, &pqm->queues, process_queue_list) { if (pqn->q) { q = pqn->q; switch (q->properties.type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: seq_printf(m, " SDMA queue on device %x\n", q->device->id); mqd_type = KFD_MQD_TYPE_SDMA; break; case KFD_QUEUE_TYPE_COMPUTE: seq_printf(m, " Compute queue on device %x\n", q->device->id); mqd_type = KFD_MQD_TYPE_CP; num_xccs = NUM_XCC(q->device->xcc_mask); break; default: seq_printf(m, " Bad user queue type %d on device %x\n", q->properties.type, q->device->id); continue; } mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; size = mqd_mgr->mqd_stride(mqd_mgr, &q->properties); } else if (pqn->kq) { q = pqn->kq->queue; mqd_mgr = pqn->kq->mqd_mgr; switch (q->properties.type) { case KFD_QUEUE_TYPE_DIQ: seq_printf(m, " DIQ on device %x\n", pqn->kq->dev->id); break; default: seq_printf(m, " Bad kernel queue type %d on device %x\n", q->properties.type, pqn->kq->dev->id); continue; } } else { seq_printf(m, " Weird: Queue node with neither kernel nor user queue\n"); continue; } for (xcc = 0; xcc < num_xccs; xcc++) { mqd = q->mqd + size * xcc; r = mqd_mgr->debugfs_show_mqd(m, mqd); if (r != 0) break; } } return r; } #endif