// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
/* Copyright 2025 Arm, Ltd. */

#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_print.h>
#include <drm/ethosu_accel.h>

#include "ethosu_device.h"
#include "ethosu_drv.h"
#include "ethosu_gem.h"
#include "ethosu_job.h"

#define JOB_TIMEOUT_MS 500

static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct ethosu_job, base);
}

static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
{
	return "ethosu";
}

static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
{
	return "ethosu-npu";
}

static const struct dma_fence_ops ethosu_fence_ops = {
	.get_driver_name = ethosu_fence_get_driver_name,
	.get_timeline_name = ethosu_fence_get_timeline_name,
};

static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
{
	struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
	struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;

	for (int i = 0; i < job->region_cnt; i++) {
		struct drm_gem_dma_object *bo;
		int region = job->region_bo_num[i];

		bo = to_drm_gem_dma_obj(job->region_bo[i]);
		writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
		writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
	}

	if (job->sram_size) {
		writel_relaxed(lower_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
		writel_relaxed(upper_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
			ETHOSU_SRAM_REGION, &dev->sramphys);
	}

	writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
	writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
	writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);

	writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);

	dev_dbg(dev->base.dev,
		"Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
}

static int ethosu_acquire_object_fences(struct ethosu_job *job)
{
	int i, ret;
	struct drm_gem_object **bos = job->region_bo;
	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;

	for (i = 0; i < job->region_cnt; i++) {
		bool is_write;

		if (!bos[i])
			break;

		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		is_write = info->output_region[job->region_bo_num[i]];
		ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
							      is_write);
		if (ret)
			return ret;
	}

	return 0;
}

static void ethosu_attach_object_fences(struct ethosu_job *job)
{
	int i;
	struct dma_fence *fence = job->inference_done_fence;
	struct drm_gem_object **bos = job->region_bo;
	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;

	for (i = 0; i < job->region_cnt; i++)
		if (info->output_region[job->region_bo_num[i]])
			dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}
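
/*
 * Lock the reservations of all region BOs, record their implicit
 * dependencies, then arm the job and push it to the scheduler while
 * holding sched_lock. The scheduler's finished fence is attached as a
 * write fence to every output region BO before the reservations are
 * dropped, so implicit-sync users of the output buffers wait for the
 * inference to complete.
 */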
static int ethosu_job_push(struct ethosu_job *job)
{
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	if (ret)
		return ret;

	ret = ethosu_acquire_object_fences(job);
	if (ret)
		goto out;

	ret = pm_runtime_resume_and_get(job->dev->base.dev);
	if (!ret) {
		guard(mutex)(&job->dev->sched_lock);

		drm_sched_job_arm(&job->base);
		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
		kref_get(&job->refcount); /* put by scheduler job completion */
		drm_sched_entity_push_job(&job->base);
		ethosu_attach_object_fences(job);
	}

out:
	drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	return ret;
}

static void ethosu_job_cleanup(struct kref *ref)
{
	struct ethosu_job *job = container_of(ref, struct ethosu_job,
					      refcount);
	unsigned int i;

	pm_runtime_put_autosuspend(job->dev->base.dev);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->inference_done_fence);

	for (i = 0; i < job->region_cnt; i++)
		drm_gem_object_put(job->region_bo[i]);

	drm_gem_object_put(job->cmd_bo);

	kfree(job);
}

static void ethosu_job_put(struct ethosu_job *job)
{
	kref_put(&job->refcount, ethosu_job_cleanup);
}

static void ethosu_job_free(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);

	drm_sched_job_cleanup(sched_job);
	ethosu_job_put(job);
}

static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);
	struct ethosu_device *dev = job->dev;
	struct dma_fence *fence = job->done_fence;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
		       dev->fence_context, ++dev->emit_seqno);
	dma_fence_get(fence);

	scoped_guard(mutex, &dev->job_lock) {
		dev->in_flight_job = job;
		ethosu_job_hw_submit(dev, job);
	}

	return fence;
}

static void ethosu_job_handle_irq(struct ethosu_device *dev)
{
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
		dev_err(dev->base.dev, "Error IRQ - %x\n", status);
		drm_sched_fault(&dev->sched);
		return;
	}

	scoped_guard(mutex, &dev->job_lock) {
		if (dev->in_flight_job) {
			dma_fence_signal(dev->in_flight_job->done_fence);
			dev->in_flight_job = NULL;
		}
	}
}

static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
{
	struct ethosu_device *dev = data;

	ethosu_job_handle_irq(dev);

	return IRQ_HANDLED;
}

static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
{
	struct ethosu_device *dev = data;
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (!(status & STATUS_IRQ_RAISED))
		return IRQ_NONE;

	writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
	return IRQ_WAKE_THREAD;
}
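
/*
 * Scheduler timeout handler. A long-running command stream is not treated as
 * a hang as long as the NPU keeps consuming it: if QREAD advances within
 * 100ms the job is left running. Otherwise the device is reset by forcing a
 * runtime suspend/resume cycle and the scheduler is restarted.
 */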
static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
{
	struct ethosu_job *job = to_ethosu_job(bad);
	struct ethosu_device *dev = job->dev;
	bool running;
	u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
	u32 cmdaddr;

	cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
	running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));

	if (running) {
		int ret;
		u32 reg;

		ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
						 reg,
						 reg != cmdaddr,
						 USEC_PER_MSEC, 100 * USEC_PER_MSEC);

		/* If still running and progress is being made, just return */
		if (!ret)
			return DRM_GPU_SCHED_STAT_NO_HANG;
	}

	dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
		running ? "running" : "stopped",
		cmdaddr, bocmds[cmdaddr / 4]);

	drm_sched_stop(&dev->sched, bad);

	scoped_guard(mutex, &dev->job_lock)
		dev->in_flight_job = NULL;

	/* Proceed with reset now. */
	pm_runtime_force_suspend(dev->base.dev);
	pm_runtime_force_resume(dev->base.dev);

	/* Restart the scheduler */
	drm_sched_start(&dev->sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}

static const struct drm_sched_backend_ops ethosu_sched_ops = {
	.run_job = ethosu_job_run,
	.timedout_job = ethosu_job_timedout,
	.free_job = ethosu_job_free,
};

int ethosu_job_init(struct ethosu_device *edev)
{
	struct device *dev = edev->base.dev;
	struct drm_sched_init_args args = {
		.ops = &ethosu_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 1,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.name = dev_name(dev),
		.dev = dev,
	};
	int ret;

	spin_lock_init(&edev->fence_lock);
	ret = devm_mutex_init(dev, &edev->job_lock);
	if (ret)
		return ret;
	ret = devm_mutex_init(dev, &edev->sched_lock);
	if (ret)
		return ret;

	edev->irq = platform_get_irq(to_platform_device(dev), 0);
	if (edev->irq < 0)
		return edev->irq;

	ret = devm_request_threaded_irq(dev, edev->irq,
					ethosu_job_irq_handler,
					ethosu_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME,
					edev);
	if (ret) {
		dev_err(dev, "failed to request irq\n");
		return ret;
	}

	edev->fence_context = dma_fence_context_alloc(1);

	ret = drm_sched_init(&edev->sched, &args);
	if (ret) {
		dev_err(dev, "Failed to create scheduler: %d\n", ret);
		return ret;
	}

	return 0;
}

void ethosu_job_fini(struct ethosu_device *dev)
{
	drm_sched_fini(&dev->sched);
}

int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
{
	struct ethosu_device *dev = ethosu_priv->edev;
	struct drm_gpu_scheduler *sched = &dev->sched;
	int ret;

	ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
				    DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	WARN_ON(ret);

	return ret;
}

void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
{
	struct drm_sched_entity *entity = &ethosu_priv->sched_entity;

	drm_sched_entity_destroy(entity);
}
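
/*
 * Validate and queue a single job from the submit ioctl. The command stream
 * BO must already carry validated cmdstream info, every region the command
 * stream touches must be backed by a BO large enough for its accesses, and
 * region ETHOSU_SRAM_REGION is reserved for on-chip SRAM when sram_size is
 * set.
 */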
static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
				   struct drm_ethosu_job *job)
{
	struct ethosu_device *edev = to_ethosu_device(dev);
	struct ethosu_file_priv *file_priv = file->driver_priv;
	struct ethosu_job *ejob = NULL;
	struct ethosu_validated_cmdstream_info *cmd_info;
	int ret = 0;

	/* BO region 2 is reserved if SRAM is used */
	if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
		return -EINVAL;

	if (edev->npu_info.sram_size < job->sram_size)
		return -EINVAL;

	ejob = kzalloc(sizeof(*ejob), GFP_KERNEL);
	if (!ejob)
		return -ENOMEM;

	kref_init(&ejob->refcount);

	ejob->dev = edev;
	ejob->sram_size = job->sram_size;

	ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL);
	if (!ejob->done_fence) {
		ret = -ENOMEM;
		goto out_put_job;
	}

	ret = drm_sched_job_init(&ejob->base,
				 &file_priv->sched_entity,
				 1, NULL, file->client_id);
	if (ret)
		goto out_put_job;

	ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
	if (!ejob->cmd_bo) {
		ret = -ENOENT;
		goto out_cleanup_job;
	}
	cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
	if (!cmd_info) {
		ret = -EINVAL;
		goto out_cleanup_job;
	}

	for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
		struct drm_gem_object *gem;

		/* Can only omit a BO handle if the region is not used or used for SRAM */
		if (!job->region_bo_handles[i] &&
		    (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
			continue;

		if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
			dev_err(dev->dev,
				"Cmdstream BO handle %d set for unused region %d\n",
				job->region_bo_handles[i], i);
			ret = -EINVAL;
			goto out_cleanup_job;
		}

		gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
		if (!gem) {
			dev_err(dev->dev,
				"Invalid BO handle %d for region %d\n",
				job->region_bo_handles[i], i);
			ret = -ENOENT;
			goto out_cleanup_job;
		}

		ejob->region_bo[ejob->region_cnt] = gem;
		ejob->region_bo_num[ejob->region_cnt] = i;
		ejob->region_cnt++;

		if (to_ethosu_bo(gem)->info) {
			dev_err(dev->dev,
				"Cmdstream BO handle %d used for region %d\n",
				job->region_bo_handles[i], i);
			ret = -EINVAL;
			goto out_cleanup_job;
		}

		/* Verify the command stream doesn't have accesses outside the BO */
		if (cmd_info->region_size[i] > gem->size) {
			dev_err(dev->dev,
				"cmd stream region %d size greater than BO size (%llu > %zu)\n",
				i, cmd_info->region_size[i], gem->size);
			ret = -EOVERFLOW;
			goto out_cleanup_job;
		}
	}

	ret = ethosu_job_push(ejob);

out_cleanup_job:
	if (ret)
		drm_sched_job_cleanup(&ejob->base);
out_put_job:
	ethosu_job_put(ejob);

	return ret;
}

int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_ethosu_submit *args = data;
	int ret = 0;
	unsigned int i = 0;

	if (args->pad) {
		drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
		return -EINVAL;
	}

	struct drm_ethosu_job __free(kvfree) *jobs =
		kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
	if (!jobs)
		return -ENOMEM;

	if (copy_from_user(jobs,
			   (void __user *)(uintptr_t)args->jobs,
			   args->job_count * sizeof(*jobs))) {
		drm_dbg(dev, "Failed to copy incoming job array\n");
		return -EFAULT;
	}

	for (i = 0; i < args->job_count; i++) {
		ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
		if (ret)
			return ret;
	}

	return 0;
}