1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Tegra host1x Channel 4 * 5 * Copyright (c) 2010-2013, NVIDIA Corporation. 6 */ 7 8 #include <linux/host1x.h> 9 #include <linux/iommu.h> 10 #include <linux/slab.h> 11 12 #include <trace/events/host1x.h> 13 14 #include "../channel.h" 15 #include "../dev.h" 16 #include "../intr.h" 17 #include "../job.h" 18 19 #define TRACE_MAX_LENGTH 128U 20 21 static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, 22 u32 offset, u32 words) 23 { 24 struct device *dev = cdma_to_channel(cdma)->dev; 25 void *mem = NULL; 26 27 if (host1x_debug_trace_cmdbuf) 28 mem = host1x_bo_mmap(bo); 29 30 if (mem) { 31 u32 i; 32 /* 33 * Write in batches of 128 as there seems to be a limit 34 * of how much you can output to ftrace at once. 35 */ 36 for (i = 0; i < words; i += TRACE_MAX_LENGTH) { 37 u32 num_words = min(words - i, TRACE_MAX_LENGTH); 38 39 offset += i * sizeof(u32); 40 41 trace_host1x_cdma_push_gather(dev_name(dev), bo, 42 num_words, offset, 43 mem); 44 } 45 46 host1x_bo_munmap(bo, mem); 47 } 48 } 49 50 static void submit_wait(struct host1x_job *job, u32 id, u32 threshold) 51 { 52 struct host1x_cdma *cdma = &job->channel->cdma; 53 54 #if HOST1X_HW >= 2 55 host1x_cdma_push_wide(cdma, 56 host1x_opcode_setclass( 57 HOST1X_CLASS_HOST1X, 58 HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, 59 /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ 60 BIT(0) | BIT(2) 61 ), 62 threshold, 63 id, 64 HOST1X_OPCODE_NOP 65 ); 66 #else 67 /* TODO add waitchk or use waitbases or other mitigation */ 68 host1x_cdma_push(cdma, 69 host1x_opcode_setclass( 70 HOST1X_CLASS_HOST1X, 71 host1x_uclass_wait_syncpt_r(), 72 BIT(0) 73 ), 74 host1x_class_host_wait_syncpt(id, threshold) 75 ); 76 #endif 77 } 78 79 static void submit_setclass(struct host1x_job *job, u32 next_class) 80 { 81 struct host1x_cdma *cdma = &job->channel->cdma; 82 83 #if HOST1X_HW >= 6 84 u32 stream_id; 85 86 /* 87 * If a memory context has been set, use it. Otherwise 88 * (if context isolation is disabled) use the engine's 89 * firmware stream ID. 90 */ 91 if (job->memory_context) 92 stream_id = job->memory_context->stream_id; 93 else 94 stream_id = job->engine_fallback_streamid; 95 96 host1x_cdma_push_wide(cdma, 97 host1x_opcode_setclass(next_class, 0, 0), 98 host1x_opcode_setpayload(stream_id), 99 host1x_opcode_setstreamid(job->engine_streamid_offset / 4), 100 HOST1X_OPCODE_NOP); 101 #else 102 host1x_cdma_push(cdma, 103 host1x_opcode_setclass(next_class, 0, 0), 104 HOST1X_OPCODE_NOP 105 ); 106 #endif 107 } 108 109 static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds, 110 u32 job_syncpt_base) 111 { 112 struct host1x_cdma *cdma = &job->channel->cdma; 113 #if HOST1X_HW < 6 114 struct device *dev = job->channel->dev; 115 #endif 116 unsigned int i; 117 u32 threshold; 118 119 for (i = 0; i < num_cmds; i++) { 120 struct host1x_job_cmd *cmd = &cmds[i]; 121 122 if (cmd->is_wait) { 123 if (cmd->wait.relative) 124 threshold = job_syncpt_base + cmd->wait.threshold; 125 else 126 threshold = cmd->wait.threshold; 127 128 submit_wait(job, cmd->wait.id, threshold); 129 submit_setclass(job, cmd->wait.next_class); 130 } else { 131 struct host1x_job_gather *g = &cmd->gather; 132 133 dma_addr_t addr = g->base + g->offset; 134 u32 op2, op3; 135 136 op2 = lower_32_bits(addr); 137 op3 = upper_32_bits(addr); 138 139 trace_write_gather(cdma, g->bo, g->offset, g->words); 140 141 if (op3 != 0) { 142 #if HOST1X_HW >= 6 143 u32 op1 = host1x_opcode_gather_wide(g->words); 144 u32 op4 = HOST1X_OPCODE_NOP; 145 146 host1x_cdma_push_wide(cdma, op1, op2, op3, op4); 147 #else 148 dev_err(dev, "invalid gather for push buffer %pad\n", 149 &addr); 150 continue; 151 #endif 152 } else { 153 u32 op1 = host1x_opcode_gather(g->words); 154 155 host1x_cdma_push(cdma, op1, op2); 156 } 157 } 158 } 159 } 160 161 static inline void synchronize_syncpt_base(struct host1x_job *job) 162 { 163 struct host1x_syncpt *sp = job->syncpt; 164 unsigned int id; 165 u32 value; 166 167 value = host1x_syncpt_read_max(sp); 168 id = sp->base->id; 169 170 host1x_cdma_push(&job->channel->cdma, 171 host1x_opcode_setclass(HOST1X_CLASS_HOST1X, 172 HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1), 173 HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(id) | 174 HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value)); 175 } 176 177 static void host1x_channel_set_streamid(struct host1x_channel *channel) 178 { 179 #if HOST1X_HW >= 6 180 u32 stream_id; 181 182 if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id)) 183 stream_id = TEGRA_STREAM_ID_BYPASS; 184 185 host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID); 186 #endif 187 } 188 189 static void host1x_enable_gather_filter(struct host1x_channel *ch) 190 { 191 #if HOST1X_HW >= 6 192 struct host1x *host = dev_get_drvdata(ch->dev->parent); 193 u32 val; 194 195 if (!host->hv_regs) 196 return; 197 198 val = host1x_hypervisor_readl( 199 host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); 200 val |= BIT(ch->id % 32); 201 host1x_hypervisor_writel( 202 host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); 203 #elif HOST1X_HW >= 4 204 host1x_ch_writel(ch, 205 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), 206 HOST1X_CHANNEL_CHANNELCTRL); 207 #endif 208 } 209 210 static void channel_program_cdma(struct host1x_job *job) 211 { 212 struct host1x_cdma *cdma = &job->channel->cdma; 213 struct host1x_syncpt *sp = job->syncpt; 214 215 #if HOST1X_HW >= 6 216 u32 fence; 217 int i = 0; 218 219 if (job->num_cmds == 0) 220 goto prefences_done; 221 if (!job->cmds[0].is_wait || job->cmds[0].wait.relative) 222 goto prefences_done; 223 224 /* Enter host1x class with invalid stream ID for prefence waits. */ 225 host1x_cdma_push_wide(cdma, 226 host1x_opcode_acquire_mlock(1), 227 host1x_opcode_setclass(1, 0, 0), 228 host1x_opcode_setpayload(0), 229 host1x_opcode_setstreamid(0x1fffff)); 230 231 for (i = 0; i < job->num_cmds; i++) { 232 struct host1x_job_cmd *cmd = &job->cmds[i]; 233 234 if (!cmd->is_wait || cmd->wait.relative) 235 break; 236 237 submit_wait(job, cmd->wait.id, cmd->wait.threshold); 238 } 239 240 host1x_cdma_push(cdma, 241 HOST1X_OPCODE_NOP, 242 host1x_opcode_release_mlock(1)); 243 244 prefences_done: 245 /* Enter engine class with invalid stream ID. */ 246 host1x_cdma_push_wide(cdma, 247 host1x_opcode_acquire_mlock(job->class), 248 host1x_opcode_setclass(job->class, 0, 0), 249 host1x_opcode_setpayload(0), 250 host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); 251 252 /* Before switching stream ID to real stream ID, ensure engine is idle. */ 253 fence = host1x_syncpt_incr_max(sp, 1); 254 host1x_cdma_push(&job->channel->cdma, 255 host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), 256 HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | 257 HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); 258 submit_wait(job, job->syncpt->id, fence); 259 submit_setclass(job, job->class); 260 261 /* Submit work. */ 262 job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); 263 submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs); 264 265 /* Before releasing MLOCK, ensure engine is idle again. */ 266 fence = host1x_syncpt_incr_max(sp, 1); 267 host1x_cdma_push(&job->channel->cdma, 268 host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), 269 HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | 270 HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); 271 submit_wait(job, job->syncpt->id, fence); 272 273 /* Release MLOCK. */ 274 host1x_cdma_push(cdma, 275 HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class)); 276 #else 277 if (job->serialize) { 278 /* 279 * Force serialization by inserting a host wait for the 280 * previous job to finish before this one can commence. 281 */ 282 host1x_cdma_push(cdma, 283 host1x_opcode_setclass(HOST1X_CLASS_HOST1X, 284 host1x_uclass_wait_syncpt_r(), 1), 285 host1x_class_host_wait_syncpt(job->syncpt->id, 286 host1x_syncpt_read_max(sp))); 287 } 288 289 /* Synchronize base register to allow using it for relative waiting */ 290 if (sp->base) 291 synchronize_syncpt_base(job); 292 293 /* add a setclass for modules that require it */ 294 if (job->class) 295 host1x_cdma_push(cdma, 296 host1x_opcode_setclass(job->class, 0, 0), 297 HOST1X_OPCODE_NOP); 298 299 job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); 300 301 submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs); 302 #endif 303 } 304 305 static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb) 306 { 307 struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb); 308 309 /* Schedules CDMA update. */ 310 host1x_cdma_update(&job->channel->cdma); 311 } 312 313 static int channel_submit(struct host1x_job *job) 314 { 315 struct host1x_channel *ch = job->channel; 316 struct host1x_syncpt *sp = job->syncpt; 317 u32 prev_max = 0; 318 u32 syncval; 319 int err; 320 struct host1x *host = dev_get_drvdata(ch->dev->parent); 321 322 trace_host1x_channel_submit(dev_name(ch->dev), 323 job->num_cmds, job->num_relocs, 324 job->syncpt->id, job->syncpt_incrs); 325 326 /* before error checks, return current max */ 327 prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); 328 329 /* get submit lock */ 330 err = mutex_lock_interruptible(&ch->submitlock); 331 if (err) 332 return err; 333 334 host1x_channel_set_streamid(ch); 335 host1x_enable_gather_filter(ch); 336 host1x_hw_syncpt_assign_to_channel(host, sp, ch); 337 338 /* begin a CDMA submit */ 339 err = host1x_cdma_begin(&ch->cdma, job); 340 if (err) { 341 mutex_unlock(&ch->submitlock); 342 return err; 343 } 344 345 channel_program_cdma(job); 346 syncval = host1x_syncpt_read_max(sp); 347 348 /* 349 * Create fence before submitting job to HW to avoid job completing 350 * before the fence is set up. 351 */ 352 job->fence = host1x_fence_create(sp, syncval, true); 353 if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) { 354 job->fence = NULL; 355 } else { 356 err = dma_fence_add_callback(job->fence, &job->fence_cb, 357 job_complete_callback); 358 } 359 360 /* end CDMA submit & stash pinned hMems into sync queue */ 361 host1x_cdma_end(&ch->cdma, job); 362 363 trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); 364 365 mutex_unlock(&ch->submitlock); 366 367 if (err == -ENOENT) 368 host1x_cdma_update(&ch->cdma); 369 else 370 WARN(err, "Failed to set submit complete interrupt"); 371 372 return 0; 373 } 374 375 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, 376 unsigned int index) 377 { 378 #if HOST1X_HW < 6 379 ch->regs = dev->regs + index * 0x4000; 380 #else 381 ch->regs = dev->regs + index * 0x100; 382 #endif 383 return 0; 384 } 385 386 static const struct host1x_channel_ops host1x_channel_ops = { 387 .init = host1x_channel_init, 388 .submit = channel_submit, 389 }; 390