xref: /linux/drivers/gpu/drm/xe/xe_mmio.c (revision 68a052239fc4b351e961f698b824f7654a346091)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 */

#include "xe_mmio.h"

#include <linux/delay.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/minmax.h>
#include <linux/pci.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "regs/xe_bars.h"
#include "regs/xe_regs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_macros.h"
#include "xe_sriov.h"
#include "xe_trace.h"
#include "xe_wa.h"

#include "generated/xe_device_wa_oob.h"

static void tiles_fini(void *arg)
{
	struct xe_device *xe = arg;
	struct xe_tile *tile;
	int id;

	for_each_remote_tile(tile, xe, id)
		tile->mmio.regs = NULL;
}

/*
 * On multi-tile devices, partition the BAR space for MMIO on each tile,
 * possibly accounting for a register override of the number of tiles
 * available. tile_mmio_size covers both the tile's 4MB register space and
 * the additional space for the GTT and other (possibly unused) regions.
 * The resulting memory layout is shown below:
 *
 * .----------------------. <- tile_count * tile_mmio_size
 * |         ....         |
 * |----------------------| <- 2 * tile_mmio_size
 * |   tile1 GTT + other  |
 * |----------------------| <- 1 * tile_mmio_size + 4MB
 * |   tile1->mmio.regs   |
 * |----------------------| <- 1 * tile_mmio_size
 * |   tile0 GTT + other  |
 * |----------------------| <- 4MB
 * |   tile0->mmio.regs   |
 * '----------------------' <- 0MB
 */
static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
{
	struct xe_tile *tile;
	u8 id;

	/*
	 * Nothing to be done as tile 0 has already been set up earlier with
	 * the entire BAR mapped - see xe_mmio_probe_early()
	 */
	if (xe->info.tile_count == 1)
		return;

	for_each_remote_tile(tile, xe, id)
		xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M);
}

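/**
 * xe_mmio_probe_tiles() - Set up MMIO mappings for remote tiles
 * @xe: xe device instance
 *
 * Carve the already-mapped BAR into per-tile MMIO regions (16MB stride) for
 * any remote tiles and register a cleanup action that drops those pointers
 * on driver removal.
 *
 * Return: 0 on success, negative error code on failure.
 */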
int xe_mmio_probe_tiles(struct xe_device *xe)
{
	size_t tile_mmio_size = SZ_16M;

	mmio_multi_tile_setup(xe, tile_mmio_size);

	return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe);
}

static void mmio_fini(void *arg)
{
	struct xe_device *xe = arg;
	struct xe_tile *root_tile = xe_device_get_root_tile(xe);

	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
	xe->mmio.regs = NULL;
	root_tile->mmio.regs = NULL;
}

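/**
 * xe_mmio_probe_early() - Map the MMIO BAR and set up the root tile
 * @xe: xe device instance
 *
 * Map the entire GTTMMADR BAR, initialize the root tile's MMIO view on top
 * of it and register a cleanup action that unmaps the BAR on driver removal.
 *
 * Return: 0 on success, negative error code on failure.
 */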
int xe_mmio_probe_early(struct xe_device *xe)
{
	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

	/*
	 * Map the entire BAR.
	 * The first 16MB of the BAR belongs to the root tile and includes:
	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
	 */
	xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR);
	xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0);
	if (!xe->mmio.regs) {
		drm_err(&xe->drm, "failed to map registers\n");
		return -EIO;
	}

	/* Set up the first tile; other tiles (if present) will be set up later. */
	xe_mmio_init(&root_tile->mmio, root_tile, xe->mmio.regs, SZ_4M);

	return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
ALLOW_ERROR_INJECTION(xe_mmio_probe_early, ERRNO); /* See xe_pci_probe() */

/**
 * xe_mmio_init() - Initialize an MMIO instance
 * @mmio: Pointer to the MMIO instance to initialize
 * @tile: The tile to which the MMIO region belongs
 * @ptr: Pointer to the start of the MMIO region
 * @size: The size of the MMIO region in bytes
 *
 * This is a convenience function for minimal initialization of struct xe_mmio.
 */
void xe_mmio_init(struct xe_mmio *mmio, struct xe_tile *tile, void __iomem *ptr, u32 size)
{
	xe_tile_assert(tile, size <= XE_REG_ADDR_MAX);

	mmio->regs = ptr;
	mmio->regs_size = size;
	mmio->tile = tile;
}

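/*
 * Flush previously posted MMIO writes by issuing a burst of dummy writes.
 * Only needed on platforms affected by the device workaround checked below;
 * called at the start of the MMIO read helpers so reads observe the effect
 * of earlier writes.
 */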
static void mmio_flush_pending_writes(struct xe_mmio *mmio)
{
#define DUMMY_REG_OFFSET	0x130030
	int i;

	if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425))
		return;

	/* 4 dummy writes */
	for (i = 0; i < 4; i++)
		writel(0, mmio->regs + DUMMY_REG_OFFSET);
}

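/**
 * xe_mmio_read8() - Read an 8-bit register
 * @mmio: MMIO target
 * @reg: register to read value from
 *
 * Return: the value read from @reg.
 */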
u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg)
{
	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
	u8 val;

	mmio_flush_pending_writes(mmio);

	val = readb(mmio->regs + addr);
	trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));

	return val;
}

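/**
 * xe_mmio_read16() - Read a 16-bit register
 * @mmio: MMIO target
 * @reg: register to read value from
 *
 * Return: the value read from @reg.
 */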
u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg)
{
	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
	u16 val;

	mmio_flush_pending_writes(mmio);

	val = readw(mmio->regs + addr);
	trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));

	return val;
}

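/**
 * xe_mmio_write32() - Write a 32-bit register
 * @mmio: MMIO target
 * @reg: register to write
 * @val: value to write
 *
 * When running as an SR-IOV VF and @reg is not a VF-accessible register, the
 * write is handled by xe_gt_sriov_vf_write32() instead of a direct MMIO
 * access.
 */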
void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val)
{
	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);

	trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));

	if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
		xe_gt_sriov_vf_write32(mmio->sriov_vf_gt ?:
				       mmio->tile->primary_gt, reg, val);
	else
		writel(val, mmio->regs + addr);
}

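/**
 * xe_mmio_read32() - Read a 32-bit register
 * @mmio: MMIO target
 * @reg: register to read value from
 *
 * When running as an SR-IOV VF and @reg is not a VF-accessible register, the
 * read is handled by xe_gt_sriov_vf_read32() instead of a direct MMIO access.
 *
 * Return: the value read from @reg.
 */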
u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
{
	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
	u32 val;

	mmio_flush_pending_writes(mmio);

	if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
		val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt ?:
					    mmio->tile->primary_gt, reg);
	else
		val = readl(mmio->regs + addr);

	trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));

	return val;
}

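/**
 * xe_mmio_rmw32() - Read-modify-write a 32-bit register
 * @mmio: MMIO target
 * @reg: register to operate on
 * @clr: bits to clear
 * @set: bits to set
 *
 * The read and the write are issued as two separate accesses with no locking
 * here, so callers must serialize concurrent updates themselves.
 *
 * Return: the old value of @reg, before the modification.
 */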
u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set)
{
	u32 old, reg_val;

	old = xe_mmio_read32(mmio, reg);
	reg_val = (old & ~clr) | set;
	xe_mmio_write32(mmio, reg, reg_val);

	return old;
}

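/**
 * xe_mmio_write32_and_verify() - Write a 32-bit register and verify the result
 * @mmio: MMIO target
 * @reg: register to write
 * @val: value to write
 * @mask: mask to apply to the value read back
 * @eval: expected value after applying @mask
 *
 * Return: 0 if the value read back matches @eval after masking, -EINVAL
 * otherwise.
 */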
int xe_mmio_write32_and_verify(struct xe_mmio *mmio,
			       struct xe_reg reg, u32 val, u32 mask, u32 eval)
{
	u32 reg_val;

	xe_mmio_write32(mmio, reg, val);
	reg_val = xe_mmio_read32(mmio, reg);

	return (reg_val & mask) != eval ? -EINVAL : 0;
}

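/**
 * xe_mmio_in_range() - Check whether a register falls within an MMIO range
 * @mmio: MMIO target
 * @range: range to check against, may be NULL
 * @reg: register to check
 *
 * Return: true if @range is non-NULL and the (adjusted) address of @reg lies
 * within it, false otherwise.
 */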
bool xe_mmio_in_range(const struct xe_mmio *mmio,
		      const struct xe_mmio_range *range,
		      struct xe_reg reg)
{
	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);

	return range && addr >= range->start && addr <= range->end;
}

/**
 * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
 * @mmio: MMIO target
 * @reg: register to read value from
 *
 * Although Intel GPUs have some 64-bit registers, the hardware officially
 * only supports GTTMMADR register reads of 32 bits or smaller.  Even though
 * a readq operation may return a reasonable value, that violation of the
 * spec shouldn't be relied upon and all 64-bit register reads should be
 * performed as two 32-bit reads of the upper and lower dwords.
 *
 * When reading registers that may be changing (such as counters), a rollover
 * of the lower dword between the two 32-bit reads can be problematic.  This
 * function attempts to ensure the upper dword has stabilized before returning
 * the 64-bit value.
 *
 * Note that because this function may re-read the register multiple times
 * while waiting for the value to stabilize, it should not be used to read
 * any registers where read operations have side effects.
 *
 * Return: the value of the 64-bit register.
 */
u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg)
{
	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
	u32 ldw, udw, oldudw, retries;

	reg.addr = xe_mmio_adjusted_addr(mmio, reg.addr);
	reg_udw.addr = xe_mmio_adjusted_addr(mmio, reg_udw.addr);

	/* we shouldn't adjust just one register address */
	xe_tile_assert(mmio->tile, reg_udw.addr == reg.addr + 0x4);

	oldudw = xe_mmio_read32(mmio, reg_udw);
	for (retries = 5; retries; --retries) {
		ldw = xe_mmio_read32(mmio, reg);
		udw = xe_mmio_read32(mmio, reg_udw);

		if (udw == oldudw)
			break;

		oldudw = udw;
	}

	drm_WARN(&mmio->tile->xe->drm, retries == 0,
		 "64-bit read of %#x did not stabilize\n", reg.addr);

	return (u64)udw << 32 | ldw;
}

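/*
 * Poll @reg until (read & @mask) matches (or, with @expect_match == false,
 * stops matching) @val, using an exponential backoff capped by @timeout_us.
 * After the deadline expires the register is sampled one last time, so a
 * late match still counts as success.
 */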
static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val,
			    u32 timeout_us, u32 *out_val, bool atomic, bool expect_match)
{
	ktime_t cur = ktime_get_raw();
	const ktime_t end = ktime_add_us(cur, timeout_us);
	int ret = -ETIMEDOUT;
	s64 wait = 10;
	u32 read;
	bool check;

	for (;;) {
		read = xe_mmio_read32(mmio, reg);

		check = (read & mask) == val;
		if (!expect_match)
			check = !check;

		if (check) {
			ret = 0;
			break;
		}

		cur = ktime_get_raw();
		if (!ktime_before(cur, end))
			break;

		if (ktime_after(ktime_add_us(cur, wait), end))
			wait = ktime_us_delta(end, cur);

		if (atomic)
			udelay(wait);
		else
			usleep_range(wait, wait << 1);
		wait <<= 1;
	}

	if (ret != 0) {
		read = xe_mmio_read32(mmio, reg);

		check = (read & mask) == val;
		if (!expect_match)
			check = !check;

		if (check)
			ret = 0;
	}

	if (out_val)
		*out_val = read;

	return ret;
}

/**
 * xe_mmio_wait32() - Wait for a register to match the desired masked value
 * @mmio: MMIO target
 * @reg: register to read value from
 * @mask: mask to be applied to the value read from the register
 * @val: desired value after applying the mask
 * @timeout_us: time out after this period of time. The wait logic applies an
 * exponential backoff until @timeout_us is reached.
 * @out_val: if not NULL, location where the last raw (unmasked) value read is
 * stored
 * @atomic: needs to be true if calling from an atomic context
 *
 * This function polls for the desired masked value and returns zero on success
 * or -ETIMEDOUT if timed out.
 *
 * Note that @timeout_us represents the minimum amount of time to wait before
 * giving up. The actual time taken by this function can be a little more than
 * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
 * it is possible that this function succeeds even after @timeout_us has passed.
 */
int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
		   u32 *out_val, bool atomic)
{
	return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, true);
}

/**
 * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
 * @mmio: MMIO target
 * @reg: register to read value from
 * @mask: mask to be applied to the value read from the register
 * @val: value not to be matched after applying the mask
 * @timeout_us: time out after this period of time
 * @out_val: if not NULL, location where the last raw (unmasked) value read is
 * stored
 * @atomic: needs to be true if calling from an atomic context
 *
 * This function works exactly like xe_mmio_wait32() with the exception that
 * @val is expected not to be matched.
 */
int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
		       u32 *out_val, bool atomic)
{
	return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false);
}