xref: /linux/drivers/gpu/drm/xe/xe_mmio.c (revision beb5a9bea8239cdf4adf6b62672e30db3e9fa5ce)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021-2023 Intel Corporation
4  */
5 
6 #include "xe_mmio.h"
7 
8 #include <linux/delay.h>
9 #include <linux/io-64-nonatomic-lo-hi.h>
10 #include <linux/minmax.h>
11 #include <linux/pci.h>
12 
13 #include <drm/drm_managed.h>
14 #include <drm/drm_print.h>
15 
16 #include "regs/xe_bars.h"
17 #include "regs/xe_regs.h"
18 #include "xe_device.h"
19 #include "xe_gt.h"
20 #include "xe_gt_printk.h"
21 #include "xe_gt_sriov_vf.h"
22 #include "xe_macros.h"
23 #include "xe_sriov.h"
24 #include "xe_trace.h"
25 
26 static void tiles_fini(void *arg)
27 {
28 	struct xe_device *xe = arg;
29 	struct xe_tile *tile;
30 	int id;
31 
32 	for_each_tile(tile, xe, id)
33 		if (tile != xe_device_get_root_tile(xe))
34 			tile->mmio.regs = NULL;
35 }
36 
37 int xe_mmio_probe_tiles(struct xe_device *xe)
38 {
39 	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
40 	u8 id, tile_count = xe->info.tile_count;
41 	struct xe_gt *gt = xe_root_mmio_gt(xe);
42 	struct xe_tile *tile;
43 	void __iomem *regs;
44 	u32 mtcfg;
45 
46 	if (tile_count == 1)
47 		goto add_mmio_ext;
48 
49 	if (!xe->info.skip_mtcfg) {
50 		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
51 		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
52 		if (tile_count < xe->info.tile_count) {
53 			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
54 					xe->info.tile_count, tile_count);
55 			xe->info.tile_count = tile_count;
56 
57 			/*
58 			 * FIXME: Needs some work for standalone media, but should be impossible
59 			 * with multi-tile for now.
60 			 */
61 			xe->info.gt_count = xe->info.tile_count;
62 		}
63 	}
64 
65 	regs = xe->mmio.regs;
66 	for_each_tile(tile, xe, id) {
67 		tile->mmio.size = tile_mmio_size;
68 		tile->mmio.regs = regs;
69 		regs += tile_mmio_size;
70 	}
71 
72 add_mmio_ext:
73 	/*
74 	 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
75 	 * When supported, there could be an additional contiguous multi-tile MMIO extension
76 	 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
77 	 */
78 	if (xe->info.has_mmio_ext) {
79 		regs = xe->mmio.regs + tile_mmio_size * tile_count;
80 
81 		for_each_tile(tile, xe, id) {
82 			tile->mmio_ext.size = tile_mmio_ext_size;
83 			tile->mmio_ext.regs = regs;
84 
85 			regs += tile_mmio_ext_size;
86 		}
87 	}
88 
89 	return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe);
90 }
91 
92 static void mmio_fini(void *arg)
93 {
94 	struct xe_device *xe = arg;
95 	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
96 
97 	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
98 	xe->mmio.regs = NULL;
99 	root_tile->mmio.regs = NULL;
100 }
101 
102 int xe_mmio_init(struct xe_device *xe)
103 {
104 	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
105 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
106 	const int mmio_bar = 0;
107 
108 	/*
109 	 * Map the entire BAR.
110 	 * The first 16MB of the BAR, belong to the root tile, and include:
111 	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
112 	 */
113 	xe->mmio.size = pci_resource_len(pdev, mmio_bar);
114 	xe->mmio.regs = pci_iomap(pdev, mmio_bar, GTTMMADR_BAR);
115 	if (xe->mmio.regs == NULL) {
116 		drm_err(&xe->drm, "failed to map registers\n");
117 		return -EIO;
118 	}
119 
120 	/* Setup first tile; other tiles (if present) will be setup later. */
121 	root_tile->mmio.size = SZ_16M;
122 	root_tile->mmio.regs = xe->mmio.regs;
123 
124 	return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
125 }
126 
127 static void mmio_flush_pending_writes(struct xe_gt *gt)
128 {
129 #define DUMMY_REG_OFFSET	0x130030
130 	struct xe_tile *tile = gt_to_tile(gt);
131 	int i;
132 
133 	if (tile->xe->info.platform != XE_LUNARLAKE)
134 		return;
135 
136 	/* 4 dummy writes */
137 	for (i = 0; i < 4; i++)
138 		writel(0, tile->mmio.regs + DUMMY_REG_OFFSET);
139 }
140 
141 u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
142 {
143 	struct xe_tile *tile = gt_to_tile(gt);
144 	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
145 	u8 val;
146 
147 	/* Wa_15015404425 */
148 	mmio_flush_pending_writes(gt);
149 
150 	val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
151 	trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
152 
153 	return val;
154 }
155 
156 u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
157 {
158 	struct xe_tile *tile = gt_to_tile(gt);
159 	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
160 	u16 val;
161 
162 	/* Wa_15015404425 */
163 	mmio_flush_pending_writes(gt);
164 
165 	val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
166 	trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
167 
168 	return val;
169 }
170 
171 void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
172 {
173 	struct xe_tile *tile = gt_to_tile(gt);
174 	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
175 
176 	trace_xe_reg_rw(gt, true, addr, val, sizeof(val));
177 	writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
178 }
179 
180 u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
181 {
182 	struct xe_tile *tile = gt_to_tile(gt);
183 	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
184 	u32 val;
185 
186 	/* Wa_15015404425 */
187 	mmio_flush_pending_writes(gt);
188 
189 	if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
190 		val = xe_gt_sriov_vf_read32(gt, reg);
191 	else
192 		val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
193 
194 	trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
195 
196 	return val;
197 }
198 
199 u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
200 {
201 	u32 old, reg_val;
202 
203 	old = xe_mmio_read32(gt, reg);
204 	reg_val = (old & ~clr) | set;
205 	xe_mmio_write32(gt, reg, reg_val);
206 
207 	return old;
208 }
209 
210 int xe_mmio_write32_and_verify(struct xe_gt *gt,
211 			       struct xe_reg reg, u32 val, u32 mask, u32 eval)
212 {
213 	u32 reg_val;
214 
215 	xe_mmio_write32(gt, reg, val);
216 	reg_val = xe_mmio_read32(gt, reg);
217 
218 	return (reg_val & mask) != eval ? -EINVAL : 0;
219 }
220 
221 bool xe_mmio_in_range(const struct xe_gt *gt,
222 		      const struct xe_mmio_range *range,
223 		      struct xe_reg reg)
224 {
225 	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
226 
227 	return range && addr >= range->start && addr <= range->end;
228 }
229 
230 /**
231  * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
232  * @gt: MMIO target GT
233  * @reg: register to read value from
234  *
235  * Although Intel GPUs have some 64-bit registers, the hardware officially
236  * only supports GTTMMADR register reads of 32 bits or smaller.  Even if
237  * a readq operation may return a reasonable value, that violation of the
238  * spec shouldn't be relied upon and all 64-bit register reads should be
239  * performed as two 32-bit reads of the upper and lower dwords.
240  *
241  * When reading registers that may be changing (such as
242  * counters), a rollover of the lower dword between the two 32-bit reads
243  * can be problematic.  This function attempts to ensure the upper dword has
244  * stabilized before returning the 64-bit value.
245  *
246  * Note that because this function may re-read the register multiple times
247  * while waiting for the value to stabilize it should not be used to read
248  * any registers where read operations have side effects.
249  *
250  * Returns the value of the 64-bit register.
251  */
252 u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
253 {
254 	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
255 	u32 ldw, udw, oldudw, retries;
256 
257 	reg.addr = xe_mmio_adjusted_addr(gt, reg.addr);
258 	reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr);
259 
260 	/* we shouldn't adjust just one register address */
261 	xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4);
262 
263 	oldudw = xe_mmio_read32(gt, reg_udw);
264 	for (retries = 5; retries; --retries) {
265 		ldw = xe_mmio_read32(gt, reg);
266 		udw = xe_mmio_read32(gt, reg_udw);
267 
268 		if (udw == oldudw)
269 			break;
270 
271 		oldudw = udw;
272 	}
273 
274 	xe_gt_WARN(gt, retries == 0,
275 		   "64-bit read of %#x did not stabilize\n", reg.addr);
276 
277 	return (u64)udw << 32 | ldw;
278 }
279 
280 /**
281  * xe_mmio_wait32() - Wait for a register to match the desired masked value
282  * @gt: MMIO target GT
283  * @reg: register to read value from
284  * @mask: mask to be applied to the value read from the register
285  * @val: desired value after applying the mask
286  * @timeout_us: time out after this period of time. Wait logic tries to be
287  * smart, applying an exponential backoff until @timeout_us is reached.
288  * @out_val: if not NULL, points where to store the last unmasked value
289  * @atomic: needs to be true if calling from an atomic context
290  *
291  * This function polls for the desired masked value and returns zero on success
292  * or -ETIMEDOUT if timed out.
293  *
294  * Note that @timeout_us represents the minimum amount of time to wait before
295  * giving up. The actual time taken by this function can be a little more than
296  * @timeout_us for different reasons, specially in non-atomic contexts. Thus,
297  * it is possible that this function succeeds even after @timeout_us has passed.
298  */
299 int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
300 		   u32 *out_val, bool atomic)
301 {
302 	ktime_t cur = ktime_get_raw();
303 	const ktime_t end = ktime_add_us(cur, timeout_us);
304 	int ret = -ETIMEDOUT;
305 	s64 wait = 10;
306 	u32 read;
307 
308 	for (;;) {
309 		read = xe_mmio_read32(gt, reg);
310 		if ((read & mask) == val) {
311 			ret = 0;
312 			break;
313 		}
314 
315 		cur = ktime_get_raw();
316 		if (!ktime_before(cur, end))
317 			break;
318 
319 		if (ktime_after(ktime_add_us(cur, wait), end))
320 			wait = ktime_us_delta(end, cur);
321 
322 		if (atomic)
323 			udelay(wait);
324 		else
325 			usleep_range(wait, wait << 1);
326 		wait <<= 1;
327 	}
328 
329 	if (ret != 0) {
330 		read = xe_mmio_read32(gt, reg);
331 		if ((read & mask) == val)
332 			ret = 0;
333 	}
334 
335 	if (out_val)
336 		*out_val = read;
337 
338 	return ret;
339 }
340 
341 /**
342  * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
343  * @gt: MMIO target GT
344  * @reg: register to read value from
345  * @mask: mask to be applied to the value read from the register
346  * @val: value to match after applying the mask
347  * @timeout_us: time out after this period of time. Wait logic tries to be
348  * smart, applying an exponential backoff until @timeout_us is reached.
349  * @out_val: if not NULL, points where to store the last unmasked value
350  * @atomic: needs to be true if calling from an atomic context
351  *
352  * This function polls for a masked value to change from a given value and
353  * returns zero on success or -ETIMEDOUT if timed out.
354  *
355  * Note that @timeout_us represents the minimum amount of time to wait before
356  * giving up. The actual time taken by this function can be a little more than
357  * @timeout_us for different reasons, specially in non-atomic contexts. Thus,
358  * it is possible that this function succeeds even after @timeout_us has passed.
359  */
360 int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
361 		       u32 *out_val, bool atomic)
362 {
363 	ktime_t cur = ktime_get_raw();
364 	const ktime_t end = ktime_add_us(cur, timeout_us);
365 	int ret = -ETIMEDOUT;
366 	s64 wait = 10;
367 	u32 read;
368 
369 	for (;;) {
370 		read = xe_mmio_read32(gt, reg);
371 		if ((read & mask) != val) {
372 			ret = 0;
373 			break;
374 		}
375 
376 		cur = ktime_get_raw();
377 		if (!ktime_before(cur, end))
378 			break;
379 
380 		if (ktime_after(ktime_add_us(cur, wait), end))
381 			wait = ktime_us_delta(end, cur);
382 
383 		if (atomic)
384 			udelay(wait);
385 		else
386 			usleep_range(wait, wait << 1);
387 		wait <<= 1;
388 	}
389 
390 	if (ret != 0) {
391 		read = xe_mmio_read32(gt, reg);
392 		if ((read & mask) != val)
393 			ret = 0;
394 	}
395 
396 	if (out_val)
397 		*out_val = read;
398 
399 	return ret;
400 }
401