xref: /linux/drivers/gpu/drm/xe/xe_mmio.c (revision 429508c84d95811dd1300181dfe84743caff9a38)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 */

#include "xe_mmio.h"

#include <linux/delay.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/minmax.h>
#include <linux/pci.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "regs/xe_bars.h"
#include "regs/xe_regs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_macros.h"
#include "xe_sriov.h"

static void tiles_fini(void *arg)
{
	struct xe_device *xe = arg;
	struct xe_tile *tile;
	int id;

	for_each_tile(tile, xe, id)
		tile->mmio.regs = NULL;
}

int xe_mmio_probe_tiles(struct xe_device *xe)
{
	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
	u8 id, tile_count = xe->info.tile_count;
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	struct xe_tile *tile;
	void __iomem *regs;
	u32 mtcfg;

	if (tile_count == 1)
		goto add_mmio_ext;

	if (!xe->info.skip_mtcfg) {
		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
		if (tile_count < xe->info.tile_count) {
			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
					xe->info.tile_count, tile_count);
			xe->info.tile_count = tile_count;

			/*
			 * FIXME: Needs some work for standalone media, but should be impossible
			 * with multi-tile for now.
			 */
			xe->info.gt_count = xe->info.tile_count;
		}
	}

	regs = xe->mmio.regs;
	for_each_tile(tile, xe, id) {
		tile->mmio.size = tile_mmio_size;
		tile->mmio.regs = regs;
		regs += tile_mmio_size;
	}

add_mmio_ext:
	/*
	 * By design, there's a contiguous multi-tile MMIO space (16MB hard-coded per tile).
	 * When supported, there could be an additional contiguous multi-tile MMIO extension
	 * space ON TOP of it, and hence the need for distinct MMIO spaces.
	 */
	if (xe->info.has_mmio_ext) {
		regs = xe->mmio.regs + tile_mmio_size * tile_count;

		for_each_tile(tile, xe, id) {
			tile->mmio_ext.size = tile_mmio_ext_size;
			tile->mmio_ext.regs = regs;

			regs += tile_mmio_ext_size;
		}
	}

	return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe);
}

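/*
 * Illustrative sketch (editor's addition, not part of the driver): for a
 * hypothetical two-tile device with the MMIO extension enabled, the loops
 * above carve the single BAR mapping into per-tile windows roughly like so:
 *
 *	xe->mmio.regs + 0 * SZ_16M              -> tile0->mmio.regs
 *	xe->mmio.regs + 1 * SZ_16M              -> tile1->mmio.regs
 *	xe->mmio.regs + 2 * SZ_16M + 0 * ext_sz -> tile0->mmio_ext.regs
 *	xe->mmio.regs + 2 * SZ_16M + 1 * ext_sz -> tile1->mmio_ext.regs
 *
 * where ext_sz stands for xe->info.tile_mmio_ext_size.
 */
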
static void mmio_fini(void *arg)
{
	struct xe_device *xe = arg;

	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
	xe->mmio.regs = NULL;
}

int xe_mmio_init(struct xe_device *xe)
{
	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	const int mmio_bar = GTTMMADR_BAR;

	/*
	 * Map the entire BAR.
	 * The first 16MB of the BAR belong to the root tile and include:
	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
	 */
	xe->mmio.size = pci_resource_len(pdev, mmio_bar);
	xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0);
	if (xe->mmio.regs == NULL) {
		drm_err(&xe->drm, "failed to map registers\n");
		return -EIO;
	}

	/* Setup first tile; other tiles (if present) will be setup later. */
	root_tile->mmio.size = SZ_16M;
	root_tile->mmio.regs = xe->mmio.regs;

	return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}

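/*
 * Usage sketch (editor's addition, a simplified view rather than the actual
 * probe code): the BAR must be mapped before the per-tile windows are carved
 * out, so a caller would do something roughly like:
 *
 *	err = xe_mmio_init(xe);
 *	if (err)
 *		return err;
 *
 *	err = xe_mmio_probe_tiles(xe);
 *	if (err)
 *		return err;
 *
 * Both helpers register devm cleanup actions, so no explicit unwind is needed
 * by the caller. In the real driver these calls may sit in different probe
 * stages.
 */
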
u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
{
	struct xe_tile *tile = gt_to_tile(gt);
	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);

	return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}

u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
{
	struct xe_tile *tile = gt_to_tile(gt);
	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);

	return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}

void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
{
	struct xe_tile *tile = gt_to_tile(gt);
	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);

	writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}

u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
{
	struct xe_tile *tile = gt_to_tile(gt);
	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);

	if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
		return xe_gt_sriov_vf_read32(gt, reg);

	return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}

u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
{
	u32 old, reg_val;

	old = xe_mmio_read32(gt, reg);
	reg_val = (old & ~clr) | set;
	xe_mmio_write32(gt, reg, reg_val);

	return old;
}

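/*
 * Usage sketch (editor's addition): a read-modify-write is typically used to
 * flip individual bits while preserving the rest of the register, e.g. with
 * hypothetical SOME_REG / SOME_BIT definitions:
 *
 *	u32 old = xe_mmio_rmw32(gt, SOME_REG, SOME_BIT, 0);	// clear SOME_BIT
 *	xe_mmio_rmw32(gt, SOME_REG, 0, SOME_BIT);		// set SOME_BIT
 *
 * The returned value is the register contents before the modification.
 */
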
int xe_mmio_write32_and_verify(struct xe_gt *gt,
			       struct xe_reg reg, u32 val, u32 mask, u32 eval)
{
	u32 reg_val;

	xe_mmio_write32(gt, reg, val);
	reg_val = xe_mmio_read32(gt, reg);

	return (reg_val & mask) != eval ? -EINVAL : 0;
}

bool xe_mmio_in_range(const struct xe_gt *gt,
		      const struct xe_mmio_range *range,
		      struct xe_reg reg)
{
	u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);

	return range && addr >= range->start && addr <= range->end;
}

/**
 * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
 * @gt: MMIO target GT
 * @reg: register to read value from
 *
 * Although Intel GPUs have some 64-bit registers, the hardware officially
 * only supports GTTMMADR register reads of 32 bits or smaller.  Even though
 * a readq operation may return a reasonable value, that violation of the
 * spec shouldn't be relied upon and all 64-bit register reads should be
 * performed as two 32-bit reads of the upper and lower dwords.
 *
 * When reading registers that may be changing (such as counters), a rollover
 * of the lower dword between the two 32-bit reads can be problematic.  This
 * function attempts to ensure the upper dword has stabilized before returning
 * the 64-bit value.
 *
 * Note that because this function may re-read the register multiple times
 * while waiting for the value to stabilize, it should not be used to read
 * any registers where read operations have side effects.
 *
 * Returns the value of the 64-bit register.
 */
u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
{
	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
	u32 ldw, udw, oldudw, retries;

	reg.addr = xe_mmio_adjusted_addr(gt, reg.addr);
	reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr);

	/* we shouldn't adjust just one register address */
	xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4);

	oldudw = xe_mmio_read32(gt, reg_udw);
	for (retries = 5; retries; --retries) {
		ldw = xe_mmio_read32(gt, reg);
		udw = xe_mmio_read32(gt, reg_udw);

		if (udw == oldudw)
			break;

		oldudw = udw;
	}

	xe_gt_WARN(gt, retries == 0,
		   "64-bit read of %#x did not stabilize\n", reg.addr);

	return (u64)udw << 32 | ldw;
}

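/*
 * Worked example (editor's addition): consider a free-running 64-bit counter
 * at 0x00000001_ffffffff. Reading the low dword first could return 0xffffffff,
 * and if the counter rolls over before the high dword is read, the high dword
 * would return 0x2, stitching together the bogus value 0x00000002_ffffffff.
 * Re-reading until the high dword is unchanged across the low-dword read, as
 * the loop above does, avoids combining dwords from two different counter
 * values.
 */
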
/**
 * xe_mmio_wait32() - Wait for a register to match the desired masked value
 * @gt: MMIO target GT
 * @reg: register to read value from
 * @mask: mask to be applied to the value read from the register
 * @val: desired value after applying the mask
 * @timeout_us: time out after this period of time. Wait logic tries to be
 * smart, applying an exponential backoff until @timeout_us is reached.
 * @out_val: if not NULL, points where to store the last unmasked value
 * @atomic: needs to be true if calling from an atomic context
 *
 * This function polls for the desired masked value and returns zero on success
 * or -ETIMEDOUT if timed out.
 *
 * Note that @timeout_us represents the minimum amount of time to wait before
 * giving up. The actual time taken by this function can be a little more than
 * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
 * it is possible that this function succeeds even after @timeout_us has passed.
 */
int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
		   u32 *out_val, bool atomic)
{
	ktime_t cur = ktime_get_raw();
	const ktime_t end = ktime_add_us(cur, timeout_us);
	int ret = -ETIMEDOUT;
	s64 wait = 10;
	u32 read;

	for (;;) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) == val) {
			ret = 0;
			break;
		}

		cur = ktime_get_raw();
		if (!ktime_before(cur, end))
			break;

		if (ktime_after(ktime_add_us(cur, wait), end))
			wait = ktime_us_delta(end, cur);

		if (atomic)
			udelay(wait);
		else
			usleep_range(wait, wait << 1);
		wait <<= 1;
	}

	if (ret != 0) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) == val)
			ret = 0;
	}

	if (out_val)
		*out_val = read;

	return ret;
}

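/*
 * Usage sketch (editor's addition): poll a status bit with a bounded wait,
 * using hypothetical SOME_STATUS_REG / READY_BIT definitions:
 *
 *	u32 last;
 *	int err = xe_mmio_wait32(gt, SOME_STATUS_REG, READY_BIT, READY_BIT,
 *				 50000, &last, false);
 *	if (err)
 *		drm_warn(&gt_to_xe(gt)->drm, "not ready, last read %#x\n", last);
 *
 * The 50000 is the timeout in microseconds (50 ms); pass atomic = true only
 * when sleeping is not allowed, since the non-atomic path uses usleep_range().
 */
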
/**
 * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
 * @gt: MMIO target GT
 * @reg: register to read value from
 * @mask: mask to be applied to the value read from the register
 * @val: value to match after applying the mask
 * @timeout_us: time out after this period of time. Wait logic tries to be
 * smart, applying an exponential backoff until @timeout_us is reached.
 * @out_val: if not NULL, points where to store the last unmasked value
 * @atomic: needs to be true if calling from an atomic context
 *
 * This function polls for a masked value to change from a given value and
 * returns zero on success or -ETIMEDOUT if timed out.
 *
 * Note that @timeout_us represents the minimum amount of time to wait before
 * giving up. The actual time taken by this function can be a little more than
 * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
 * it is possible that this function succeeds even after @timeout_us has passed.
 */
int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
		       u32 *out_val, bool atomic)
{
	ktime_t cur = ktime_get_raw();
	const ktime_t end = ktime_add_us(cur, timeout_us);
	int ret = -ETIMEDOUT;
	s64 wait = 10;
	u32 read;

	for (;;) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) != val) {
			ret = 0;
			break;
		}

		cur = ktime_get_raw();
		if (!ktime_before(cur, end))
			break;

		if (ktime_after(ktime_add_us(cur, wait), end))
			wait = ktime_us_delta(end, cur);

		if (atomic)
			udelay(wait);
		else
			usleep_range(wait, wait << 1);
		wait <<= 1;
	}

	if (ret != 0) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) != val)
			ret = 0;
	}

	if (out_val)
		*out_val = read;

	return ret;
}
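
/*
 * Usage sketch (editor's addition): the inverse helper is handy for waiting
 * until a busy bit clears, e.g. with hypothetical SOME_CTL_REG / BUSY_BIT
 * definitions:
 *
 *	int err = xe_mmio_wait32_not(gt, SOME_CTL_REG, BUSY_BIT, BUSY_BIT,
 *				     10000, NULL, false);
 *
 * which returns 0 as soon as (read & BUSY_BIT) != BUSY_BIT, i.e. the bit has
 * been cleared, or -ETIMEDOUT after roughly 10 ms otherwise.
 */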