xref: /linux/drivers/pci/controller/pci-xgene-msi.c (revision 0bd0a41a5120f78685a132834865b0a631b9026a)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * APM X-Gene MSI Driver
4  *
5  * Copyright (c) 2014, Applied Micro Circuits Corporation
6  * Author: Tanmay Inamdar <tinamdar@apm.com>
7  *	   Duc Dang <dhdang@apm.com>
8  */
9 #include <linux/bitfield.h>
10 #include <linux/cpu.h>
11 #include <linux/interrupt.h>
12 #include <linux/irqdomain.h>
13 #include <linux/module.h>
14 #include <linux/msi.h>
15 #include <linux/irqchip/chained_irq.h>
16 #include <linux/irqchip/irq-msi-lib.h>
17 #include <linux/pci.h>
18 #include <linux/platform_device.h>
19 #include <linux/of_pci.h>
20 
21 #define MSI_IR0			0x000000
22 #define MSI_INT0		0x800000
23 #define IDX_PER_GROUP		8
24 #define IRQS_PER_IDX		16
25 #define NR_HW_IRQS		16
26 #define NR_MSI_BITS		(IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS)
27 #define NR_MSI_VEC		(NR_MSI_BITS / num_possible_cpus())
28 
29 #define MSI_GROUP_MASK		GENMASK(22, 19)
30 #define MSI_INDEX_MASK		GENMASK(18, 16)
31 #define MSI_INTR_MASK		GENMASK(19, 16)
32 
33 #define MSInRx_HWIRQ_MASK	GENMASK(6, 4)
34 #define DATA_HWIRQ_MASK		GENMASK(3, 0)
35 
/*
 * struct xgene_msi - per-controller driver state (one instance per SoC)
 * @inner_domain: parent MSI irq domain created in xgene_allocate_domains()
 * @msi_addr:     physical base of the MSI termination registers; doubles as
 *                the base of the address programmed into endpoints' MSI
 *                capability (see xgene_compose_msi_msg())
 * @msi_regs:     CPU-side ioremap of the same register region
 * @bitmap:       allocation bitmap, one bit per usable vector (NR_MSI_VEC)
 * @bitmap_lock:  serializes alloc/free updates of @bitmap
 * @gic_irq:      GIC IRQ number for each of the NR_HW_IRQS termination frames
 */
struct xgene_msi {
	struct irq_domain	*inner_domain;
	u64			msi_addr;
	void __iomem		*msi_regs;
	unsigned long		*bitmap;
	struct mutex		bitmap_lock;
	unsigned int		gic_irq[NR_HW_IRQS];
};
44 
/* Global data: the single controller instance, allocated in xgene_msi_probe() */
static struct xgene_msi *xgene_msi_ctrl;
47 
48 /*
49  * X-Gene v1 has 16 frames of MSI termination registers MSInIRx, where n is
50  * frame number (0..15), x is index of registers in each frame (0..7).  Each
51  * 32b register is at the beginning of a 64kB region, each frame occupying
52  * 512kB (and the whole thing 8MB of PA space).
53  *
54  * Each register supports 16 MSI vectors (0..15) to generate interrupts. A
55  * write to the MSInIRx from the PCI side generates an interrupt. A read
 * from the MSInIRx on the CPU side returns a bitmap of the pending MSIs in
 * the lower 16 bits. A side effect of this read is that all pending
 * interrupts are acknowledged and cleared.
59  *
60  * Additionally, each MSI termination frame has 1 MSIINTn register (n is
61  * 0..15) to indicate the MSI pending status caused by any of its 8
62  * termination registers, reported as a bitmap in the lower 8 bits. Each 32b
63  * register is at the beginning of a 64kB region (and overall occupying an
64  * extra 1MB).
65  *
66  * There is one GIC IRQ assigned for each MSI termination frame, 16 in
67  * total.
68  *
69  * The register layout is as follows:
70  * MSI0IR0			base_addr
71  * MSI0IR1			base_addr +  0x10000
72  * ...				...
73  * MSI0IR6			base_addr +  0x60000
74  * MSI0IR7			base_addr +  0x70000
75  * MSI1IR0			base_addr +  0x80000
76  * MSI1IR1			base_addr +  0x90000
77  * ...				...
78  * MSI1IR7			base_addr +  0xF0000
79  * MSI2IR0			base_addr + 0x100000
80  * ...				...
81  * MSIFIR0			base_addr + 0x780000
82  * MSIFIR1			base_addr + 0x790000
83  * ...				...
84  * MSIFIR7			base_addr + 0x7F0000
85  * MSIINT0			base_addr + 0x800000
86  * MSIINT1			base_addr + 0x810000
87  * ...				...
88  * MSIINTF			base_addr + 0x8F0000
89  */
90 
91 /* MSInIRx read helper */
xgene_msi_ir_read(struct xgene_msi * msi,u32 msi_grp,u32 msir_idx)92 static u32 xgene_msi_ir_read(struct xgene_msi *msi, u32 msi_grp, u32 msir_idx)
93 {
94 	return readl_relaxed(msi->msi_regs + MSI_IR0 +
95 			     (FIELD_PREP(MSI_GROUP_MASK, msi_grp) |
96 			      FIELD_PREP(MSI_INDEX_MASK, msir_idx)));
97 }
98 
99 /* MSIINTn read helper */
xgene_msi_int_read(struct xgene_msi * msi,u32 msi_grp)100 static u32 xgene_msi_int_read(struct xgene_msi *msi, u32 msi_grp)
101 {
102 	return readl_relaxed(msi->msi_regs + MSI_INT0 +
103 			     FIELD_PREP(MSI_INTR_MASK, msi_grp));
104 }
105 
106 /*
107  * In order to allow an MSI to be moved from one CPU to another without
108  * having to repaint both the address and the data (which cannot be done
 * atomically), we statically partition the MSI frames between CPUs. Given
110  * that XGene-1 has 8 CPUs, each CPU gets two frames assigned to it
111  *
112  * We adopt the convention that when an MSI is moved, it is configured to
113  * target the same register number in the congruent frame assigned to the
114  * new target CPU. This reserves a given MSI across all CPUs, and reduces
115  * the MSI capacity from 2048 to 256.
116  *
117  * Effectively, this amounts to:
118  * - hwirq[7]::cpu[2:0] is the target frame number (n in MSInIRx)
119  * - hwirq[6:4] is the register index in any given frame (x in MSInIRx)
120  * - hwirq[3:0] is the MSI data
121  */
compute_hwirq(u8 frame,u8 index,u8 data)122 static irq_hw_number_t compute_hwirq(u8 frame, u8 index, u8 data)
123 {
124 	return (FIELD_PREP(BIT(7), FIELD_GET(BIT(3), frame))	|
125 		FIELD_PREP(MSInRx_HWIRQ_MASK, index)		|
126 		FIELD_PREP(DATA_HWIRQ_MASK, data));
127 }
128 
xgene_compose_msi_msg(struct irq_data * data,struct msi_msg * msg)129 static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
130 {
131 	struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
132 	u64 target_addr;
133 	u32 frame, msir;
134 	int cpu;
135 
136 	cpu	= cpumask_first(irq_data_get_effective_affinity_mask(data));
137 	msir	= FIELD_GET(MSInRx_HWIRQ_MASK, data->hwirq);
138 	frame	= FIELD_PREP(BIT(3), FIELD_GET(BIT(7), data->hwirq)) | cpu;
139 
140 	target_addr = msi->msi_addr;
141 	target_addr += (FIELD_PREP(MSI_GROUP_MASK, frame) |
142 			FIELD_PREP(MSI_INTR_MASK, msir));
143 
144 	msg->address_hi = upper_32_bits(target_addr);
145 	msg->address_lo = lower_32_bits(target_addr);
146 	msg->data = FIELD_GET(DATA_HWIRQ_MASK, data->hwirq);
147 }
148 
static int xgene_msi_set_affinity(struct irq_data *irqdata,
				  const struct cpumask *mask, bool force)
{
	/* Pin the effective affinity to one CPU out of the requested mask */
	irq_data_update_effective_affinity(irqdata,
					   cpumask_of(cpumask_first(mask)));

	/* Force the core code to regenerate the message */
	return IRQ_SET_MASK_OK;
}
159 
/*
 * irq_chip for the inner domain. An affinity change only repaints the
 * effective mask; the core then regenerates the message through
 * xgene_compose_msi_msg().
 */
static struct irq_chip xgene_msi_bottom_irq_chip = {
	.name			= "MSI",
	.irq_set_affinity       = xgene_msi_set_affinity,
	.irq_compose_msi_msg	= xgene_compose_msi_msg,
};
165 
xgene_irq_domain_alloc(struct irq_domain * domain,unsigned int virq,unsigned int nr_irqs,void * args)166 static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
167 				  unsigned int nr_irqs, void *args)
168 {
169 	struct xgene_msi *msi = domain->host_data;
170 	irq_hw_number_t hwirq;
171 
172 	mutex_lock(&msi->bitmap_lock);
173 
174 	hwirq = find_first_zero_bit(msi->bitmap, NR_MSI_VEC);
175 	if (hwirq < NR_MSI_VEC)
176 		set_bit(hwirq, msi->bitmap);
177 
178 	mutex_unlock(&msi->bitmap_lock);
179 
180 	if (hwirq >= NR_MSI_VEC)
181 		return -ENOSPC;
182 
183 	irq_domain_set_info(domain, virq, hwirq,
184 			    &xgene_msi_bottom_irq_chip, domain->host_data,
185 			    handle_simple_irq, NULL, NULL);
186 	irqd_set_resend_when_in_progress(irq_get_irq_data(virq));
187 
188 	return 0;
189 }
190 
xgene_irq_domain_free(struct irq_domain * domain,unsigned int virq,unsigned int nr_irqs)191 static void xgene_irq_domain_free(struct irq_domain *domain,
192 				  unsigned int virq, unsigned int nr_irqs)
193 {
194 	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
195 	struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
196 
197 	mutex_lock(&msi->bitmap_lock);
198 
199 	clear_bit(d->hwirq, msi->bitmap);
200 
201 	mutex_unlock(&msi->bitmap_lock);
202 
203 	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
204 }
205 
/* Inner-domain ops: vector allocation/free against the shared bitmap */
static const struct irq_domain_ops xgene_msi_domain_ops = {
	.alloc  = xgene_irq_domain_alloc,
	.free   = xgene_irq_domain_free,
};
210 
/*
 * Capabilities advertised to the generic PCI/MSI layer; per-device MSI
 * domains are stacked on the parent via msi_lib_init_dev_msi_info().
 */
static const struct msi_parent_ops xgene_msi_parent_ops = {
	.supported_flags	= (MSI_GENERIC_FLAGS_MASK	|
				   MSI_FLAG_PCI_MSIX),
	.required_flags		= (MSI_FLAG_USE_DEF_DOM_OPS	|
				   MSI_FLAG_USE_DEF_CHIP_OPS),
	.bus_select_token	= DOMAIN_BUS_PCI_MSI,
	.init_dev_msi_info	= msi_lib_init_dev_msi_info,
};
219 
xgene_allocate_domains(struct device_node * node,struct xgene_msi * msi)220 static int xgene_allocate_domains(struct device_node *node,
221 				  struct xgene_msi *msi)
222 {
223 	struct irq_domain_info info = {
224 		.fwnode		= of_fwnode_handle(node),
225 		.ops		= &xgene_msi_domain_ops,
226 		.size		= NR_MSI_VEC,
227 		.host_data	= msi,
228 	};
229 
230 	msi->inner_domain = msi_create_parent_irq_domain(&info, &xgene_msi_parent_ops);
231 	return msi->inner_domain ? 0 : -ENOMEM;
232 }
233 
xgene_msi_init_allocator(struct device * dev)234 static int xgene_msi_init_allocator(struct device *dev)
235 {
236 	xgene_msi_ctrl->bitmap = devm_bitmap_zalloc(dev, NR_MSI_VEC, GFP_KERNEL);
237 	if (!xgene_msi_ctrl->bitmap)
238 		return -ENOMEM;
239 
240 	mutex_init(&xgene_msi_ctrl->bitmap_lock);
241 
242 	return 0;
243 }
244 
/*
 * Chained handler for one MSI termination frame's GIC IRQ.
 *
 * The handler data is a pointer into gic_irq[], so the frame number is
 * recovered from the pointer offset. MSIINTn gives a bitmap of the frame's
 * MSInIRx registers that have pending MSIs; each MSInIRx read both returns
 * and clears its pending bits, and every set bit is demultiplexed into the
 * inner IRQ domain.
 */
static void xgene_msi_isr(struct irq_desc *desc)
{
	unsigned int *irqp = irq_desc_get_handler_data(desc);
	struct irq_chip *chip = irq_desc_get_chip(desc);
	struct xgene_msi *xgene_msi = xgene_msi_ctrl;
	unsigned long grp_pending;
	int msir_idx;
	u32 msi_grp;

	chained_irq_enter(chip, desc);

	/* Frame number == index of this IRQ's slot in gic_irq[] */
	msi_grp = irqp - xgene_msi->gic_irq;

	grp_pending = xgene_msi_int_read(xgene_msi, msi_grp);

	for_each_set_bit(msir_idx, &grp_pending, IDX_PER_GROUP) {
		unsigned long msir;
		int intr_idx;

		/* Read-to-clear: this acks all pending MSIs in the register */
		msir = xgene_msi_ir_read(xgene_msi, msi_grp, msir_idx);

		for_each_set_bit(intr_idx, &msir, IRQS_PER_IDX) {
			irq_hw_number_t hwirq;
			int ret;

			hwirq = compute_hwirq(msi_grp, msir_idx, intr_idx);
			ret = generic_handle_domain_irq(xgene_msi->inner_domain,
							hwirq);
			WARN_ON_ONCE(ret);
		}
	}

	chained_irq_exit(chip, desc);
}
279 
xgene_msi_remove(struct platform_device * pdev)280 static void xgene_msi_remove(struct platform_device *pdev)
281 {
282 	for (int i = 0; i < NR_HW_IRQS; i++) {
283 		unsigned int irq = xgene_msi_ctrl->gic_irq[i];
284 		if (!irq)
285 			continue;
286 		irq_set_chained_handler_and_data(irq, NULL, NULL);
287 	}
288 
289 	if (xgene_msi_ctrl->inner_domain)
290 		irq_domain_remove(xgene_msi_ctrl->inner_domain);
291 }
292 
xgene_msi_handler_setup(struct platform_device * pdev)293 static int xgene_msi_handler_setup(struct platform_device *pdev)
294 {
295 	struct xgene_msi *xgene_msi = xgene_msi_ctrl;
296 	int i;
297 
298 	for (i = 0; i < NR_HW_IRQS; i++) {
299 		u32 msi_val;
300 		int irq, err;
301 
302 		/*
303 		 * MSInIRx registers are read-to-clear; before registering
304 		 * interrupt handlers, read all of them to clear spurious
305 		 * interrupts that may occur before the driver is probed.
306 		 */
307 		for (int msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++)
308 			xgene_msi_ir_read(xgene_msi, i, msi_idx);
309 
310 		/* Read MSIINTn to confirm */
311 		msi_val = xgene_msi_int_read(xgene_msi, i);
312 		if (msi_val) {
313 			dev_err(&pdev->dev, "Failed to clear spurious IRQ\n");
314 			return EINVAL;
315 		}
316 
317 		irq = platform_get_irq(pdev, i);
318 		if (irq < 0)
319 			return irq;
320 
321 		xgene_msi->gic_irq[i] = irq;
322 
323 		/*
324 		 * Statically allocate MSI GIC IRQs to each CPU core.
325 		 * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated
326 		 * to each core.
327 		 */
328 		irq_set_status_flags(irq, IRQ_NO_BALANCING);
329 		err = irq_set_affinity(irq, cpumask_of(i % num_possible_cpus()));
330 		if (err) {
331 			pr_err("failed to set affinity for GIC IRQ");
332 			return err;
333 		}
334 
335 		irq_set_chained_handler_and_data(irq, xgene_msi_isr,
336 						 &xgene_msi_ctrl->gic_irq[i]);
337 	}
338 
339 	return 0;
340 }
341 
/* Devicetree match: X-Gene v1 MSI controller only */
static const struct of_device_id xgene_msi_match_table[] = {
	{.compatible = "apm,xgene1-msi"},
	{},
};
346 
xgene_msi_probe(struct platform_device * pdev)347 static int xgene_msi_probe(struct platform_device *pdev)
348 {
349 	struct resource *res;
350 	struct xgene_msi *xgene_msi;
351 	int rc;
352 
353 	xgene_msi_ctrl = devm_kzalloc(&pdev->dev, sizeof(*xgene_msi_ctrl),
354 				      GFP_KERNEL);
355 	if (!xgene_msi_ctrl)
356 		return -ENOMEM;
357 
358 	xgene_msi = xgene_msi_ctrl;
359 
360 	xgene_msi->msi_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
361 	if (IS_ERR(xgene_msi->msi_regs)) {
362 		rc = PTR_ERR(xgene_msi->msi_regs);
363 		goto error;
364 	}
365 	xgene_msi->msi_addr = res->start;
366 
367 	rc = xgene_msi_init_allocator(&pdev->dev);
368 	if (rc) {
369 		dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
370 		goto error;
371 	}
372 
373 	rc = xgene_allocate_domains(dev_of_node(&pdev->dev), xgene_msi);
374 	if (rc) {
375 		dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
376 		goto error;
377 	}
378 
379 	rc = xgene_msi_handler_setup(pdev);
380 	if (rc)
381 		goto error;
382 
383 	dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
384 
385 	return 0;
386 error:
387 	xgene_msi_remove(pdev);
388 	return rc;
389 }
390 
/* Built-in platform driver; registered at device_initcall time */
static struct platform_driver xgene_msi_driver = {
	.driver = {
		.name = "xgene-msi",
		.of_match_table = xgene_msi_match_table,
	},
	.probe = xgene_msi_probe,
	.remove = xgene_msi_remove,
};
builtin_platform_driver(xgene_msi_driver);
400