xref: /linux/drivers/mtd/nand/ecc-realtek.c (revision 45bd2d77fbedec862204bb5c0fcaba2b7fa5fb56)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Support for Realtek hardware ECC engine in RTL93xx SoCs
4  */
5 
#include <linux/bitfield.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/mtd/nand.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
12 
13 /*
14  * The Realtek ECC engine has two operation modes.
15  *
16  * - BCH6  : Generate 10 ECC bytes from 512 data bytes plus 6 free bytes
17  * - BCH12 : Generate 20 ECC bytes from 512 data bytes plus 6 free bytes
18  *
19  * It can run for arbitrary NAND flash chips with different block and OOB sizes. Currently there
20  * are a few known devices in the wild that make use of this ECC engine
21  * (Linksys LGS328C, LGS352C & Netlink HG323DAC). To keep compatibility with vendor firmware,
22  * new modes can only be added when new data layouts have been analyzed. For now allow BCH6 on
23  * flash with 2048 byte blocks and at least 64 bytes oob. Some vendors make use of
24  * 128 bytes OOB NAND chips (e.g. Macronix MX35LF1G24AD) but only use BCH6 and thus the first
25  * 64 bytes of the OOB area. In this case the engine leaves any extra bytes unused.
26  *
 * This driver aligns with kernel ECC naming conventions. Nevertheless, here is a short note on
 * the Realtek naming conventions for the different structures in the OOB area.
29  *
30  * - BBI      : Bad block indicator. The first two bytes of OOB. Protected by ECC!
31  * - tag      : 6 User/free bytes. First tag "contains" 2 bytes BBI. Protected by ECC!
32  * - syndrome : ECC/parity bytes
33  *
34  * Altogether this gives currently the following block layout.
35  *
36  * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+
37  * |  512 |  512 |  512 |  512 |   2 |    4 |    6 |    6 |    6 |  10 |  10 |  10 |  10 |
38  * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+
39  * | data | data | data | data | BBI | free | free | free | free | ECC | ECC | ECC | ECC |
40  * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+
41  */
42 
/* Flash geometry and ECC strength accepted by rtl_ecc_check_support() */
#define RTL_ECC_ALLOWED_PAGE_SIZE	2048
#define RTL_ECC_ALLOWED_MIN_OOB_SIZE	64
#define RTL_ECC_ALLOWED_STRENGTH	6

/* Sizes (in bytes) handled per ECC step */
#define RTL_ECC_BLOCK_SIZE		512
#define RTL_ECC_FREE_SIZE		6
#define RTL_ECC_PARITY_SIZE_BCH6	10
#define RTL_ECC_PARITY_SIZE_BCH12	20

/*
 * The engine is fed with two DMA regions. One for data (always 512 bytes) and one for free bytes
 * and parity (either 16 bytes for BCH6 or 26 bytes for BCH12). Start and length of each must be
 * aligned to a multiple of 4. The bounce buffer is sized for the larger BCH12 case.
 */

#define RTL_ECC_DMA_FREE_PARITY_SIZE	ALIGN(RTL_ECC_FREE_SIZE + RTL_ECC_PARITY_SIZE_BCH12, 4)
#define RTL_ECC_DMA_SIZE		(RTL_ECC_BLOCK_SIZE + RTL_ECC_DMA_FREE_PARITY_SIZE)

/* Configuration register: BCH mode, DMA burst and precise-DMA flags */
#define RTL_ECC_CFG			0x00
#define   RTL_ECC_BCH6			0
#define   RTL_ECC_BCH12			BIT(28)
#define   RTL_ECC_DMA_PRECISE		BIT(12)
#define   RTL_ECC_BURST_128		GENMASK(1, 0)
/* Writing the operation code to the trigger register starts the engine */
#define RTL_ECC_DMA_TRIGGER		0x08
#define   RTL_ECC_OP_DECODE		0
#define   RTL_ECC_OP_ENCODE		BIT(0)
/* DMA address of the data region */
#define RTL_ECC_DMA_START		0x0c
/* DMA address of the free bytes + parity region */
#define RTL_ECC_DMA_TAG			0x10
/* Status register: OP_STATUS is polled until it reads back as zero */
#define RTL_ECC_STATUS			0x14
#define   RTL_ECC_CORR_COUNT		GENMASK(19, 12)
#define   RTL_ECC_RESULT		BIT(8)
#define   RTL_ECC_ALL_ONE		BIT(4)
#define   RTL_ECC_OP_STATUS		BIT(0)
76 
77 struct rtl_ecc_engine {
78 	struct device *dev;
79 	struct nand_ecc_engine engine;
80 	struct mutex lock;
81 	char *buf;
82 	dma_addr_t buf_dma;
83 	struct regmap *regmap;
84 };
85 
86 struct rtl_ecc_ctx {
87 	struct rtl_ecc_engine * rtlc;
88 	struct nand_ecc_req_tweak_ctx req_ctx;
89 	int steps;
90 	int bch_mode;
91 	int strength;
92 	int parity_size;
93 };
94 
95 static const struct regmap_config rtl_ecc_regmap_config = {
96 	.reg_bits	= 32,
97 	.val_bits	= 32,
98 	.reg_stride	= 4,
99 };
100 
101 static inline void *nand_to_ctx(struct nand_device *nand)
102 {
103 	return nand->ecc.ctx.priv;
104 }
105 
106 static inline struct rtl_ecc_engine *nand_to_rtlc(struct nand_device *nand)
107 {
108 	struct nand_ecc_engine *eng = nand->ecc.engine;
109 
110 	return container_of(eng, struct rtl_ecc_engine, engine);
111 }
112 
113 static int rtl_ecc_ooblayout_ecc(struct mtd_info *mtd, int section,
114 				 struct mtd_oob_region *oobregion)
115 {
116 	struct nand_device *nand = mtd_to_nanddev(mtd);
117 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
118 
119 	if (section < 0 || section >= ctx->steps)
120 		return -ERANGE;
121 
122 	oobregion->offset = ctx->steps * RTL_ECC_FREE_SIZE + section * ctx->parity_size;
123 	oobregion->length = ctx->parity_size;
124 
125 	return 0;
126 }
127 
128 static int rtl_ecc_ooblayout_free(struct mtd_info *mtd, int section,
129 				  struct mtd_oob_region *oobregion)
130 {
131 	struct nand_device *nand = mtd_to_nanddev(mtd);
132 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
133 	int bbm;
134 
135 	if (section < 0 || section >= ctx->steps)
136 		return -ERANGE;
137 
138 	/* reserve 2 BBM bytes in first block */
139 	bbm = section ? 0 : 2;
140 	oobregion->offset = section * RTL_ECC_FREE_SIZE + bbm;
141 	oobregion->length = RTL_ECC_FREE_SIZE - bbm;
142 
143 	return 0;
144 }
145 
146 static const struct mtd_ooblayout_ops rtl_ecc_ooblayout_ops = {
147 	.ecc = rtl_ecc_ooblayout_ecc,
148 	.free = rtl_ecc_ooblayout_free,
149 };
150 
151 static void rtl_ecc_kick_engine(struct rtl_ecc_ctx *ctx, int operation)
152 {
153 	struct rtl_ecc_engine *rtlc = ctx->rtlc;
154 
155 	regmap_write(rtlc->regmap, RTL_ECC_CFG,
156 		     ctx->bch_mode | RTL_ECC_BURST_128 | RTL_ECC_DMA_PRECISE);
157 
158 	regmap_write(rtlc->regmap, RTL_ECC_DMA_START, rtlc->buf_dma);
159 	regmap_write(rtlc->regmap, RTL_ECC_DMA_TAG, rtlc->buf_dma + RTL_ECC_BLOCK_SIZE);
160 	regmap_write(rtlc->regmap, RTL_ECC_DMA_TRIGGER, operation);
161 }
162 
163 static int rtl_ecc_wait_for_engine(struct rtl_ecc_ctx *ctx)
164 {
165 	struct rtl_ecc_engine *rtlc = ctx->rtlc;
166 	int ret, status, bitflips;
167 	bool all_one;
168 
169 	/*
170 	 * The ECC engine needs 6-8 us to encode/decode a BCH6 syndrome for 512 bytes of data
171 	 * and 6 free bytes. In case the NAND area has been erased and all data and oob is
172 	 * set to 0xff, decoding takes 30us (reason unknown). Although the engine can trigger
173 	 * interrupts when finished, use active polling for now. 12 us maximum wait time has
174 	 * proven to be a good tradeoff between performance and overhead.
175 	 */
176 
177 	ret = regmap_read_poll_timeout(rtlc->regmap, RTL_ECC_STATUS, status,
178 				       !(status & RTL_ECC_OP_STATUS), 12, 1000000);
179 	if (ret)
180 		return ret;
181 
182 	ret = FIELD_GET(RTL_ECC_RESULT, status);
183 	all_one = FIELD_GET(RTL_ECC_ALL_ONE, status);
184 	bitflips = FIELD_GET(RTL_ECC_CORR_COUNT, status);
185 
186 	/* For erased blocks (all bits one) error status can be ignored */
187 	if (all_one)
188 		ret = 0;
189 
190 	return ret ? -EBADMSG : bitflips;
191 }
192 
193 static int rtl_ecc_run_engine(struct rtl_ecc_ctx *ctx, char *data, char *free,
194 			      char *parity, int operation)
195 {
196 	struct rtl_ecc_engine *rtlc = ctx->rtlc;
197 	char *buf_parity = rtlc->buf + RTL_ECC_BLOCK_SIZE + RTL_ECC_FREE_SIZE;
198 	char *buf_free = rtlc->buf + RTL_ECC_BLOCK_SIZE;
199 	char *buf_data = rtlc->buf;
200 	int ret;
201 
202 	mutex_lock(&rtlc->lock);
203 
204 	memcpy(buf_data, data, RTL_ECC_BLOCK_SIZE);
205 	memcpy(buf_free, free, RTL_ECC_FREE_SIZE);
206 	memcpy(buf_parity, parity, ctx->parity_size);
207 
208 	dma_sync_single_for_device(rtlc->dev, rtlc->buf_dma, RTL_ECC_DMA_SIZE, DMA_TO_DEVICE);
209 	rtl_ecc_kick_engine(ctx, operation);
210 	ret = rtl_ecc_wait_for_engine(ctx);
211 	dma_sync_single_for_cpu(rtlc->dev, rtlc->buf_dma, RTL_ECC_DMA_SIZE, DMA_FROM_DEVICE);
212 
213 	if (ret >= 0) {
214 		memcpy(data, buf_data, RTL_ECC_BLOCK_SIZE);
215 		memcpy(free, buf_free, RTL_ECC_FREE_SIZE);
216 		memcpy(parity, buf_parity, ctx->parity_size);
217 	}
218 
219 	mutex_unlock(&rtlc->lock);
220 
221 	return ret;
222 }
223 
224 static int rtl_ecc_prepare_io_req(struct nand_device *nand, struct nand_page_io_req *req)
225 {
226 	struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand);
227 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
228 	char *data, *free, *parity;
229 	int ret = 0;
230 
231 	if (req->mode == MTD_OPS_RAW)
232 		return 0;
233 
234 	nand_ecc_tweak_req(&ctx->req_ctx, req);
235 
236 	if (req->type == NAND_PAGE_READ)
237 		return 0;
238 
239 	free = req->oobbuf.in;
240 	data = req->databuf.in;
241 	parity = req->oobbuf.in + ctx->steps * RTL_ECC_FREE_SIZE;
242 
243 	for (int i = 0; i < ctx->steps; i++) {
244 		ret |= rtl_ecc_run_engine(ctx, data, free, parity, RTL_ECC_OP_ENCODE);
245 
246 		free += RTL_ECC_FREE_SIZE;
247 		data += RTL_ECC_BLOCK_SIZE;
248 		parity += ctx->parity_size;
249 	}
250 
251 	if (unlikely(ret))
252 		dev_dbg(rtlc->dev, "ECC calculation failed\n");
253 
254 	return ret ? -EBADMSG : 0;
255 }
256 
257 static int rtl_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req)
258 {
259 	struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand);
260 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
261 	struct mtd_info *mtd = nanddev_to_mtd(nand);
262 	char *data, *free, *parity;
263 	bool failure = false;
264 	int bitflips = 0;
265 
266 	if (req->mode == MTD_OPS_RAW)
267 		return 0;
268 
269 	if (req->type == NAND_PAGE_WRITE) {
270 		nand_ecc_restore_req(&ctx->req_ctx, req);
271 		return 0;
272 	}
273 
274 	free = req->oobbuf.in;
275 	data = req->databuf.in;
276 	parity = req->oobbuf.in + ctx->steps * RTL_ECC_FREE_SIZE;
277 
278 	for (int i = 0 ; i < ctx->steps; i++) {
279 		int ret = rtl_ecc_run_engine(ctx, data, free, parity, RTL_ECC_OP_DECODE);
280 
281 		if (unlikely(ret < 0))
282 			/* ECC totally fails for bitflips in erased blocks */
283 			ret = nand_check_erased_ecc_chunk(data, RTL_ECC_BLOCK_SIZE,
284 							  parity, ctx->parity_size,
285 							  free, RTL_ECC_FREE_SIZE,
286 							  ctx->strength);
287 		if (unlikely(ret < 0)) {
288 			failure = true;
289 			mtd->ecc_stats.failed++;
290 		} else {
291 			mtd->ecc_stats.corrected += ret;
292 			bitflips = max_t(unsigned int, bitflips, ret);
293 		}
294 
295 		free += RTL_ECC_FREE_SIZE;
296 		data += RTL_ECC_BLOCK_SIZE;
297 		parity += ctx->parity_size;
298 	}
299 
300 	nand_ecc_restore_req(&ctx->req_ctx, req);
301 
302 	if (unlikely(failure))
303 		dev_dbg(rtlc->dev, "ECC correction failed\n");
304 	else if (unlikely(bitflips > 2))
305 		dev_dbg(rtlc->dev, "%d bitflips detected\n", bitflips);
306 
307 	return failure ? -EBADMSG : bitflips;
308 }
309 
310 static int rtl_ecc_check_support(struct nand_device *nand)
311 {
312 	struct mtd_info *mtd = nanddev_to_mtd(nand);
313 	struct device *dev = nand->ecc.engine->dev;
314 
315 	if (mtd->oobsize < RTL_ECC_ALLOWED_MIN_OOB_SIZE ||
316 	    mtd->writesize != RTL_ECC_ALLOWED_PAGE_SIZE) {
317 		dev_err(dev, "only flash geometry data=%d, oob>=%d supported\n",
318 			RTL_ECC_ALLOWED_PAGE_SIZE, RTL_ECC_ALLOWED_MIN_OOB_SIZE);
319 		return -EINVAL;
320 	}
321 
322 	if (nand->ecc.user_conf.algo != NAND_ECC_ALGO_BCH ||
323 	    nand->ecc.user_conf.strength != RTL_ECC_ALLOWED_STRENGTH ||
324 	    nand->ecc.user_conf.placement != NAND_ECC_PLACEMENT_OOB ||
325 	    nand->ecc.user_conf.step_size != RTL_ECC_BLOCK_SIZE) {
326 		dev_err(dev, "only algo=bch, strength=%d, placement=oob, step=%d supported\n",
327 			RTL_ECC_ALLOWED_STRENGTH, RTL_ECC_BLOCK_SIZE);
328 		return -EINVAL;
329 	}
330 
331 	return 0;
332 }
333 
334 static int rtl_ecc_init_ctx(struct nand_device *nand)
335 {
336 	struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
337 	struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand);
338 	struct mtd_info *mtd = nanddev_to_mtd(nand);
339 	int strength = nand->ecc.user_conf.strength;
340 	struct device *dev = nand->ecc.engine->dev;
341 	struct rtl_ecc_ctx *ctx;
342 	int ret;
343 
344 	ret = rtl_ecc_check_support(nand);
345 	if (ret)
346 		return ret;
347 
348 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
349 	if (!ctx)
350 		return -ENOMEM;
351 
352 	nand->ecc.ctx.priv = ctx;
353 	mtd_set_ooblayout(mtd, &rtl_ecc_ooblayout_ops);
354 
355 	conf->algo = NAND_ECC_ALGO_BCH;
356 	conf->strength = strength;
357 	conf->step_size = RTL_ECC_BLOCK_SIZE;
358 	conf->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
359 
360 	ctx->rtlc = rtlc;
361 	ctx->steps = mtd->writesize / RTL_ECC_BLOCK_SIZE;
362 	ctx->strength = strength;
363 	ctx->bch_mode = strength == 6 ? RTL_ECC_BCH6 : RTL_ECC_BCH12;
364 	ctx->parity_size = strength == 6 ? RTL_ECC_PARITY_SIZE_BCH6 : RTL_ECC_PARITY_SIZE_BCH12;
365 
366 	ret = nand_ecc_init_req_tweaking(&ctx->req_ctx, nand);
367 	if (ret)
368 		return ret;
369 
370 	dev_dbg(dev, "using bch%d with geometry data=%dx%d, free=%dx6, parity=%dx%d",
371 		conf->strength, ctx->steps, conf->step_size,
372 		ctx->steps, ctx->steps, ctx->parity_size);
373 
374 	return 0;
375 }
376 
377 static void rtl_ecc_cleanup_ctx(struct nand_device *nand)
378 {
379 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
380 
381 	if (ctx)
382 		nand_ecc_cleanup_req_tweaking(&ctx->req_ctx);
383 }
384 
385 static const struct nand_ecc_engine_ops rtl_ecc_engine_ops = {
386 	.init_ctx = rtl_ecc_init_ctx,
387 	.cleanup_ctx = rtl_ecc_cleanup_ctx,
388 	.prepare_io_req = rtl_ecc_prepare_io_req,
389 	.finish_io_req = rtl_ecc_finish_io_req,
390 };
391 
392 static int rtl_ecc_probe(struct platform_device *pdev)
393 {
394 	struct device *dev = &pdev->dev;
395 	struct rtl_ecc_engine *rtlc;
396 	void __iomem *base;
397 	int ret;
398 
399 	rtlc = devm_kzalloc(dev, sizeof(*rtlc), GFP_KERNEL);
400 	if (!rtlc)
401 		return -ENOMEM;
402 
403 	base = devm_platform_ioremap_resource(pdev, 0);
404 	if (IS_ERR(base))
405 		return PTR_ERR(base);
406 
407 	ret = devm_mutex_init(dev, &rtlc->lock);
408 	if (ret)
409 		return ret;
410 
411 	rtlc->regmap = devm_regmap_init_mmio(dev, base, &rtl_ecc_regmap_config);
412 	if (IS_ERR(rtlc->regmap))
413 		return PTR_ERR(rtlc->regmap);
414 
415 	/*
416 	 * Focus on simplicity and use a preallocated DMA buffer for data exchange with the
417 	 * engine. For now make it a noncoherent memory model as invalidating/flushing caches
418 	 * is faster than reading/writing uncached memory on the known architectures.
419 	 */
420 
421 	rtlc->buf = dma_alloc_noncoherent(dev, RTL_ECC_DMA_SIZE, &rtlc->buf_dma,
422 					  DMA_BIDIRECTIONAL, GFP_KERNEL);
423 	if (!rtlc->buf)
424 		return -ENOMEM;
425 
426 	rtlc->dev = dev;
427 	rtlc->engine.dev = dev;
428 	rtlc->engine.ops = &rtl_ecc_engine_ops;
429 	rtlc->engine.integration = NAND_ECC_ENGINE_INTEGRATION_EXTERNAL;
430 
431 	nand_ecc_register_on_host_hw_engine(&rtlc->engine);
432 
433 	platform_set_drvdata(pdev, rtlc);
434 
435 	return 0;
436 }
437 
438 static void rtl_ecc_remove(struct platform_device *pdev)
439 {
440 	struct rtl_ecc_engine *rtlc = platform_get_drvdata(pdev);
441 
442 	nand_ecc_unregister_on_host_hw_engine(&rtlc->engine);
443 	dma_free_noncoherent(rtlc->dev, RTL_ECC_DMA_SIZE, rtlc->buf, rtlc->buf_dma,
444 			     DMA_BIDIRECTIONAL);
445 }
446 
447 static const struct of_device_id rtl_ecc_of_ids[] = {
448 	{
449 		.compatible = "realtek,rtl9301-ecc",
450 	},
451 	{ /* sentinel */ },
452 };
453 
454 static struct platform_driver rtl_ecc_driver = {
455 	.driver	= {
456 		.name = "rtl-nand-ecc-engine",
457 		.of_match_table = rtl_ecc_of_ids,
458 	},
459 	.probe = rtl_ecc_probe,
460 	.remove = rtl_ecc_remove,
461 };
462 module_platform_driver(rtl_ecc_driver);
463 
464 MODULE_LICENSE("GPL");
465 MODULE_AUTHOR("Markus Stockhausen <markus.stockhausen@gmx.de>");
466 MODULE_DESCRIPTION("Realtek NAND hardware ECC controller");
467