xref: /linux/drivers/mtd/nand/ecc-realtek.c (revision e3966940559d52aa1800a008dcfeec218dd31f88)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Support for Realtek hardware ECC engine in RTL93xx SoCs
4  */
5 
#include <linux/bitfield.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/mtd/nand.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
12 
13 /*
14  * The Realtek ECC engine has two operation modes.
15  *
16  * - BCH6  : Generate 10 ECC bytes from 512 data bytes plus 6 free bytes
17  * - BCH12 : Generate 20 ECC bytes from 512 data bytes plus 6 free bytes
18  *
19  * It can run for arbitrary NAND flash chips with different block and OOB sizes. Currently there
20  * are only two known devices in the wild that have NAND flash and make use of this ECC engine
21  * (Linksys LGS328C & LGS352C). To keep compatibility with vendor firmware, new modes can only
22  * be added when new data layouts have been analyzed. For now allow BCH6 on flash with 2048 byte
23  * blocks and 64 bytes oob.
24  *
 * This driver aligns with kernel ECC naming conventions. Nevertheless, here is a short note on
 * the Realtek naming conventions for the different structures in the OOB area.
27  *
28  * - BBI      : Bad block indicator. The first two bytes of OOB. Protected by ECC!
29  * - tag      : 6 User/free bytes. First tag "contains" 2 bytes BBI. Protected by ECC!
30  * - syndrome : ECC/parity bytes
31  *
32  * Altogether this gives currently the following block layout.
33  *
34  * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+
35  * |  512 |  512 |  512 |  512 |   2 |    4 |    6 |    6 |    6 |  10 |  10 |  10 |  10 |
36  * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+
37  * | data | data | data | data | BBI | free | free | free | free | ECC | ECC | ECC | ECC |
38  * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+
39  */
40 
/* Flash geometry and ECC strength currently accepted by this driver */
#define RTL_ECC_ALLOWED_PAGE_SIZE	2048
#define RTL_ECC_ALLOWED_OOB_SIZE	64
#define RTL_ECC_ALLOWED_STRENGTH	6

/* Fixed per-step geometry of the engine (see layout diagram above) */
#define RTL_ECC_BLOCK_SIZE		512
#define RTL_ECC_FREE_SIZE		6
#define RTL_ECC_PARITY_SIZE_BCH6	10
#define RTL_ECC_PARITY_SIZE_BCH12	20
49 
50 /*
51  * The engine is fed with two DMA regions. One for data (always 512 bytes) and one for free bytes
52  * and parity (either 16 bytes for BCH6 or 26 bytes for BCH12). Start and length of each must be
53  * aligned to a multiple of 4.
54  */
55 
/* Free+parity DMA region, sized for the worst case (BCH12) and 4-byte aligned */
#define RTL_ECC_DMA_FREE_PARITY_SIZE	ALIGN(RTL_ECC_FREE_SIZE + RTL_ECC_PARITY_SIZE_BCH12, 4)
#define RTL_ECC_DMA_SIZE		(RTL_ECC_BLOCK_SIZE + RTL_ECC_DMA_FREE_PARITY_SIZE)

/* Engine configuration: BCH mode, DMA behaviour, burst length */
#define RTL_ECC_CFG			0x00
#define   RTL_ECC_BCH6			0
#define   RTL_ECC_BCH12			BIT(28)
#define   RTL_ECC_DMA_PRECISE		BIT(12)
#define   RTL_ECC_BURST_128		GENMASK(1, 0)
/* Writing an operation code starts the engine */
#define RTL_ECC_DMA_TRIGGER		0x08
#define   RTL_ECC_OP_DECODE		0
#define   RTL_ECC_OP_ENCODE		BIT(0)
/* DMA addresses of the data region and the free/parity ("tag") region */
#define RTL_ECC_DMA_START		0x0c
#define RTL_ECC_DMA_TAG			0x10
/* Completion/result status of the last operation */
#define RTL_ECC_STATUS			0x14
#define   RTL_ECC_CORR_COUNT		GENMASK(19, 12)
#define   RTL_ECC_RESULT		BIT(8)
#define   RTL_ECC_ALL_ONE		BIT(4)
#define   RTL_ECC_OP_STATUS		BIT(0)
74 
/* Per-instance state of one Realtek ECC engine */
struct rtl_ecc_engine {
	struct device *dev;
	struct nand_ecc_engine engine;	/* handle registered with the ECC core */
	struct mutex lock;		/* serializes use of the single DMA bounce buffer */
	char *buf;			/* preallocated bounce buffer: data + free + parity */
	dma_addr_t buf_dma;		/* DMA address of buf */
	struct regmap *regmap;		/* MMIO register access */
};
83 
84 struct rtl_ecc_ctx {
85 	struct rtl_ecc_engine * rtlc;
86 	struct nand_ecc_req_tweak_ctx req_ctx;
87 	int steps;
88 	int bch_mode;
89 	int strength;
90 	int parity_size;
91 };
92 
/* All engine registers are 32 bit wide on a 4-byte stride */
static const struct regmap_config rtl_ecc_regmap_config = {
	.reg_bits	= 32,
	.val_bits	= 32,
	.reg_stride	= 4,
};
98 
/* Retrieve the per-NAND driver context stored by rtl_ecc_init_ctx() */
static inline void *nand_to_ctx(struct nand_device *nand)
{
	return nand->ecc.ctx.priv;
}
103 
104 static inline struct rtl_ecc_engine *nand_to_rtlc(struct nand_device *nand)
105 {
106 	struct nand_ecc_engine *eng = nand->ecc.engine;
107 
108 	return container_of(eng, struct rtl_ecc_engine, engine);
109 }
110 
111 static int rtl_ecc_ooblayout_ecc(struct mtd_info *mtd, int section,
112 				 struct mtd_oob_region *oobregion)
113 {
114 	struct nand_device *nand = mtd_to_nanddev(mtd);
115 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
116 
117 	if (section < 0 || section >= ctx->steps)
118 		return -ERANGE;
119 
120 	oobregion->offset = ctx->steps * RTL_ECC_FREE_SIZE + section * ctx->parity_size;
121 	oobregion->length = ctx->parity_size;
122 
123 	return 0;
124 }
125 
126 static int rtl_ecc_ooblayout_free(struct mtd_info *mtd, int section,
127 				  struct mtd_oob_region *oobregion)
128 {
129 	struct nand_device *nand = mtd_to_nanddev(mtd);
130 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
131 	int bbm;
132 
133 	if (section < 0 || section >= ctx->steps)
134 		return -ERANGE;
135 
136 	/* reserve 2 BBM bytes in first block */
137 	bbm = section ? 0 : 2;
138 	oobregion->offset = section * RTL_ECC_FREE_SIZE + bbm;
139 	oobregion->length = RTL_ECC_FREE_SIZE - bbm;
140 
141 	return 0;
142 }
143 
144 static const struct mtd_ooblayout_ops rtl_ecc_ooblayout_ops = {
145 	.ecc = rtl_ecc_ooblayout_ecc,
146 	.free = rtl_ecc_ooblayout_free,
147 };
148 
/*
 * Program configuration and DMA addresses, then start one encode/decode
 * operation. Completion must be awaited with rtl_ecc_wait_for_engine().
 */
static void rtl_ecc_kick_engine(struct rtl_ecc_ctx *ctx, int operation)
{
	struct rtl_ecc_engine *rtlc = ctx->rtlc;

	/* select BCH6/BCH12 plus 128-byte bursts and precise DMA mode */
	regmap_write(rtlc->regmap, RTL_ECC_CFG,
		     ctx->bch_mode | RTL_ECC_BURST_128 | RTL_ECC_DMA_PRECISE);

	/* data region first, free/parity ("tag") region follows at +512 bytes */
	regmap_write(rtlc->regmap, RTL_ECC_DMA_START, rtlc->buf_dma);
	regmap_write(rtlc->regmap, RTL_ECC_DMA_TAG, rtlc->buf_dma + RTL_ECC_BLOCK_SIZE);
	/* writing the operation code triggers the engine */
	regmap_write(rtlc->regmap, RTL_ECC_DMA_TRIGGER, operation);
}
160 
/*
 * Poll until the engine has finished the running operation.
 *
 * Returns the number of corrected bitflips on success, -EBADMSG if the
 * engine flagged an uncorrectable error, or a negative errno if polling
 * timed out.
 */
static int rtl_ecc_wait_for_engine(struct rtl_ecc_ctx *ctx)
{
	struct rtl_ecc_engine *rtlc = ctx->rtlc;
	int ret, status, bitflips;
	bool all_one;

	/*
	 * The ECC engine needs 6-8 us to encode/decode a BCH6 syndrome for 512 bytes of data
	 * and 6 free bytes. In case the NAND area has been erased and all data and oob is
	 * set to 0xff, decoding takes 30us (reason unknown). Although the engine can trigger
	 * interrupts when finished, use active polling for now. 12 us maximum wait time has
	 * proven to be a good tradeoff between performance and overhead.
	 */

	ret = regmap_read_poll_timeout(rtlc->regmap, RTL_ECC_STATUS, status,
				       !(status & RTL_ECC_OP_STATUS), 12, 1000000);
	if (ret)
		return ret;

	/* extract error flag, all-ones indicator and correction count */
	ret = FIELD_GET(RTL_ECC_RESULT, status);
	all_one = FIELD_GET(RTL_ECC_ALL_ONE, status);
	bitflips = FIELD_GET(RTL_ECC_CORR_COUNT, status);

	/* For erased blocks (all bits one) error status can be ignored */
	if (all_one)
		ret = 0;

	return ret ? -EBADMSG : bitflips;
}
190 
/*
 * Run one ECC step through the engine. The caller's data (512 bytes), free
 * (6 bytes) and parity (ctx->parity_size bytes) buffers are copied into the
 * shared DMA bounce buffer; on success the possibly corrected (decode) or
 * generated (encode) contents are copied back out.
 *
 * Returns the bitflip count from rtl_ecc_wait_for_engine() or a negative
 * errno. Serialized by rtlc->lock because all steps share one DMA buffer.
 */
static int rtl_ecc_run_engine(struct rtl_ecc_ctx *ctx, char *data, char *free,
			      char *parity, int operation)
{
	struct rtl_ecc_engine *rtlc = ctx->rtlc;
	char *buf_parity = rtlc->buf + RTL_ECC_BLOCK_SIZE + RTL_ECC_FREE_SIZE;
	char *buf_free = rtlc->buf + RTL_ECC_BLOCK_SIZE;
	char *buf_data = rtlc->buf;
	int ret;

	mutex_lock(&rtlc->lock);

	memcpy(buf_data, data, RTL_ECC_BLOCK_SIZE);
	memcpy(buf_free, free, RTL_ECC_FREE_SIZE);
	memcpy(buf_parity, parity, ctx->parity_size);

	/* hand the buffer to the device, run the engine, take the buffer back */
	dma_sync_single_for_device(rtlc->dev, rtlc->buf_dma, RTL_ECC_DMA_SIZE, DMA_TO_DEVICE);
	rtl_ecc_kick_engine(ctx, operation);
	ret = rtl_ecc_wait_for_engine(ctx);
	dma_sync_single_for_cpu(rtlc->dev, rtlc->buf_dma, RTL_ECC_DMA_SIZE, DMA_FROM_DEVICE);

	/* only propagate engine output if the operation did not fail */
	if (ret >= 0) {
		memcpy(data, buf_data, RTL_ECC_BLOCK_SIZE);
		memcpy(free, buf_free, RTL_ECC_FREE_SIZE);
		memcpy(parity, buf_parity, ctx->parity_size);
	}

	mutex_unlock(&rtlc->lock);

	return ret;
}
221 
222 static int rtl_ecc_prepare_io_req(struct nand_device *nand, struct nand_page_io_req *req)
223 {
224 	struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand);
225 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
226 	char *data, *free, *parity;
227 	int ret = 0;
228 
229 	if (req->mode == MTD_OPS_RAW)
230 		return 0;
231 
232 	nand_ecc_tweak_req(&ctx->req_ctx, req);
233 
234 	if (req->type == NAND_PAGE_READ)
235 		return 0;
236 
237 	free = req->oobbuf.in;
238 	data = req->databuf.in;
239 	parity = req->oobbuf.in + ctx->steps * RTL_ECC_FREE_SIZE;
240 
241 	for (int i = 0; i < ctx->steps; i++) {
242 		ret |= rtl_ecc_run_engine(ctx, data, free, parity, RTL_ECC_OP_ENCODE);
243 
244 		free += RTL_ECC_FREE_SIZE;
245 		data += RTL_ECC_BLOCK_SIZE;
246 		parity += ctx->parity_size;
247 	}
248 
249 	if (unlikely(ret))
250 		dev_dbg(rtlc->dev, "ECC calculation failed\n");
251 
252 	return ret ? -EBADMSG : 0;
253 }
254 
/*
 * Finish an I/O request: for (non-raw) reads, run the decoder over every ECC
 * step, correcting data in place and updating the MTD ECC statistics.
 *
 * Returns the maximum bitflip count seen in any step, or -EBADMSG if at
 * least one step was uncorrectable.
 */
static int rtl_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req)
{
	struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand);
	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
	struct mtd_info *mtd = nanddev_to_mtd(nand);
	char *data, *free, *parity;
	bool failure = false;
	int bitflips = 0;

	if (req->mode == MTD_OPS_RAW)
		return 0;

	/* writes were already encoded in prepare_io_req(); just restore buffers */
	if (req->type == NAND_PAGE_WRITE) {
		nand_ecc_restore_req(&ctx->req_ctx, req);
		return 0;
	}

	free = req->oobbuf.in;
	data = req->databuf.in;
	parity = req->oobbuf.in + ctx->steps * RTL_ECC_FREE_SIZE;

	for (int i = 0 ; i < ctx->steps; i++) {
		int ret = rtl_ecc_run_engine(ctx, data, free, parity, RTL_ECC_OP_DECODE);

		if (unlikely(ret < 0))
			/* ECC totally fails for bitflips in erased blocks */
			ret = nand_check_erased_ecc_chunk(data, RTL_ECC_BLOCK_SIZE,
							  parity, ctx->parity_size,
							  free, RTL_ECC_FREE_SIZE,
							  ctx->strength);
		if (unlikely(ret < 0)) {
			failure = true;
			mtd->ecc_stats.failed++;
		} else {
			mtd->ecc_stats.corrected += ret;
			bitflips = max_t(unsigned int, bitflips, ret);
		}

		free += RTL_ECC_FREE_SIZE;
		data += RTL_ECC_BLOCK_SIZE;
		parity += ctx->parity_size;
	}

	nand_ecc_restore_req(&ctx->req_ctx, req);

	if (unlikely(failure))
		dev_dbg(rtlc->dev, "ECC correction failed\n");
	else if (unlikely(bitflips > 2))
		dev_dbg(rtlc->dev, "%d bitflips detected\n", bitflips);

	return failure ? -EBADMSG : bitflips;
}
307 
308 static int rtl_ecc_check_support(struct nand_device *nand)
309 {
310 	struct mtd_info *mtd = nanddev_to_mtd(nand);
311 	struct device *dev = nand->ecc.engine->dev;
312 
313 	if (mtd->oobsize != RTL_ECC_ALLOWED_OOB_SIZE ||
314 	    mtd->writesize != RTL_ECC_ALLOWED_PAGE_SIZE) {
315 		dev_err(dev, "only flash geometry data=%d, oob=%d supported\n",
316 			RTL_ECC_ALLOWED_PAGE_SIZE, RTL_ECC_ALLOWED_OOB_SIZE);
317 		return -EINVAL;
318 	}
319 
320 	if (nand->ecc.user_conf.algo != NAND_ECC_ALGO_BCH ||
321 	    nand->ecc.user_conf.strength != RTL_ECC_ALLOWED_STRENGTH ||
322 	    nand->ecc.user_conf.placement != NAND_ECC_PLACEMENT_OOB ||
323 	    nand->ecc.user_conf.step_size != RTL_ECC_BLOCK_SIZE) {
324 		dev_err(dev, "only algo=bch, strength=%d, placement=oob, step=%d supported\n",
325 			RTL_ECC_ALLOWED_STRENGTH, RTL_ECC_BLOCK_SIZE);
326 		return -EINVAL;
327 	}
328 
329 	return 0;
330 }
331 
332 static int rtl_ecc_init_ctx(struct nand_device *nand)
333 {
334 	struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
335 	struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand);
336 	struct mtd_info *mtd = nanddev_to_mtd(nand);
337 	int strength = nand->ecc.user_conf.strength;
338 	struct device *dev = nand->ecc.engine->dev;
339 	struct rtl_ecc_ctx *ctx;
340 	int ret;
341 
342 	ret = rtl_ecc_check_support(nand);
343 	if (ret)
344 		return ret;
345 
346 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
347 	if (!ctx)
348 		return -ENOMEM;
349 
350 	nand->ecc.ctx.priv = ctx;
351 	mtd_set_ooblayout(mtd, &rtl_ecc_ooblayout_ops);
352 
353 	conf->algo = NAND_ECC_ALGO_BCH;
354 	conf->strength = strength;
355 	conf->step_size = RTL_ECC_BLOCK_SIZE;
356 	conf->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
357 
358 	ctx->rtlc = rtlc;
359 	ctx->steps = mtd->writesize / RTL_ECC_BLOCK_SIZE;
360 	ctx->strength = strength;
361 	ctx->bch_mode = strength == 6 ? RTL_ECC_BCH6 : RTL_ECC_BCH12;
362 	ctx->parity_size = strength == 6 ? RTL_ECC_PARITY_SIZE_BCH6 : RTL_ECC_PARITY_SIZE_BCH12;
363 
364 	ret = nand_ecc_init_req_tweaking(&ctx->req_ctx, nand);
365 	if (ret)
366 		return ret;
367 
368 	dev_dbg(dev, "using bch%d with geometry data=%dx%d, free=%dx6, parity=%dx%d",
369 		conf->strength, ctx->steps, conf->step_size,
370 		ctx->steps, ctx->steps, ctx->parity_size);
371 
372 	return 0;
373 }
374 
375 static void rtl_ecc_cleanup_ctx(struct nand_device *nand)
376 {
377 	struct rtl_ecc_ctx *ctx = nand_to_ctx(nand);
378 
379 	if (ctx)
380 		nand_ecc_cleanup_req_tweaking(&ctx->req_ctx);
381 }
382 
/* Callbacks implementing the generic on-host hardware ECC engine interface */
static struct nand_ecc_engine_ops rtl_ecc_engine_ops = {
	.init_ctx = rtl_ecc_init_ctx,
	.cleanup_ctx = rtl_ecc_cleanup_ctx,
	.prepare_io_req = rtl_ecc_prepare_io_req,
	.finish_io_req = rtl_ecc_finish_io_req,
};
389 
390 static int rtl_ecc_probe(struct platform_device *pdev)
391 {
392 	struct device *dev = &pdev->dev;
393 	struct rtl_ecc_engine *rtlc;
394 	void __iomem *base;
395 	int ret;
396 
397 	rtlc = devm_kzalloc(dev, sizeof(*rtlc), GFP_KERNEL);
398 	if (!rtlc)
399 		return -ENOMEM;
400 
401 	base = devm_platform_ioremap_resource(pdev, 0);
402 	if (IS_ERR(base))
403 		return PTR_ERR(base);
404 
405 	ret = devm_mutex_init(dev, &rtlc->lock);
406 	if (ret)
407 		return ret;
408 
409 	rtlc->regmap = devm_regmap_init_mmio(dev, base, &rtl_ecc_regmap_config);
410 	if (IS_ERR(rtlc->regmap))
411 		return PTR_ERR(rtlc->regmap);
412 
413 	/*
414 	 * Focus on simplicity and use a preallocated DMA buffer for data exchange with the
415 	 * engine. For now make it a noncoherent memory model as invalidating/flushing caches
416 	 * is faster than reading/writing uncached memory on the known architectures.
417 	 */
418 
419 	rtlc->buf = dma_alloc_noncoherent(dev, RTL_ECC_DMA_SIZE, &rtlc->buf_dma,
420 					  DMA_BIDIRECTIONAL, GFP_KERNEL);
421 	if (IS_ERR(rtlc->buf))
422 		return PTR_ERR(rtlc->buf);
423 
424 	rtlc->dev = dev;
425 	rtlc->engine.dev = dev;
426 	rtlc->engine.ops = &rtl_ecc_engine_ops;
427 	rtlc->engine.integration = NAND_ECC_ENGINE_INTEGRATION_EXTERNAL;
428 
429 	nand_ecc_register_on_host_hw_engine(&rtlc->engine);
430 
431 	platform_set_drvdata(pdev, rtlc);
432 
433 	return 0;
434 }
435 
/* Remove: unregister the engine and release the DMA bounce buffer */
static void rtl_ecc_remove(struct platform_device *pdev)
{
	struct rtl_ecc_engine *rtlc = platform_get_drvdata(pdev);

	nand_ecc_unregister_on_host_hw_engine(&rtlc->engine);
	dma_free_noncoherent(rtlc->dev, RTL_ECC_DMA_SIZE, rtlc->buf, rtlc->buf_dma,
			     DMA_BIDIRECTIONAL);
}
444 
445 static const struct of_device_id rtl_ecc_of_ids[] = {
446 	{
447 		.compatible = "realtek,rtl9301-ecc",
448 	},
449 	{ /* sentinel */ },
450 };
451 
/* Platform driver glue; probe/remove handle engine registration and DMA */
static struct platform_driver rtl_ecc_driver = {
	.driver	= {
		.name = "rtl-nand-ecc-engine",
		.of_match_table = rtl_ecc_of_ids,
	},
	.probe = rtl_ecc_probe,
	.remove = rtl_ecc_remove,
};
module_platform_driver(rtl_ecc_driver);
461 
462 MODULE_LICENSE("GPL");
463 MODULE_AUTHOR("Markus Stockhausen <markus.stockhausen@gmx.de>");
464 MODULE_DESCRIPTION("Realtek NAND hardware ECC controller");
465