1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Support for Realtek hardware ECC engine in RTL93xx SoCs 4 */ 5 6 #include <linux/bitfield.h> 7 #include <linux/dma-mapping.h> 8 #include <linux/mtd/nand.h> 9 #include <linux/mutex.h> 10 #include <linux/platform_device.h> 11 #include <linux/regmap.h> 12 13 /* 14 * The Realtek ECC engine has two operation modes. 15 * 16 * - BCH6 : Generate 10 ECC bytes from 512 data bytes plus 6 free bytes 17 * - BCH12 : Generate 20 ECC bytes from 512 data bytes plus 6 free bytes 18 * 19 * It can run for arbitrary NAND flash chips with different block and OOB sizes. Currently there 20 * are a few known devices in the wild that make use of this ECC engine 21 * (Linksys LGS328C, LGS352C & Netlink HG323DAC). To keep compatibility with vendor firmware, 22 * new modes can only be added when new data layouts have been analyzed. For now allow BCH6 on 23 * flash with 2048 byte blocks and at least 64 bytes oob. Some vendors make use of 24 * 128 bytes OOB NAND chips (e.g. Macronix MX35LF1G24AD) but only use BCH6 and thus the first 25 * 64 bytes of the OOB area. In this case the engine leaves any extra bytes unused. 26 * 27 * This driver aligns with kernel ECC naming conventions. Neverthless a short notice on the 28 * Realtek naming conventions for the different structures in the OOB area. 29 * 30 * - BBI : Bad block indicator. The first two bytes of OOB. Protected by ECC! 31 * - tag : 6 User/free bytes. First tag "contains" 2 bytes BBI. Protected by ECC! 32 * - syndrome : ECC/parity bytes 33 * 34 * Altogether this gives currently the following block layout. 35 * 36 * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+ 37 * | 512 | 512 | 512 | 512 | 2 | 4 | 6 | 6 | 6 | 10 | 10 | 10 | 10 | 38 * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+ 39 * | data | data | data | data | BBI | free | free | free | free | ECC | ECC | ECC | ECC | 40 * +------+------+------+------+-----+------+------+------+------+-----+-----+-----+-----+ 41 */ 42 43 #define RTL_ECC_ALLOWED_PAGE_SIZE 2048 44 #define RTL_ECC_ALLOWED_MIN_OOB_SIZE 64 45 #define RTL_ECC_ALLOWED_STRENGTH 6 46 47 #define RTL_ECC_BLOCK_SIZE 512 48 #define RTL_ECC_FREE_SIZE 6 49 #define RTL_ECC_PARITY_SIZE_BCH6 10 50 #define RTL_ECC_PARITY_SIZE_BCH12 20 51 52 /* 53 * The engine is fed with two DMA regions. One for data (always 512 bytes) and one for free bytes 54 * and parity (either 16 bytes for BCH6 or 26 bytes for BCH12). Start and length of each must be 55 * aligned to a multiple of 4. 56 */ 57 58 #define RTL_ECC_DMA_FREE_PARITY_SIZE ALIGN(RTL_ECC_FREE_SIZE + RTL_ECC_PARITY_SIZE_BCH12, 4) 59 #define RTL_ECC_DMA_SIZE (RTL_ECC_BLOCK_SIZE + RTL_ECC_DMA_FREE_PARITY_SIZE) 60 61 #define RTL_ECC_CFG 0x00 62 #define RTL_ECC_BCH6 0 63 #define RTL_ECC_BCH12 BIT(28) 64 #define RTL_ECC_DMA_PRECISE BIT(12) 65 #define RTL_ECC_BURST_128 GENMASK(1, 0) 66 #define RTL_ECC_DMA_TRIGGER 0x08 67 #define RTL_ECC_OP_DECODE 0 68 #define RTL_ECC_OP_ENCODE BIT(0) 69 #define RTL_ECC_DMA_START 0x0c 70 #define RTL_ECC_DMA_TAG 0x10 71 #define RTL_ECC_STATUS 0x14 72 #define RTL_ECC_CORR_COUNT GENMASK(19, 12) 73 #define RTL_ECC_RESULT BIT(8) 74 #define RTL_ECC_ALL_ONE BIT(4) 75 #define RTL_ECC_OP_STATUS BIT(0) 76 77 struct rtl_ecc_engine { 78 struct device *dev; 79 struct nand_ecc_engine engine; 80 struct mutex lock; 81 char *buf; 82 dma_addr_t buf_dma; 83 struct regmap *regmap; 84 }; 85 86 struct rtl_ecc_ctx { 87 struct rtl_ecc_engine * rtlc; 88 struct nand_ecc_req_tweak_ctx req_ctx; 89 int steps; 90 int bch_mode; 91 int strength; 92 int parity_size; 93 }; 94 95 static const struct regmap_config rtl_ecc_regmap_config = { 96 .reg_bits = 32, 97 .val_bits = 32, 98 .reg_stride = 4, 99 }; 100 101 static inline void *nand_to_ctx(struct nand_device *nand) 102 { 103 return nand->ecc.ctx.priv; 104 } 105 106 static inline struct rtl_ecc_engine *nand_to_rtlc(struct nand_device *nand) 107 { 108 struct nand_ecc_engine *eng = nand->ecc.engine; 109 110 return container_of(eng, struct rtl_ecc_engine, engine); 111 } 112 113 static int rtl_ecc_ooblayout_ecc(struct mtd_info *mtd, int section, 114 struct mtd_oob_region *oobregion) 115 { 116 struct nand_device *nand = mtd_to_nanddev(mtd); 117 struct rtl_ecc_ctx *ctx = nand_to_ctx(nand); 118 119 if (section < 0 || section >= ctx->steps) 120 return -ERANGE; 121 122 oobregion->offset = ctx->steps * RTL_ECC_FREE_SIZE + section * ctx->parity_size; 123 oobregion->length = ctx->parity_size; 124 125 return 0; 126 } 127 128 static int rtl_ecc_ooblayout_free(struct mtd_info *mtd, int section, 129 struct mtd_oob_region *oobregion) 130 { 131 struct nand_device *nand = mtd_to_nanddev(mtd); 132 struct rtl_ecc_ctx *ctx = nand_to_ctx(nand); 133 int bbm; 134 135 if (section < 0 || section >= ctx->steps) 136 return -ERANGE; 137 138 /* reserve 2 BBM bytes in first block */ 139 bbm = section ? 0 : 2; 140 oobregion->offset = section * RTL_ECC_FREE_SIZE + bbm; 141 oobregion->length = RTL_ECC_FREE_SIZE - bbm; 142 143 return 0; 144 } 145 146 static const struct mtd_ooblayout_ops rtl_ecc_ooblayout_ops = { 147 .ecc = rtl_ecc_ooblayout_ecc, 148 .free = rtl_ecc_ooblayout_free, 149 }; 150 151 static void rtl_ecc_kick_engine(struct rtl_ecc_ctx *ctx, int operation) 152 { 153 struct rtl_ecc_engine *rtlc = ctx->rtlc; 154 155 regmap_write(rtlc->regmap, RTL_ECC_CFG, 156 ctx->bch_mode | RTL_ECC_BURST_128 | RTL_ECC_DMA_PRECISE); 157 158 regmap_write(rtlc->regmap, RTL_ECC_DMA_START, rtlc->buf_dma); 159 regmap_write(rtlc->regmap, RTL_ECC_DMA_TAG, rtlc->buf_dma + RTL_ECC_BLOCK_SIZE); 160 regmap_write(rtlc->regmap, RTL_ECC_DMA_TRIGGER, operation); 161 } 162 163 static int rtl_ecc_wait_for_engine(struct rtl_ecc_ctx *ctx) 164 { 165 struct rtl_ecc_engine *rtlc = ctx->rtlc; 166 int ret, status, bitflips; 167 bool all_one; 168 169 /* 170 * The ECC engine needs 6-8 us to encode/decode a BCH6 syndrome for 512 bytes of data 171 * and 6 free bytes. In case the NAND area has been erased and all data and oob is 172 * set to 0xff, decoding takes 30us (reason unknown). Although the engine can trigger 173 * interrupts when finished, use active polling for now. 12 us maximum wait time has 174 * proven to be a good tradeoff between performance and overhead. 175 */ 176 177 ret = regmap_read_poll_timeout(rtlc->regmap, RTL_ECC_STATUS, status, 178 !(status & RTL_ECC_OP_STATUS), 12, 1000000); 179 if (ret) 180 return ret; 181 182 ret = FIELD_GET(RTL_ECC_RESULT, status); 183 all_one = FIELD_GET(RTL_ECC_ALL_ONE, status); 184 bitflips = FIELD_GET(RTL_ECC_CORR_COUNT, status); 185 186 /* For erased blocks (all bits one) error status can be ignored */ 187 if (all_one) 188 ret = 0; 189 190 return ret ? -EBADMSG : bitflips; 191 } 192 193 static int rtl_ecc_run_engine(struct rtl_ecc_ctx *ctx, char *data, char *free, 194 char *parity, int operation) 195 { 196 struct rtl_ecc_engine *rtlc = ctx->rtlc; 197 char *buf_parity = rtlc->buf + RTL_ECC_BLOCK_SIZE + RTL_ECC_FREE_SIZE; 198 char *buf_free = rtlc->buf + RTL_ECC_BLOCK_SIZE; 199 char *buf_data = rtlc->buf; 200 int ret; 201 202 mutex_lock(&rtlc->lock); 203 204 memcpy(buf_data, data, RTL_ECC_BLOCK_SIZE); 205 memcpy(buf_free, free, RTL_ECC_FREE_SIZE); 206 memcpy(buf_parity, parity, ctx->parity_size); 207 208 dma_sync_single_for_device(rtlc->dev, rtlc->buf_dma, RTL_ECC_DMA_SIZE, DMA_TO_DEVICE); 209 rtl_ecc_kick_engine(ctx, operation); 210 ret = rtl_ecc_wait_for_engine(ctx); 211 dma_sync_single_for_cpu(rtlc->dev, rtlc->buf_dma, RTL_ECC_DMA_SIZE, DMA_FROM_DEVICE); 212 213 if (ret >= 0) { 214 memcpy(data, buf_data, RTL_ECC_BLOCK_SIZE); 215 memcpy(free, buf_free, RTL_ECC_FREE_SIZE); 216 memcpy(parity, buf_parity, ctx->parity_size); 217 } 218 219 mutex_unlock(&rtlc->lock); 220 221 return ret; 222 } 223 224 static int rtl_ecc_prepare_io_req(struct nand_device *nand, struct nand_page_io_req *req) 225 { 226 struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand); 227 struct rtl_ecc_ctx *ctx = nand_to_ctx(nand); 228 char *data, *free, *parity; 229 int ret = 0; 230 231 if (req->mode == MTD_OPS_RAW) 232 return 0; 233 234 nand_ecc_tweak_req(&ctx->req_ctx, req); 235 236 if (req->type == NAND_PAGE_READ) 237 return 0; 238 239 free = req->oobbuf.in; 240 data = req->databuf.in; 241 parity = req->oobbuf.in + ctx->steps * RTL_ECC_FREE_SIZE; 242 243 for (int i = 0; i < ctx->steps; i++) { 244 ret |= rtl_ecc_run_engine(ctx, data, free, parity, RTL_ECC_OP_ENCODE); 245 246 free += RTL_ECC_FREE_SIZE; 247 data += RTL_ECC_BLOCK_SIZE; 248 parity += ctx->parity_size; 249 } 250 251 if (unlikely(ret)) 252 dev_dbg(rtlc->dev, "ECC calculation failed\n"); 253 254 return ret ? -EBADMSG : 0; 255 } 256 257 static int rtl_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req) 258 { 259 struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand); 260 struct rtl_ecc_ctx *ctx = nand_to_ctx(nand); 261 struct mtd_info *mtd = nanddev_to_mtd(nand); 262 char *data, *free, *parity; 263 bool failure = false; 264 int bitflips = 0; 265 266 if (req->mode == MTD_OPS_RAW) 267 return 0; 268 269 if (req->type == NAND_PAGE_WRITE) { 270 nand_ecc_restore_req(&ctx->req_ctx, req); 271 return 0; 272 } 273 274 free = req->oobbuf.in; 275 data = req->databuf.in; 276 parity = req->oobbuf.in + ctx->steps * RTL_ECC_FREE_SIZE; 277 278 for (int i = 0 ; i < ctx->steps; i++) { 279 int ret = rtl_ecc_run_engine(ctx, data, free, parity, RTL_ECC_OP_DECODE); 280 281 if (unlikely(ret < 0)) 282 /* ECC totally fails for bitflips in erased blocks */ 283 ret = nand_check_erased_ecc_chunk(data, RTL_ECC_BLOCK_SIZE, 284 parity, ctx->parity_size, 285 free, RTL_ECC_FREE_SIZE, 286 ctx->strength); 287 if (unlikely(ret < 0)) { 288 failure = true; 289 mtd->ecc_stats.failed++; 290 } else { 291 mtd->ecc_stats.corrected += ret; 292 bitflips = max_t(unsigned int, bitflips, ret); 293 } 294 295 free += RTL_ECC_FREE_SIZE; 296 data += RTL_ECC_BLOCK_SIZE; 297 parity += ctx->parity_size; 298 } 299 300 nand_ecc_restore_req(&ctx->req_ctx, req); 301 302 if (unlikely(failure)) 303 dev_dbg(rtlc->dev, "ECC correction failed\n"); 304 else if (unlikely(bitflips > 2)) 305 dev_dbg(rtlc->dev, "%d bitflips detected\n", bitflips); 306 307 return failure ? -EBADMSG : bitflips; 308 } 309 310 static int rtl_ecc_check_support(struct nand_device *nand) 311 { 312 struct mtd_info *mtd = nanddev_to_mtd(nand); 313 struct device *dev = nand->ecc.engine->dev; 314 315 if (mtd->oobsize < RTL_ECC_ALLOWED_MIN_OOB_SIZE || 316 mtd->writesize != RTL_ECC_ALLOWED_PAGE_SIZE) { 317 dev_err(dev, "only flash geometry data=%d, oob>=%d supported\n", 318 RTL_ECC_ALLOWED_PAGE_SIZE, RTL_ECC_ALLOWED_MIN_OOB_SIZE); 319 return -EINVAL; 320 } 321 322 if (nand->ecc.user_conf.algo != NAND_ECC_ALGO_BCH || 323 nand->ecc.user_conf.strength != RTL_ECC_ALLOWED_STRENGTH || 324 nand->ecc.user_conf.placement != NAND_ECC_PLACEMENT_OOB || 325 nand->ecc.user_conf.step_size != RTL_ECC_BLOCK_SIZE) { 326 dev_err(dev, "only algo=bch, strength=%d, placement=oob, step=%d supported\n", 327 RTL_ECC_ALLOWED_STRENGTH, RTL_ECC_BLOCK_SIZE); 328 return -EINVAL; 329 } 330 331 return 0; 332 } 333 334 static int rtl_ecc_init_ctx(struct nand_device *nand) 335 { 336 struct nand_ecc_props *conf = &nand->ecc.ctx.conf; 337 struct rtl_ecc_engine *rtlc = nand_to_rtlc(nand); 338 struct mtd_info *mtd = nanddev_to_mtd(nand); 339 int strength = nand->ecc.user_conf.strength; 340 struct device *dev = nand->ecc.engine->dev; 341 struct rtl_ecc_ctx *ctx; 342 int ret; 343 344 ret = rtl_ecc_check_support(nand); 345 if (ret) 346 return ret; 347 348 ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); 349 if (!ctx) 350 return -ENOMEM; 351 352 nand->ecc.ctx.priv = ctx; 353 mtd_set_ooblayout(mtd, &rtl_ecc_ooblayout_ops); 354 355 conf->algo = NAND_ECC_ALGO_BCH; 356 conf->strength = strength; 357 conf->step_size = RTL_ECC_BLOCK_SIZE; 358 conf->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; 359 360 ctx->rtlc = rtlc; 361 ctx->steps = mtd->writesize / RTL_ECC_BLOCK_SIZE; 362 ctx->strength = strength; 363 ctx->bch_mode = strength == 6 ? RTL_ECC_BCH6 : RTL_ECC_BCH12; 364 ctx->parity_size = strength == 6 ? RTL_ECC_PARITY_SIZE_BCH6 : RTL_ECC_PARITY_SIZE_BCH12; 365 366 ret = nand_ecc_init_req_tweaking(&ctx->req_ctx, nand); 367 if (ret) 368 return ret; 369 370 dev_dbg(dev, "using bch%d with geometry data=%dx%d, free=%dx6, parity=%dx%d", 371 conf->strength, ctx->steps, conf->step_size, 372 ctx->steps, ctx->steps, ctx->parity_size); 373 374 return 0; 375 } 376 377 static void rtl_ecc_cleanup_ctx(struct nand_device *nand) 378 { 379 struct rtl_ecc_ctx *ctx = nand_to_ctx(nand); 380 381 if (ctx) 382 nand_ecc_cleanup_req_tweaking(&ctx->req_ctx); 383 } 384 385 static const struct nand_ecc_engine_ops rtl_ecc_engine_ops = { 386 .init_ctx = rtl_ecc_init_ctx, 387 .cleanup_ctx = rtl_ecc_cleanup_ctx, 388 .prepare_io_req = rtl_ecc_prepare_io_req, 389 .finish_io_req = rtl_ecc_finish_io_req, 390 }; 391 392 static int rtl_ecc_probe(struct platform_device *pdev) 393 { 394 struct device *dev = &pdev->dev; 395 struct rtl_ecc_engine *rtlc; 396 void __iomem *base; 397 int ret; 398 399 rtlc = devm_kzalloc(dev, sizeof(*rtlc), GFP_KERNEL); 400 if (!rtlc) 401 return -ENOMEM; 402 403 base = devm_platform_ioremap_resource(pdev, 0); 404 if (IS_ERR(base)) 405 return PTR_ERR(base); 406 407 ret = devm_mutex_init(dev, &rtlc->lock); 408 if (ret) 409 return ret; 410 411 rtlc->regmap = devm_regmap_init_mmio(dev, base, &rtl_ecc_regmap_config); 412 if (IS_ERR(rtlc->regmap)) 413 return PTR_ERR(rtlc->regmap); 414 415 /* 416 * Focus on simplicity and use a preallocated DMA buffer for data exchange with the 417 * engine. For now make it a noncoherent memory model as invalidating/flushing caches 418 * is faster than reading/writing uncached memory on the known architectures. 419 */ 420 421 rtlc->buf = dma_alloc_noncoherent(dev, RTL_ECC_DMA_SIZE, &rtlc->buf_dma, 422 DMA_BIDIRECTIONAL, GFP_KERNEL); 423 if (!rtlc->buf) 424 return -ENOMEM; 425 426 rtlc->dev = dev; 427 rtlc->engine.dev = dev; 428 rtlc->engine.ops = &rtl_ecc_engine_ops; 429 rtlc->engine.integration = NAND_ECC_ENGINE_INTEGRATION_EXTERNAL; 430 431 nand_ecc_register_on_host_hw_engine(&rtlc->engine); 432 433 platform_set_drvdata(pdev, rtlc); 434 435 return 0; 436 } 437 438 static void rtl_ecc_remove(struct platform_device *pdev) 439 { 440 struct rtl_ecc_engine *rtlc = platform_get_drvdata(pdev); 441 442 nand_ecc_unregister_on_host_hw_engine(&rtlc->engine); 443 dma_free_noncoherent(rtlc->dev, RTL_ECC_DMA_SIZE, rtlc->buf, rtlc->buf_dma, 444 DMA_BIDIRECTIONAL); 445 } 446 447 static const struct of_device_id rtl_ecc_of_ids[] = { 448 { 449 .compatible = "realtek,rtl9301-ecc", 450 }, 451 { /* sentinel */ }, 452 }; 453 454 static struct platform_driver rtl_ecc_driver = { 455 .driver = { 456 .name = "rtl-nand-ecc-engine", 457 .of_match_table = rtl_ecc_of_ids, 458 }, 459 .probe = rtl_ecc_probe, 460 .remove = rtl_ecc_remove, 461 }; 462 module_platform_driver(rtl_ecc_driver); 463 464 MODULE_LICENSE("GPL"); 465 MODULE_AUTHOR("Markus Stockhausen <markus.stockhausen@gmx.de>"); 466 MODULE_DESCRIPTION("Realtek NAND hardware ECC controller"); 467