1 /* 2 * Copyright © 2012 NetCommWireless 3 * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au> 4 * 5 * Test for multi-bit error recovery on a NAND page This mostly tests the 6 * ECC controller / driver. 7 * 8 * There are two test modes: 9 * 10 * 0 - artificially inserting bit errors until the ECC fails 11 * This is the default method and fairly quick. It should 12 * be independent of the quality of the FLASH. 13 * 14 * 1 - re-writing the same pattern repeatedly until the ECC fails. 15 * This method relies on the physics of NAND FLASH to eventually 16 * generate '0' bits if '1' has been written sufficient times. 17 * Depending on the NAND, the first bit errors will appear after 18 * 1000 or more writes and then will usually snowball, reaching the 19 * limits of the ECC quickly. 20 * 21 * The test stops after 10000 cycles, should your FLASH be 22 * exceptionally good and not generate bit errors before that. Try 23 * a different page in that case. 24 * 25 * Please note that neither of these tests will significantly 'use up' any 26 * FLASH endurance. Only a maximum of two erase operations will be performed. 27 * 28 * 29 * This program is free software; you can redistribute it and/or modify it 30 * under the terms of the GNU General Public License version 2 as published by 31 * the Free Software Foundation. 32 * 33 * This program is distributed in the hope that it will be useful, but WITHOUT 34 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 35 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 36 * more details. 37 * 38 * You should have received a copy of the GNU General Public License along with 39 * this program; see the file COPYING. If not, write to the Free Software 40 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 41 */ 42 43 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 44 45 #include <linux/init.h> 46 #include <linux/module.h> 47 #include <linux/moduleparam.h> 48 #include <linux/mtd/mtd.h> 49 #include <linux/err.h> 50 #include <linux/mtd/rawnand.h> 51 #include <linux/slab.h> 52 #include "mtd_test.h" 53 54 static int dev; 55 module_param(dev, int, S_IRUGO); 56 MODULE_PARM_DESC(dev, "MTD device number to use"); 57 58 static unsigned page_offset; 59 module_param(page_offset, uint, S_IRUGO); 60 MODULE_PARM_DESC(page_offset, "Page number relative to dev start"); 61 62 static unsigned seed; 63 module_param(seed, uint, S_IRUGO); 64 MODULE_PARM_DESC(seed, "Random seed"); 65 66 static int mode; 67 module_param(mode, int, S_IRUGO); 68 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test"); 69 70 static unsigned max_overwrite = 10000; 71 72 static loff_t offset; /* Offset of the page we're using. */ 73 static unsigned eraseblock; /* Eraseblock number for our page. */ 74 75 /* We assume that the ECC can correct up to a certain number 76 * of biterrors per subpage. */ 77 static unsigned subsize; /* Size of subpages */ 78 static unsigned subcount; /* Number of subpages per page */ 79 80 static struct mtd_info *mtd; /* MTD device */ 81 82 static uint8_t *wbuffer; /* One page write / compare buffer */ 83 static uint8_t *rbuffer; /* One page read buffer */ 84 85 /* 'random' bytes from known offsets */ 86 static uint8_t hash(unsigned offset) 87 { 88 unsigned v = offset; 89 unsigned char c; 90 v ^= 0x7f7edfd3; 91 v = v ^ (v >> 3); 92 v = v ^ (v >> 5); 93 v = v ^ (v >> 13); 94 c = v & 0xFF; 95 /* Reverse bits of result. */ 96 c = (c & 0x0F) << 4 | (c & 0xF0) >> 4; 97 c = (c & 0x33) << 2 | (c & 0xCC) >> 2; 98 c = (c & 0x55) << 1 | (c & 0xAA) >> 1; 99 return c; 100 } 101 102 /* Writes wbuffer to page */ 103 static int write_page(int log) 104 { 105 if (log) 106 pr_info("write_page\n"); 107 108 return mtdtest_write(mtd, offset, mtd->writesize, wbuffer); 109 } 110 111 /* Re-writes the data area while leaving the OOB alone. */ 112 static int rewrite_page(int log) 113 { 114 int err = 0; 115 struct mtd_oob_ops ops; 116 117 if (log) 118 pr_info("rewrite page\n"); 119 120 ops.mode = MTD_OPS_RAW; /* No ECC */ 121 ops.len = mtd->writesize; 122 ops.retlen = 0; 123 ops.ooblen = 0; 124 ops.oobretlen = 0; 125 ops.ooboffs = 0; 126 ops.datbuf = wbuffer; 127 ops.oobbuf = NULL; 128 129 err = mtd_write_oob(mtd, offset, &ops); 130 if (err || ops.retlen != mtd->writesize) { 131 pr_err("error: write_oob failed (%d)\n", err); 132 if (!err) 133 err = -EIO; 134 } 135 136 return err; 137 } 138 139 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0) 140 * or error (<0) */ 141 static int read_page(int log) 142 { 143 int err = 0; 144 size_t read; 145 struct mtd_ecc_stats oldstats; 146 147 if (log) 148 pr_info("read_page\n"); 149 150 /* Saving last mtd stats */ 151 memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats)); 152 153 err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer); 154 if (!err || err == -EUCLEAN) 155 err = mtd->ecc_stats.corrected - oldstats.corrected; 156 157 if (err < 0 || read != mtd->writesize) { 158 pr_err("error: read failed at %#llx\n", (long long)offset); 159 if (err >= 0) 160 err = -EIO; 161 } 162 163 return err; 164 } 165 166 /* Verifies rbuffer against random sequence */ 167 static int verify_page(int log) 168 { 169 unsigned i, errs = 0; 170 171 if (log) 172 pr_info("verify_page\n"); 173 174 for (i = 0; i < mtd->writesize; i++) { 175 if (rbuffer[i] != hash(i+seed)) { 176 pr_err("Error: page offset %u, expected %02x, got %02x\n", 177 i, hash(i+seed), rbuffer[i]); 178 errs++; 179 } 180 } 181 182 if (errs) 183 return -EIO; 184 else 185 return 0; 186 } 187 188 #define CBIT(v, n) ((v) & (1 << (n))) 189 #define BCLR(v, n) ((v) = (v) & ~(1 << (n))) 190 191 /* Finds the first '1' bit in wbuffer starting at offset 'byte' 192 * and sets it to '0'. */ 193 static int insert_biterror(unsigned byte) 194 { 195 int bit; 196 197 while (byte < mtd->writesize) { 198 for (bit = 7; bit >= 0; bit--) { 199 if (CBIT(wbuffer[byte], bit)) { 200 BCLR(wbuffer[byte], bit); 201 pr_info("Inserted biterror @ %u/%u\n", byte, bit); 202 return 0; 203 } 204 } 205 byte++; 206 } 207 pr_err("biterror: Failed to find a '1' bit\n"); 208 return -EIO; 209 } 210 211 /* Writes 'random' data to page and then introduces deliberate bit 212 * errors into the page, while verifying each step. */ 213 static int incremental_errors_test(void) 214 { 215 int err = 0; 216 unsigned i; 217 unsigned errs_per_subpage = 0; 218 219 pr_info("incremental biterrors test\n"); 220 221 for (i = 0; i < mtd->writesize; i++) 222 wbuffer[i] = hash(i+seed); 223 224 err = write_page(1); 225 if (err) 226 goto exit; 227 228 while (1) { 229 230 err = rewrite_page(1); 231 if (err) 232 goto exit; 233 234 err = read_page(1); 235 if (err > 0) 236 pr_info("Read reported %d corrected bit errors\n", err); 237 if (err < 0) { 238 pr_err("After %d biterrors per subpage, read reported error %d\n", 239 errs_per_subpage, err); 240 err = 0; 241 goto exit; 242 } 243 244 err = verify_page(1); 245 if (err) { 246 pr_err("ECC failure, read data is incorrect despite read success\n"); 247 goto exit; 248 } 249 250 pr_info("Successfully corrected %d bit errors per subpage\n", 251 errs_per_subpage); 252 253 for (i = 0; i < subcount; i++) { 254 err = insert_biterror(i * subsize); 255 if (err < 0) 256 goto exit; 257 } 258 errs_per_subpage++; 259 } 260 261 exit: 262 return err; 263 } 264 265 266 /* Writes 'random' data to page and then re-writes that same data repeatedly. 267 This eventually develops bit errors (bits written as '1' will slowly become 268 '0'), which are corrected as far as the ECC is capable of. */ 269 static int overwrite_test(void) 270 { 271 int err = 0; 272 unsigned i; 273 unsigned max_corrected = 0; 274 unsigned opno = 0; 275 /* We don't expect more than this many correctable bit errors per 276 * page. */ 277 #define MAXBITS 512 278 static unsigned bitstats[MAXBITS]; /* bit error histogram. */ 279 280 memset(bitstats, 0, sizeof(bitstats)); 281 282 pr_info("overwrite biterrors test\n"); 283 284 for (i = 0; i < mtd->writesize; i++) 285 wbuffer[i] = hash(i+seed); 286 287 err = write_page(1); 288 if (err) 289 goto exit; 290 291 while (opno < max_overwrite) { 292 293 err = write_page(0); 294 if (err) 295 break; 296 297 err = read_page(0); 298 if (err >= 0) { 299 if (err >= MAXBITS) { 300 pr_info("Implausible number of bit errors corrected\n"); 301 err = -EIO; 302 break; 303 } 304 bitstats[err]++; 305 if (err > max_corrected) { 306 max_corrected = err; 307 pr_info("Read reported %d corrected bit errors\n", 308 err); 309 } 310 } else { /* err < 0 */ 311 pr_info("Read reported error %d\n", err); 312 err = 0; 313 break; 314 } 315 316 err = verify_page(0); 317 if (err) { 318 bitstats[max_corrected] = opno; 319 pr_info("ECC failure, read data is incorrect despite read success\n"); 320 break; 321 } 322 323 err = mtdtest_relax(); 324 if (err) 325 break; 326 327 opno++; 328 } 329 330 /* At this point bitstats[0] contains the number of ops with no bit 331 * errors, bitstats[1] the number of ops with 1 bit error, etc. */ 332 pr_info("Bit error histogram (%d operations total):\n", opno); 333 for (i = 0; i < max_corrected; i++) 334 pr_info("Page reads with %3d corrected bit errors: %d\n", 335 i, bitstats[i]); 336 337 exit: 338 return err; 339 } 340 341 static int __init mtd_nandbiterrs_init(void) 342 { 343 int err = 0; 344 345 printk("\n"); 346 printk(KERN_INFO "==================================================\n"); 347 pr_info("MTD device: %d\n", dev); 348 349 mtd = get_mtd_device(NULL, dev); 350 if (IS_ERR(mtd)) { 351 err = PTR_ERR(mtd); 352 pr_err("error: cannot get MTD device\n"); 353 goto exit_mtddev; 354 } 355 356 if (!mtd_type_is_nand(mtd)) { 357 pr_info("this test requires NAND flash\n"); 358 err = -ENODEV; 359 goto exit_nand; 360 } 361 362 pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n", 363 (unsigned long long)mtd->size, mtd->erasesize, 364 mtd->writesize, mtd->oobsize); 365 366 subsize = mtd->writesize >> mtd->subpage_sft; 367 subcount = mtd->writesize / subsize; 368 369 pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize); 370 371 offset = (loff_t)page_offset * mtd->writesize; 372 eraseblock = mtd_div_by_eb(offset, mtd); 373 374 pr_info("Using page=%u, offset=%llu, eraseblock=%u\n", 375 page_offset, offset, eraseblock); 376 377 wbuffer = kmalloc(mtd->writesize, GFP_KERNEL); 378 if (!wbuffer) { 379 err = -ENOMEM; 380 goto exit_wbuffer; 381 } 382 383 rbuffer = kmalloc(mtd->writesize, GFP_KERNEL); 384 if (!rbuffer) { 385 err = -ENOMEM; 386 goto exit_rbuffer; 387 } 388 389 err = mtdtest_erase_eraseblock(mtd, eraseblock); 390 if (err) 391 goto exit_error; 392 393 if (mode == 0) 394 err = incremental_errors_test(); 395 else 396 err = overwrite_test(); 397 398 if (err) 399 goto exit_error; 400 401 /* We leave the block un-erased in case of test failure. */ 402 err = mtdtest_erase_eraseblock(mtd, eraseblock); 403 if (err) 404 goto exit_error; 405 406 err = -EIO; 407 pr_info("finished successfully.\n"); 408 printk(KERN_INFO "==================================================\n"); 409 410 exit_error: 411 kfree(rbuffer); 412 exit_rbuffer: 413 kfree(wbuffer); 414 exit_wbuffer: 415 /* Nothing */ 416 exit_nand: 417 put_mtd_device(mtd); 418 exit_mtddev: 419 return err; 420 } 421 422 static void __exit mtd_nandbiterrs_exit(void) 423 { 424 return; 425 } 426 427 module_init(mtd_nandbiterrs_init); 428 module_exit(mtd_nandbiterrs_exit); 429 430 MODULE_DESCRIPTION("NAND bit error recovery test"); 431 MODULE_AUTHOR("Iwo Mergler"); 432 MODULE_LICENSE("GPL"); 433