xref: /linux/drivers/mtd/nand/raw/davinci_nand.c (revision 186779c036468038b0d077ec5333a51512f867e5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * davinci_nand.c - NAND Flash Driver for DaVinci family chips
4  *
5  * Copyright © 2006 Texas Instruments.
6  *
7  * Port to 2.6.23 Copyright © 2008 by:
8  *   Sander Huijsen <Shuijsen@optelecom-nkf.com>
9  *   Troy Kisky <troy.kisky@boundarydevices.com>
10  *   Dirk Behme <Dirk.Behme@gmail.com>
11  */
12 
13 #include <linux/clk.h>
14 #include <linux/err.h>
15 #include <linux/iopoll.h>
16 #include <linux/kernel.h>
17 #include <linux/memory/ti-aemif.h>
18 #include <linux/module.h>
19 #include <linux/mtd/partitions.h>
20 #include <linux/mtd/rawnand.h>
21 #include <linux/platform_device.h>
22 #include <linux/property.h>
23 #include <linux/slab.h>
24 
/*
 * Controller register offsets, relative to the control-register mapping
 * (davinci_nand_info.base).
 */
#define NRCSR_OFFSET		0x00	/* read at probe to report the controller revision */
#define NANDFCR_OFFSET		0x60	/* control: NAND-mode enable and ECC start bits */
#define NANDFSR_OFFSET		0x64	/* status: ready bit (bit 0) and 4-bit ECC state */
#define NANDF1ECC_OFFSET	0x70	/* 1-bit ECC result; one 32-bit register per chipselect */

/* 4-bit ECC syndrome registers */
#define NAND_4BIT_ECC_LOAD_OFFSET	0xbc
#define NAND_4BIT_ECC1_OFFSET		0xc0
#define NAND_4BIT_ECC2_OFFSET		0xc4
#define NAND_4BIT_ECC3_OFFSET		0xc8
#define NAND_4BIT_ECC4_OFFSET		0xcc
#define NAND_ERR_ADD1_OFFSET		0xd0
#define NAND_ERR_ADD2_OFFSET		0xd4
#define NAND_ERR_ERRVAL1_OFFSET		0xd8
#define NAND_ERR_ERRVAL2_OFFSET		0xdc

/* NOTE:  boards don't need to use these address bits
 * for ALE/CLE unless they support booting from NAND.
 * They're used unless platform data overrides them.
 */
#define	MASK_ALE		0x08
#define	MASK_CLE		0x10

/* Controller-side margins folded into the read timing computation */
#define MAX_TSU_PS		3000	/* Input setup time in ps */
#define MAX_TH_PS		1600	/* Input hold time in ps */
50 
/*
 * Board-level configuration for one NAND chipselect.  It is either handed
 * in as platform data or built by nand_davinci_get_pdata().
 */
struct davinci_nand_pdata {
	/*
	 * Offsets added to the chip's data window address when issuing
	 * address (ALE) and command (CLE) cycles.  A value of zero makes
	 * probe fall back to MASK_ALE / MASK_CLE.
	 */
	uint32_t		mask_ale;
	uint32_t		mask_cle;

	/*
	 * 0-indexed chip-select number of the asynchronous
	 * interface to which the NAND device has been connected.
	 *
	 * So, if you have NAND connected to CS3 of DA850, you
	 * will pass '1' here. Since the asynchronous interface
	 * on DA850 starts from CS2.
	 */
	uint32_t		core_chipsel;

	/*
	 * for packages using two chipselects: address offset selecting the
	 * second die; non-zero makes probe scan for two chips.
	 */
	uint32_t		mask_chipsel;

	/* board's default static partition info */
	struct mtd_partition	*parts;
	unsigned int		nr_parts;

	/* none  == NAND_ECC_ENGINE_TYPE_NONE (strongly *not* advised!!)
	 * soft  == NAND_ECC_ENGINE_TYPE_SOFT
	 * on-die == NAND_ECC_ENGINE_TYPE_ON_DIE
	 * else  == NAND_ECC_ENGINE_TYPE_ON_HOST, according to ecc_bits
	 *
	 * All DaVinci-family chips support 1-bit hardware ECC.
	 * Newer ones also support 4-bit ECC, but are awkward
	 * using it with large page chips.
	 */
	enum nand_ecc_engine_type engine_type;
	enum nand_ecc_placement ecc_placement;
	/* 4 selects the 4-bit engine; any other value uses 1-bit hardware ECC */
	u8			ecc_bits;

	/* e.g. NAND_BUSWIDTH_16 */
	unsigned int		options;
	/* e.g. NAND_BBT_USE_FLASH */
	unsigned int		bbt_options;

	/* Main and mirror bbt descriptor overrides */
	struct nand_bbt_descr	*bbt_td;
	struct nand_bbt_descr	*bbt_md;
};
94 
95 /*
96  * This is a device driver for the NAND flash controller found on the
97  * various DaVinci family chips.  It handles up to four SoC chipselects,
98  * and some flavors of secondary chipselect (e.g. based on A12) as used
99  * with multichip packages.
100  *
101  * The 1-bit ECC hardware is supported, as well as the newer 4-bit ECC
102  * available on chips like the DM355 and OMAP-L137 and needed with the
103  * more error-prone MLC NAND chips.
104  *
105  * This driver assumes EM_WAIT connects all the NAND devices' RDY/nBUSY
106  * outputs in a "wire-AND" configuration, with no per-chip signals.
107  */
/* Per-device driver state; embeds the nand_chip handed to the NAND core. */
struct davinci_nand_info {
	struct nand_controller	controller;
	struct nand_chip	chip;

	struct platform_device	*pdev;

	/* set by the 4-bit hwctl hook: true when ECC was started for a read */
	bool			is_readmode;

	/* control registers (second MEM resource, mapped without requesting) */
	void __iomem		*base;
	/* NAND data/command/address window (first MEM resource) */
	void __iomem		*vaddr;

	/* vaddr adjusted for the die selected by the operation in progress */
	void __iomem		*current_cs;

	/* copies of the board configuration, with ALE/CLE defaults applied */
	uint32_t		mask_chipsel;
	uint32_t		mask_ale;
	uint32_t		mask_cle;

	uint32_t		core_chipsel;

	/* "aemif" clock; its rate is the time base for interface timings */
	struct clk		*clk;
	/* driver data of the parent device, passed to aemif_set_cs_timings() */
	struct aemif_device	*aemif;
};
130 
/* Serializes read-modify-write cycles on NANDFCR and protects ecc4_busy */
static DEFINE_SPINLOCK(davinci_nand_lock);
/* true while one chip has claimed the (unshared) 4-bit ECC engine */
static bool ecc4_busy;
133 
134 static inline struct davinci_nand_info *to_davinci_nand(struct mtd_info *mtd)
135 {
136 	return container_of(mtd_to_nand(mtd), struct davinci_nand_info, chip);
137 }
138 
139 static inline unsigned int davinci_nand_readl(struct davinci_nand_info *info,
140 		int offset)
141 {
142 	return __raw_readl(info->base + offset);
143 }
144 
145 static inline void davinci_nand_writel(struct davinci_nand_info *info,
146 		int offset, unsigned long value)
147 {
148 	__raw_writel(value, info->base + offset);
149 }
150 
151 /*----------------------------------------------------------------------*/
152 
153 /*
154  * 1-bit hardware ECC ... context maintained for each core chipselect
155  */
156 
157 static inline uint32_t nand_davinci_readecc_1bit(struct mtd_info *mtd)
158 {
159 	struct davinci_nand_info *info = to_davinci_nand(mtd);
160 
161 	return davinci_nand_readl(info, NANDF1ECC_OFFSET
162 			+ 4 * info->core_chipsel);
163 }
164 
165 static void nand_davinci_hwctl_1bit(struct nand_chip *chip, int mode)
166 {
167 	struct davinci_nand_info *info;
168 	uint32_t nandcfr;
169 	unsigned long flags;
170 
171 	info = to_davinci_nand(nand_to_mtd(chip));
172 
173 	/* Reset ECC hardware */
174 	nand_davinci_readecc_1bit(nand_to_mtd(chip));
175 
176 	spin_lock_irqsave(&davinci_nand_lock, flags);
177 
178 	/* Restart ECC hardware */
179 	nandcfr = davinci_nand_readl(info, NANDFCR_OFFSET);
180 	nandcfr |= BIT(8 + info->core_chipsel);
181 	davinci_nand_writel(info, NANDFCR_OFFSET, nandcfr);
182 
183 	spin_unlock_irqrestore(&davinci_nand_lock, flags);
184 }
185 
186 /*
187  * Read hardware ECC value and pack into three bytes
188  */
189 static int nand_davinci_calculate_1bit(struct nand_chip *chip,
190 				       const u_char *dat, u_char *ecc_code)
191 {
192 	unsigned int ecc_val = nand_davinci_readecc_1bit(nand_to_mtd(chip));
193 	unsigned int ecc24 = (ecc_val & 0x0fff) | ((ecc_val & 0x0fff0000) >> 4);
194 
195 	/* invert so that erased block ecc is correct */
196 	ecc24 = ~ecc24;
197 	ecc_code[0] = (u_char)(ecc24);
198 	ecc_code[1] = (u_char)(ecc24 >> 8);
199 	ecc_code[2] = (u_char)(ecc24 >> 16);
200 
201 	return 0;
202 }
203 
204 static int nand_davinci_correct_1bit(struct nand_chip *chip, u_char *dat,
205 				     u_char *read_ecc, u_char *calc_ecc)
206 {
207 	uint32_t eccNand = read_ecc[0] | (read_ecc[1] << 8) |
208 					  (read_ecc[2] << 16);
209 	uint32_t eccCalc = calc_ecc[0] | (calc_ecc[1] << 8) |
210 					  (calc_ecc[2] << 16);
211 	uint32_t diff = eccCalc ^ eccNand;
212 
213 	if (diff) {
214 		if ((((diff >> 12) ^ diff) & 0xfff) == 0xfff) {
215 			/* Correctable error */
216 			if ((diff >> (12 + 3)) < chip->ecc.size) {
217 				dat[diff >> (12 + 3)] ^= BIT((diff >> 12) & 7);
218 				return 1;
219 			} else {
220 				return -EBADMSG;
221 			}
222 		} else if (!(diff & (diff - 1))) {
223 			/* Single bit ECC error in the ECC itself,
224 			 * nothing to fix */
225 			return 1;
226 		} else {
227 			/* Uncorrectable error */
228 			return -EBADMSG;
229 		}
230 
231 	}
232 	return 0;
233 }
234 
235 /*----------------------------------------------------------------------*/
236 
237 /*
238  * 4-bit hardware ECC ... context maintained over entire AEMIF
239  *
240  * This is a syndrome engine, but we avoid NAND_ECC_PLACEMENT_INTERLEAVED
241  * since that forces use of a problematic "infix OOB" layout.
242  * Among other things, it trashes manufacturer bad block markers.
243  * Also, and specific to this hardware, it ECC-protects the "prepad"
244  * in the OOB ... while having ECC protection for parts of OOB would
245  * seem useful, the current MTD stack sometimes wants to update the
246  * OOB without recomputing ECC.
247  */
248 
249 static void nand_davinci_hwctl_4bit(struct nand_chip *chip, int mode)
250 {
251 	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
252 	unsigned long flags;
253 	u32 val;
254 
255 	/* Reset ECC hardware */
256 	davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET);
257 
258 	spin_lock_irqsave(&davinci_nand_lock, flags);
259 
260 	/* Start 4-bit ECC calculation for read/write */
261 	val = davinci_nand_readl(info, NANDFCR_OFFSET);
262 	val &= ~(0x03 << 4);
263 	val |= (info->core_chipsel << 4) | BIT(12);
264 	davinci_nand_writel(info, NANDFCR_OFFSET, val);
265 
266 	info->is_readmode = (mode == NAND_ECC_READ);
267 
268 	spin_unlock_irqrestore(&davinci_nand_lock, flags);
269 }
270 
271 /* Read raw ECC code after writing to NAND. */
272 static void
273 nand_davinci_readecc_4bit(struct davinci_nand_info *info, u32 code[4])
274 {
275 	const u32 mask = 0x03ff03ff;
276 
277 	code[0] = davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET) & mask;
278 	code[1] = davinci_nand_readl(info, NAND_4BIT_ECC2_OFFSET) & mask;
279 	code[2] = davinci_nand_readl(info, NAND_4BIT_ECC3_OFFSET) & mask;
280 	code[3] = davinci_nand_readl(info, NAND_4BIT_ECC4_OFFSET) & mask;
281 }
282 
283 /* Terminate read ECC; or return ECC (as bytes) of data written to NAND. */
284 static int nand_davinci_calculate_4bit(struct nand_chip *chip,
285 				       const u_char *dat, u_char *ecc_code)
286 {
287 	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
288 	u32 raw_ecc[4], *p;
289 	unsigned i;
290 
291 	/* After a read, terminate ECC calculation by a dummy read
292 	 * of some 4-bit ECC register.  ECC covers everything that
293 	 * was read; correct() just uses the hardware state, so
294 	 * ecc_code is not needed.
295 	 */
296 	if (info->is_readmode) {
297 		davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET);
298 		return 0;
299 	}
300 
301 	/* Pack eight raw 10-bit ecc values into ten bytes, making
302 	 * two passes which each convert four values (in upper and
303 	 * lower halves of two 32-bit words) into five bytes.  The
304 	 * ROM boot loader uses this same packing scheme.
305 	 */
306 	nand_davinci_readecc_4bit(info, raw_ecc);
307 	for (i = 0, p = raw_ecc; i < 2; i++, p += 2) {
308 		*ecc_code++ =   p[0]        & 0xff;
309 		*ecc_code++ = ((p[0] >>  8) & 0x03) | ((p[0] >> 14) & 0xfc);
310 		*ecc_code++ = ((p[0] >> 22) & 0x0f) | ((p[1] <<  4) & 0xf0);
311 		*ecc_code++ = ((p[1] >>  4) & 0x3f) | ((p[1] >> 10) & 0xc0);
312 		*ecc_code++ =  (p[1] >> 18) & 0xff;
313 	}
314 
315 	return 0;
316 }
317 
318 /* Correct up to 4 bits in data we just read, using state left in the
319  * hardware plus the ecc_code computed when it was first written.
320  */
321 static int nand_davinci_correct_4bit(struct nand_chip *chip, u_char *data,
322 				     u_char *ecc_code, u_char *null)
323 {
324 	int i;
325 	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
326 	unsigned short ecc10[8];
327 	unsigned short *ecc16;
328 	u32 syndrome[4];
329 	u32 ecc_state;
330 	unsigned num_errors, corrected;
331 	unsigned long timeo;
332 
333 	/* Unpack ten bytes into eight 10 bit values.  We know we're
334 	 * little-endian, and use type punning for less shifting/masking.
335 	 */
336 	if (WARN_ON(0x01 & (uintptr_t)ecc_code))
337 		return -EINVAL;
338 	ecc16 = (unsigned short *)ecc_code;
339 
340 	ecc10[0] =  (ecc16[0] >>  0) & 0x3ff;
341 	ecc10[1] = ((ecc16[0] >> 10) & 0x3f) | ((ecc16[1] << 6) & 0x3c0);
342 	ecc10[2] =  (ecc16[1] >>  4) & 0x3ff;
343 	ecc10[3] = ((ecc16[1] >> 14) & 0x3)  | ((ecc16[2] << 2) & 0x3fc);
344 	ecc10[4] =  (ecc16[2] >>  8)         | ((ecc16[3] << 8) & 0x300);
345 	ecc10[5] =  (ecc16[3] >>  2) & 0x3ff;
346 	ecc10[6] = ((ecc16[3] >> 12) & 0xf)  | ((ecc16[4] << 4) & 0x3f0);
347 	ecc10[7] =  (ecc16[4] >>  6) & 0x3ff;
348 
349 	/* Tell ECC controller about the expected ECC codes. */
350 	for (i = 7; i >= 0; i--)
351 		davinci_nand_writel(info, NAND_4BIT_ECC_LOAD_OFFSET, ecc10[i]);
352 
353 	/* Allow time for syndrome calculation ... then read it.
354 	 * A syndrome of all zeroes 0 means no detected errors.
355 	 */
356 	davinci_nand_readl(info, NANDFSR_OFFSET);
357 	nand_davinci_readecc_4bit(info, syndrome);
358 	if (!(syndrome[0] | syndrome[1] | syndrome[2] | syndrome[3]))
359 		return 0;
360 
361 	/*
362 	 * Clear any previous address calculation by doing a dummy read of an
363 	 * error address register.
364 	 */
365 	davinci_nand_readl(info, NAND_ERR_ADD1_OFFSET);
366 
367 	/* Start address calculation, and wait for it to complete.
368 	 * We _could_ start reading more data while this is working,
369 	 * to speed up the overall page read.
370 	 */
371 	davinci_nand_writel(info, NANDFCR_OFFSET,
372 			davinci_nand_readl(info, NANDFCR_OFFSET) | BIT(13));
373 
374 	/*
375 	 * ECC_STATE field reads 0x3 (Error correction complete) immediately
376 	 * after setting the 4BITECC_ADD_CALC_START bit. So if you immediately
377 	 * begin trying to poll for the state, you may fall right out of your
378 	 * loop without any of the correction calculations having taken place.
379 	 * The recommendation from the hardware team is to initially delay as
380 	 * long as ECC_STATE reads less than 4. After that, ECC HW has entered
381 	 * correction state.
382 	 */
383 	timeo = jiffies + usecs_to_jiffies(100);
384 	do {
385 		ecc_state = (davinci_nand_readl(info,
386 				NANDFSR_OFFSET) >> 8) & 0x0f;
387 		cpu_relax();
388 	} while ((ecc_state < 4) && time_before(jiffies, timeo));
389 
390 	for (;;) {
391 		u32	fsr = davinci_nand_readl(info, NANDFSR_OFFSET);
392 
393 		switch ((fsr >> 8) & 0x0f) {
394 		case 0:		/* no error, should not happen */
395 			davinci_nand_readl(info, NAND_ERR_ERRVAL1_OFFSET);
396 			return 0;
397 		case 1:		/* five or more errors detected */
398 			davinci_nand_readl(info, NAND_ERR_ERRVAL1_OFFSET);
399 			return -EBADMSG;
400 		case 2:		/* error addresses computed */
401 		case 3:
402 			num_errors = 1 + ((fsr >> 16) & 0x03);
403 			goto correct;
404 		default:	/* still working on it */
405 			cpu_relax();
406 			continue;
407 		}
408 	}
409 
410 correct:
411 	/* correct each error */
412 	for (i = 0, corrected = 0; i < num_errors; i++) {
413 		int error_address, error_value;
414 
415 		if (i > 1) {
416 			error_address = davinci_nand_readl(info,
417 						NAND_ERR_ADD2_OFFSET);
418 			error_value = davinci_nand_readl(info,
419 						NAND_ERR_ERRVAL2_OFFSET);
420 		} else {
421 			error_address = davinci_nand_readl(info,
422 						NAND_ERR_ADD1_OFFSET);
423 			error_value = davinci_nand_readl(info,
424 						NAND_ERR_ERRVAL1_OFFSET);
425 		}
426 
427 		if (i & 1) {
428 			error_address >>= 16;
429 			error_value >>= 16;
430 		}
431 		error_address &= 0x3ff;
432 		error_address = (512 + 7) - error_address;
433 
434 		if (error_address < 512) {
435 			data[error_address] ^= error_value;
436 			corrected++;
437 		}
438 	}
439 
440 	return corrected;
441 }
442 
443 /*----------------------------------------------------------------------*/
444 
445 /* An ECC layout for using 4-bit ECC with small-page flash, storing
446  * ten ECC bytes plus the manufacturer's bad block marker byte, and
447  * and not overlapping the default BBT markers.
448  */
449 static int hwecc4_ooblayout_small_ecc(struct mtd_info *mtd, int section,
450 				      struct mtd_oob_region *oobregion)
451 {
452 	if (section > 2)
453 		return -ERANGE;
454 
455 	if (!section) {
456 		oobregion->offset = 0;
457 		oobregion->length = 5;
458 	} else if (section == 1) {
459 		oobregion->offset = 6;
460 		oobregion->length = 2;
461 	} else {
462 		oobregion->offset = 13;
463 		oobregion->length = 3;
464 	}
465 
466 	return 0;
467 }
468 
469 static int hwecc4_ooblayout_small_free(struct mtd_info *mtd, int section,
470 				       struct mtd_oob_region *oobregion)
471 {
472 	if (section > 1)
473 		return -ERANGE;
474 
475 	if (!section) {
476 		oobregion->offset = 8;
477 		oobregion->length = 5;
478 	} else {
479 		oobregion->offset = 16;
480 		oobregion->length = mtd->oobsize - 16;
481 	}
482 
483 	return 0;
484 }
485 
/* OOB layout installed for 4-bit ECC when the page holds one 512-byte chunk */
static const struct mtd_ooblayout_ops hwecc4_small_ooblayout_ops = {
	.ecc = hwecc4_ooblayout_small_ecc,
	.free = hwecc4_ooblayout_small_free,
};
490 
491 static int hwecc4_ooblayout_large_ecc(struct mtd_info *mtd, int section,
492 				       struct mtd_oob_region *oobregion)
493 {
494 	struct nand_device *nand = mtd_to_nanddev(mtd);
495 	unsigned int total_ecc_bytes = nand->ecc.ctx.total;
496 	int nregions = total_ecc_bytes / 10; /* 10 bytes per chunk */
497 
498 	if (section >= nregions)
499 		return -ERANGE;
500 
501 	oobregion->offset = (section * 16) + 6;
502 	oobregion->length = 10;
503 
504 	return 0;
505 }
506 
507 static int hwecc4_ooblayout_large_free(struct mtd_info *mtd, int section,
508 				       struct mtd_oob_region *oobregion)
509 {
510 	struct nand_device *nand = mtd_to_nanddev(mtd);
511 	unsigned int total_ecc_bytes = nand->ecc.ctx.total;
512 	int nregions = total_ecc_bytes / 10; /* 10 bytes per chunk */
513 
514 	/* First region is used for BBT */
515 	if (section >= (nregions - 1))
516 		return -ERANGE;
517 
518 	oobregion->offset = ((section + 1) * 16);
519 	oobregion->length = 6;
520 
521 	return 0;
522 }
523 
/*
 * OOB layout installed for 4-bit ECC on 4- or 8-chunk pages when the chip
 * is flagged NAND_IS_BOOT_MEDIUM.
 */
static const struct mtd_ooblayout_ops hwecc4_large_ooblayout_ops = {
	.ecc = hwecc4_ooblayout_large_ecc,
	.free = hwecc4_ooblayout_large_free,
};
528 
529 #if defined(CONFIG_OF)
/*
 * Device-tree compatibles handled by this driver.  "ti,keystone-nand"
 * additionally gets NAND_NO_SUBPAGE_WRITE in nand_davinci_get_pdata().
 */
static const struct of_device_id davinci_nand_of_match[] = {
	{.compatible = "ti,davinci-nand", },
	{.compatible = "ti,keystone-nand", },
	{},
};
MODULE_DEVICE_TABLE(of, davinci_nand_of_match);
536 
537 static struct davinci_nand_pdata *
538 nand_davinci_get_pdata(struct platform_device *pdev)
539 {
540 	if (!dev_get_platdata(&pdev->dev)) {
541 		struct davinci_nand_pdata *pdata;
542 		const char *mode;
543 		u32 prop;
544 
545 		pdata =  devm_kzalloc(&pdev->dev,
546 				sizeof(struct davinci_nand_pdata),
547 				GFP_KERNEL);
548 		pdev->dev.platform_data = pdata;
549 		if (!pdata)
550 			return ERR_PTR(-ENOMEM);
551 		if (!device_property_read_u32(&pdev->dev,
552 					      "ti,davinci-chipselect", &prop))
553 			pdata->core_chipsel = prop;
554 		else
555 			return ERR_PTR(-EINVAL);
556 
557 		if (!device_property_read_u32(&pdev->dev,
558 					      "ti,davinci-mask-ale", &prop))
559 			pdata->mask_ale = prop;
560 		if (!device_property_read_u32(&pdev->dev,
561 					      "ti,davinci-mask-cle", &prop))
562 			pdata->mask_cle = prop;
563 		if (!device_property_read_u32(&pdev->dev,
564 					      "ti,davinci-mask-chipsel", &prop))
565 			pdata->mask_chipsel = prop;
566 		if (!device_property_read_string(&pdev->dev,
567 						 "ti,davinci-ecc-mode",
568 						 &mode)) {
569 			if (!strncmp("none", mode, 4))
570 				pdata->engine_type = NAND_ECC_ENGINE_TYPE_NONE;
571 			if (!strncmp("soft", mode, 4))
572 				pdata->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
573 			if (!strncmp("hw", mode, 2))
574 				pdata->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
575 			if (!strncmp("on-die", mode, 6))
576 				pdata->engine_type = NAND_ECC_ENGINE_TYPE_ON_DIE;
577 		}
578 		if (!device_property_read_u32(&pdev->dev,
579 					      "ti,davinci-ecc-bits", &prop))
580 			pdata->ecc_bits = prop;
581 
582 		if (!device_property_read_u32(&pdev->dev,
583 					      "ti,davinci-nand-buswidth",
584 					      &prop) && prop == 16)
585 			pdata->options |= NAND_BUSWIDTH_16;
586 
587 		if (device_property_read_bool(&pdev->dev,
588 					      "ti,davinci-nand-use-bbt"))
589 			pdata->bbt_options = NAND_BBT_USE_FLASH;
590 
591 		/*
592 		 * Since kernel v4.8, this driver has been fixed to enable
593 		 * use of 4-bit hardware ECC with subpages and verified on
594 		 * TI's keystone EVMs (K2L, K2HK and K2E).
595 		 * However, in the interest of not breaking systems using
596 		 * existing UBI partitions, sub-page writes are not being
597 		 * (re)enabled. If you want to use subpage writes on Keystone
598 		 * platforms (i.e. do not have any existing UBI partitions),
599 		 * then use "ti,davinci-nand" as the compatible in your
600 		 * device-tree file.
601 		 */
602 		if (device_is_compatible(&pdev->dev, "ti,keystone-nand"))
603 			pdata->options |= NAND_NO_SUBPAGE_WRITE;
604 	}
605 
606 	return dev_get_platdata(&pdev->dev);
607 }
608 #else
/*
 * Variant built when CONFIG_OF is not defined: simply return whatever
 * platform data is attached to the device (may be NULL).
 */
static struct davinci_nand_pdata *
nand_davinci_get_pdata(struct platform_device *pdev)
{
	return dev_get_platdata(&pdev->dev);
}
614 #endif
615 
616 static int davinci_nand_attach_chip(struct nand_chip *chip)
617 {
618 	struct mtd_info *mtd = nand_to_mtd(chip);
619 	struct davinci_nand_info *info = to_davinci_nand(mtd);
620 	struct davinci_nand_pdata *pdata = nand_davinci_get_pdata(info->pdev);
621 	int ret = 0;
622 
623 	if (IS_ERR(pdata))
624 		return PTR_ERR(pdata);
625 
626 	/* Use board-specific ECC config */
627 	chip->ecc.engine_type = pdata->engine_type;
628 	chip->ecc.placement = pdata->ecc_placement;
629 
630 	switch (chip->ecc.engine_type) {
631 	case NAND_ECC_ENGINE_TYPE_NONE:
632 	case NAND_ECC_ENGINE_TYPE_ON_DIE:
633 		pdata->ecc_bits = 0;
634 		break;
635 	case NAND_ECC_ENGINE_TYPE_SOFT:
636 		pdata->ecc_bits = 0;
637 		/*
638 		 * This driver expects Hamming based ECC when engine_type is set
639 		 * to NAND_ECC_ENGINE_TYPE_SOFT. Force ecc.algo to
640 		 * NAND_ECC_ALGO_HAMMING to avoid adding an extra ->ecc_algo
641 		 * field to davinci_nand_pdata.
642 		 */
643 		chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
644 		break;
645 	case NAND_ECC_ENGINE_TYPE_ON_HOST:
646 		if (pdata->ecc_bits == 4) {
647 			int chunks = mtd->writesize / 512;
648 
649 			if (!chunks || mtd->oobsize < 16) {
650 				dev_dbg(&info->pdev->dev, "too small\n");
651 				return -EINVAL;
652 			}
653 
654 			/*
655 			 * No sanity checks:  CPUs must support this,
656 			 * and the chips may not use NAND_BUSWIDTH_16.
657 			 */
658 
659 			/* No sharing 4-bit hardware between chipselects yet */
660 			spin_lock_irq(&davinci_nand_lock);
661 			if (ecc4_busy)
662 				ret = -EBUSY;
663 			else
664 				ecc4_busy = true;
665 			spin_unlock_irq(&davinci_nand_lock);
666 
667 			if (ret == -EBUSY)
668 				return ret;
669 
670 			chip->ecc.calculate = nand_davinci_calculate_4bit;
671 			chip->ecc.correct = nand_davinci_correct_4bit;
672 			chip->ecc.hwctl = nand_davinci_hwctl_4bit;
673 			chip->ecc.bytes = 10;
674 			chip->ecc.options = NAND_ECC_GENERIC_ERASED_CHECK;
675 			chip->ecc.algo = NAND_ECC_ALGO_BCH;
676 
677 			/*
678 			 * Update ECC layout if needed ... for 1-bit HW ECC, the
679 			 * default is OK, but it allocates 6 bytes when only 3
680 			 * are needed (for each 512 bytes). For 4-bit HW ECC,
681 			 * the default is not usable: 10 bytes needed, not 6.
682 			 *
683 			 * For small page chips, preserve the manufacturer's
684 			 * badblock marking data ... and make sure a flash BBT
685 			 * table marker fits in the free bytes.
686 			 */
687 			if (chunks == 1) {
688 				mtd_set_ooblayout(mtd,
689 						  &hwecc4_small_ooblayout_ops);
690 			} else if (chunks == 4 || chunks == 8) {
691 				chip->ecc.read_page = nand_read_page_hwecc_oob_first;
692 
693 				if (chip->options & NAND_IS_BOOT_MEDIUM)
694 					mtd_set_ooblayout(mtd, &hwecc4_large_ooblayout_ops);
695 				else
696 					mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout());
697 			} else {
698 				return -EIO;
699 			}
700 		} else {
701 			/* 1bit ecc hamming */
702 			chip->ecc.calculate = nand_davinci_calculate_1bit;
703 			chip->ecc.correct = nand_davinci_correct_1bit;
704 			chip->ecc.hwctl = nand_davinci_hwctl_1bit;
705 			chip->ecc.bytes = 3;
706 			chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
707 		}
708 		chip->ecc.size = 512;
709 		chip->ecc.strength = pdata->ecc_bits;
710 		break;
711 	default:
712 		return -EINVAL;
713 	}
714 
715 	return ret;
716 }
717 
718 static void nand_davinci_data_in(struct davinci_nand_info *info, void *buf,
719 				 unsigned int len, bool force_8bit)
720 {
721 	u32 alignment = ((uintptr_t)buf | len) & 3;
722 
723 	if (force_8bit || (alignment & 1))
724 		ioread8_rep(info->current_cs, buf, len);
725 	else if (alignment & 3)
726 		ioread16_rep(info->current_cs, buf, len >> 1);
727 	else
728 		ioread32_rep(info->current_cs, buf, len >> 2);
729 }
730 
731 static void nand_davinci_data_out(struct davinci_nand_info *info,
732 				  const void *buf, unsigned int len,
733 				  bool force_8bit)
734 {
735 	u32 alignment = ((uintptr_t)buf | len) & 3;
736 
737 	if (force_8bit || (alignment & 1))
738 		iowrite8_rep(info->current_cs, buf, len);
739 	else if (alignment & 3)
740 		iowrite16_rep(info->current_cs, buf, len >> 1);
741 	else
742 		iowrite32_rep(info->current_cs, buf, len >> 2);
743 }
744 
745 static int davinci_nand_exec_instr(struct davinci_nand_info *info,
746 				   const struct nand_op_instr *instr)
747 {
748 	unsigned int i, timeout_us;
749 	u32 status;
750 	int ret;
751 
752 	switch (instr->type) {
753 	case NAND_OP_CMD_INSTR:
754 		iowrite8(instr->ctx.cmd.opcode,
755 			 info->current_cs + info->mask_cle);
756 		break;
757 
758 	case NAND_OP_ADDR_INSTR:
759 		for (i = 0; i < instr->ctx.addr.naddrs; i++) {
760 			iowrite8(instr->ctx.addr.addrs[i],
761 				 info->current_cs + info->mask_ale);
762 		}
763 		break;
764 
765 	case NAND_OP_DATA_IN_INSTR:
766 		nand_davinci_data_in(info, instr->ctx.data.buf.in,
767 				     instr->ctx.data.len,
768 				     instr->ctx.data.force_8bit);
769 		break;
770 
771 	case NAND_OP_DATA_OUT_INSTR:
772 		nand_davinci_data_out(info, instr->ctx.data.buf.out,
773 				      instr->ctx.data.len,
774 				      instr->ctx.data.force_8bit);
775 		break;
776 
777 	case NAND_OP_WAITRDY_INSTR:
778 		timeout_us = instr->ctx.waitrdy.timeout_ms * 1000;
779 		ret = readl_relaxed_poll_timeout(info->base + NANDFSR_OFFSET,
780 						 status, status & BIT(0), 5,
781 						 timeout_us);
782 		if (ret)
783 			return ret;
784 
785 		break;
786 	}
787 
788 	if (instr->delay_ns) {
789 		/* Dummy read to be sure that command is sent before ndelay starts */
790 		davinci_nand_readl(info, 0);
791 		ndelay(instr->delay_ns);
792 	}
793 
794 	return 0;
795 }
796 
797 static int davinci_nand_exec_op(struct nand_chip *chip,
798 				const struct nand_operation *op,
799 				bool check_only)
800 {
801 	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
802 	unsigned int i;
803 
804 	if (check_only)
805 		return 0;
806 
807 	info->current_cs = info->vaddr + (op->cs * info->mask_chipsel);
808 
809 	for (i = 0; i < op->ninstrs; i++) {
810 		int ret;
811 
812 		ret = davinci_nand_exec_instr(info, &op->instrs[i]);
813 		if (ret)
814 			return ret;
815 	}
816 
817 	return 0;
818 }
819 
820 #define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP((ps) / 1000, (period_ns)))
821 
822 static int davinci_nand_setup_interface(struct nand_chip *chip, int chipnr,
823 					const struct nand_interface_config *conf)
824 {
825 	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
826 	const struct nand_sdr_timings *sdr;
827 	struct aemif_cs_timings timings;
828 	s32 cfg, min, cyc_ns;
829 	int ret;
830 
831 	cyc_ns = 1000000000 / clk_get_rate(info->clk);
832 
833 	sdr = nand_get_sdr_timings(conf);
834 	if (IS_ERR(sdr))
835 		return PTR_ERR(sdr);
836 
837 	cfg = TO_CYCLES(sdr->tCLR_min, cyc_ns) - 1;
838 	timings.rsetup = cfg > 0 ? cfg : 0;
839 
840 	cfg = max_t(s32, TO_CYCLES(sdr->tREA_max + MAX_TSU_PS, cyc_ns),
841 		    TO_CYCLES(sdr->tRP_min, cyc_ns)) - 1;
842 	timings.rstrobe = cfg > 0 ? cfg : 0;
843 
844 	min = TO_CYCLES(sdr->tCEA_max + MAX_TSU_PS, cyc_ns) - 2;
845 	while ((s32)(timings.rsetup + timings.rstrobe) < min)
846 		timings.rstrobe++;
847 
848 	cfg = TO_CYCLES((s32)(MAX_TH_PS - sdr->tCHZ_max), cyc_ns) - 1;
849 	timings.rhold = cfg > 0 ? cfg : 0;
850 
851 	min = TO_CYCLES(sdr->tRC_min, cyc_ns) - 3;
852 	while ((s32)(timings.rsetup + timings.rstrobe + timings.rhold) < min)
853 		timings.rhold++;
854 
855 	cfg = TO_CYCLES((s32)(sdr->tRHZ_max - (timings.rhold + 1) * cyc_ns * 1000), cyc_ns);
856 	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCHZ_max, cyc_ns)) - 1;
857 	timings.ta = cfg > 0 ? cfg : 0;
858 
859 	cfg = TO_CYCLES(sdr->tWP_min, cyc_ns) - 1;
860 	timings.wstrobe = cfg > 0 ? cfg : 0;
861 
862 	cfg = max_t(s32, TO_CYCLES(sdr->tCLS_min, cyc_ns), TO_CYCLES(sdr->tALS_min, cyc_ns));
863 	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCS_min, cyc_ns)) - 1;
864 	timings.wsetup = cfg > 0 ? cfg : 0;
865 
866 	min = TO_CYCLES(sdr->tDS_min, cyc_ns) - 2;
867 	while ((s32)(timings.wsetup + timings.wstrobe) < min)
868 		timings.wstrobe++;
869 
870 	cfg = max_t(s32, TO_CYCLES(sdr->tCLH_min, cyc_ns), TO_CYCLES(sdr->tALH_min, cyc_ns));
871 	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCH_min, cyc_ns));
872 	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tDH_min, cyc_ns)) - 1;
873 	timings.whold = cfg > 0 ? cfg : 0;
874 
875 	min = TO_CYCLES(sdr->tWC_min, cyc_ns) - 2;
876 	while ((s32)(timings.wsetup + timings.wstrobe + timings.whold) < min)
877 		timings.whold++;
878 
879 	dev_dbg(&info->pdev->dev, "RSETUP %x RSTROBE %x RHOLD %x\n",
880 		timings.rsetup, timings.rstrobe, timings.rhold);
881 	dev_dbg(&info->pdev->dev, "TA %x\n", timings.ta);
882 	dev_dbg(&info->pdev->dev, "WSETUP %x WSTROBE %x WHOLD %x\n",
883 		timings.wsetup, timings.wstrobe, timings.whold);
884 
885 	ret = aemif_check_cs_timings(&timings);
886 	if (ret || chipnr == NAND_DATA_IFACE_CHECK_ONLY)
887 		return ret;
888 
889 	return aemif_set_cs_timings(info->aemif, info->core_chipsel, &timings);
890 }
891 
/* Controller hooks handed to the NAND core at probe time */
static const struct nand_controller_ops davinci_nand_controller_ops = {
	.attach_chip = davinci_nand_attach_chip,
	.exec_op = davinci_nand_exec_op,
	.setup_interface = davinci_nand_setup_interface,
};
897 
898 static int nand_davinci_probe(struct platform_device *pdev)
899 {
900 	struct davinci_nand_pdata	*pdata;
901 	struct davinci_nand_info	*info;
902 	struct resource			*res1;
903 	struct resource			*res2;
904 	void __iomem			*vaddr;
905 	void __iomem			*base;
906 	int				ret;
907 	uint32_t			val;
908 	struct mtd_info			*mtd;
909 
910 	pdata = nand_davinci_get_pdata(pdev);
911 	if (IS_ERR(pdata))
912 		return PTR_ERR(pdata);
913 
914 	/* insist on board-specific configuration */
915 	if (!pdata)
916 		return -ENODEV;
917 
918 	/* which external chipselect will we be managing? */
919 	if (pdata->core_chipsel > 3)
920 		return -ENODEV;
921 
922 	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
923 	if (!info)
924 		return -ENOMEM;
925 
926 	platform_set_drvdata(pdev, info);
927 
928 	res1 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
929 	res2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
930 	if (!res1 || !res2) {
931 		dev_err(&pdev->dev, "resource missing\n");
932 		return -EINVAL;
933 	}
934 
935 	vaddr = devm_ioremap_resource(&pdev->dev, res1);
936 	if (IS_ERR(vaddr))
937 		return PTR_ERR(vaddr);
938 
939 	/*
940 	 * This registers range is used to setup NAND settings. In case with
941 	 * TI AEMIF driver, the same memory address range is requested already
942 	 * by AEMIF, so we cannot request it twice, just ioremap.
943 	 * The AEMIF and NAND drivers not use the same registers in this range.
944 	 */
945 	base = devm_ioremap(&pdev->dev, res2->start, resource_size(res2));
946 	if (!base) {
947 		dev_err(&pdev->dev, "ioremap failed for resource %pR\n", res2);
948 		return -EADDRNOTAVAIL;
949 	}
950 
951 	info->clk = devm_clk_get_enabled(&pdev->dev, "aemif");
952 	if (IS_ERR(info->clk))
953 		return dev_err_probe(&pdev->dev, PTR_ERR(info->clk), "failed to get clock");
954 
955 	info->pdev		= pdev;
956 	info->base		= base;
957 	info->vaddr		= vaddr;
958 	info->aemif		= dev_get_drvdata(pdev->dev.parent);
959 
960 	mtd			= nand_to_mtd(&info->chip);
961 	mtd->dev.parent		= &pdev->dev;
962 	nand_set_flash_node(&info->chip, pdev->dev.of_node);
963 
964 	/* options such as NAND_BBT_USE_FLASH */
965 	info->chip.bbt_options	= pdata->bbt_options;
966 	/* options such as 16-bit widths */
967 	info->chip.options	= pdata->options;
968 	info->chip.bbt_td	= pdata->bbt_td;
969 	info->chip.bbt_md	= pdata->bbt_md;
970 
971 	info->current_cs	= info->vaddr;
972 	info->core_chipsel	= pdata->core_chipsel;
973 	info->mask_chipsel	= pdata->mask_chipsel;
974 
975 	/* use nandboot-capable ALE/CLE masks by default */
976 	info->mask_ale		= pdata->mask_ale ? : MASK_ALE;
977 	info->mask_cle		= pdata->mask_cle ? : MASK_CLE;
978 
979 	spin_lock_irq(&davinci_nand_lock);
980 
981 	/* put CSxNAND into NAND mode */
982 	val = davinci_nand_readl(info, NANDFCR_OFFSET);
983 	val |= BIT(info->core_chipsel);
984 	davinci_nand_writel(info, NANDFCR_OFFSET, val);
985 
986 	spin_unlock_irq(&davinci_nand_lock);
987 
988 	/* Scan to find existence of the device(s) */
989 	nand_controller_init(&info->controller);
990 	info->controller.ops = &davinci_nand_controller_ops;
991 	info->chip.controller = &info->controller;
992 	ret = nand_scan(&info->chip, pdata->mask_chipsel ? 2 : 1);
993 	if (ret < 0) {
994 		dev_dbg(&pdev->dev, "no NAND chip(s) found\n");
995 		return ret;
996 	}
997 
998 	if (pdata->parts)
999 		ret = mtd_device_register(mtd, pdata->parts, pdata->nr_parts);
1000 	else
1001 		ret = mtd_device_register(mtd, NULL, 0);
1002 	if (ret < 0)
1003 		goto err_cleanup_nand;
1004 
1005 	val = davinci_nand_readl(info, NRCSR_OFFSET);
1006 	dev_info(&pdev->dev, "controller rev. %d.%d\n",
1007 	       (val >> 8) & 0xff, val & 0xff);
1008 
1009 	return 0;
1010 
1011 err_cleanup_nand:
1012 	nand_cleanup(&info->chip);
1013 
1014 	return ret;
1015 }
1016 
1017 static void nand_davinci_remove(struct platform_device *pdev)
1018 {
1019 	struct davinci_nand_info *info = platform_get_drvdata(pdev);
1020 	struct nand_chip *chip = &info->chip;
1021 	int ret;
1022 
1023 	spin_lock_irq(&davinci_nand_lock);
1024 	if (chip->ecc.placement == NAND_ECC_PLACEMENT_INTERLEAVED)
1025 		ecc4_busy = false;
1026 	spin_unlock_irq(&davinci_nand_lock);
1027 
1028 	ret = mtd_device_unregister(nand_to_mtd(chip));
1029 	WARN_ON(ret);
1030 	nand_cleanup(chip);
1031 }
1032 
/*
 * Platform driver glue; binds by the "davinci_nand" name or, through the
 * OF match table, by device-tree compatible.
 */
static struct platform_driver nand_davinci_driver = {
	.probe		= nand_davinci_probe,
	.remove		= nand_davinci_remove,
	.driver		= {
		.name	= "davinci_nand",
		.of_match_table = of_match_ptr(davinci_nand_of_match),
	},
};
1041 MODULE_ALIAS("platform:davinci_nand");
1042 
1043 module_platform_driver(nand_davinci_driver);
1044 
1045 MODULE_LICENSE("GPL");
1046 MODULE_AUTHOR("Texas Instruments");
1047 MODULE_DESCRIPTION("Davinci NAND flash driver");
1048 
1049