1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
4 */
5
6 #include <linux/module.h>
7 #include <linux/platform_device.h>
8 #include <linux/clk.h>
9 #include <linux/hw_bitfield.h>
10 #include <linux/mmc/host.h>
11 #include <linux/of_address.h>
12 #include <linux/mmc/slot-gpio.h>
13 #include <linux/pm_runtime.h>
14 #include <linux/slab.h>
15
16 #include "dw_mmc.h"
17 #include "dw_mmc-pltfm.h"
18
/* The card interface clock is the ciu clock divided by this factor. */
#define RK3288_CLKGEN_DIV		2

/*
 * Timing registers used when the phase is programmed inside the controller:
 * TIMING_CON0 carries the drive phase, TIMING_CON1 the sample phase.
 */
#define SDMMC_TIMING_CON0		0x130
#define SDMMC_TIMING_CON1		0x134

/*
 * Field layout of a timing register value once it has been shifted right by
 * ROCKCHIP_MMC_DEGREE_OFFSET: a 2-bit count of 90 degree steps, an 8-bit
 * number of fine delay elements and a flag enabling the fine delay.
 */
#define ROCKCHIP_MMC_DEGREE_OFFSET	1
#define ROCKCHIP_MMC_DEGREE_MASK	0x3
#define ROCKCHIP_MMC_DELAYNUM_OFFSET	2
#define ROCKCHIP_MMC_DELAYNUM_MASK	(0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET)
#define ROCKCHIP_MMC_DELAY_SEL		BIT(10)

/* Assumed length of one fine delay element, in picoseconds. */
#define ROCKCHIP_MMC_DELAY_ELEMENT_PSEC	60
28
/*
 * Candidate minimum card clock rates in Hz, lowest first.
 * dw_mci_rockchip_init() walks them in order and keeps the first one the
 * ciu clock can be rounded to.
 */
static const unsigned int freqs[] = {
	100000,
	200000,
	300000,
	400000,
};
30
31 struct dw_mci_rockchip_priv_data {
32 struct clk *drv_clk;
33 struct clk *sample_clk;
34 int default_sample_phase;
35 int num_phases;
36 bool internal_phase;
37 };
38
39 /*
40 * Each fine delay is between 44ps-77ps. Assume each fine delay is 60ps to
41 * simplify calculations. So 45degs could be anywhere between 33deg and 57.8deg.
42 */
rockchip_mmc_get_internal_phase(struct dw_mci * host,bool sample)43 static int rockchip_mmc_get_internal_phase(struct dw_mci *host, bool sample)
44 {
45 unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV;
46 u32 raw_value;
47 u16 degrees;
48 u32 delay_num = 0;
49
50 /* Constant signal, no measurable phase shift */
51 if (!rate)
52 return 0;
53
54 if (sample)
55 raw_value = mci_readl(host, TIMING_CON1);
56 else
57 raw_value = mci_readl(host, TIMING_CON0);
58
59 raw_value >>= ROCKCHIP_MMC_DEGREE_OFFSET;
60 degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90;
61
62 if (raw_value & ROCKCHIP_MMC_DELAY_SEL) {
63 /* degrees/delaynum * 1000000 */
64 unsigned long factor = (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10) *
65 36 * (rate / 10000);
66
67 delay_num = (raw_value & ROCKCHIP_MMC_DELAYNUM_MASK);
68 delay_num >>= ROCKCHIP_MMC_DELAYNUM_OFFSET;
69 degrees += DIV_ROUND_CLOSEST(delay_num * factor, 1000000);
70 }
71
72 return degrees % 360;
73 }
74
/*
 * Return the current sample (@sample true) or drive (@sample false) phase,
 * read either from the controller's timing registers or from the matching
 * phase clock, depending on priv->internal_phase.
 */
static int rockchip_mmc_get_phase(struct dw_mci *host, bool sample)
{
	struct dw_mci_rockchip_priv_data *priv = host->priv;

	if (!priv->internal_phase)
		return clk_get_phase(sample ? priv->sample_clk : priv->drv_clk);

	return rockchip_mmc_get_internal_phase(host, sample);
}
85
/*
 * Program a phase through the controller's own timing register.
 *
 * @host:    controller to program
 * @sample:  true writes the sample phase (TIMING_CON1), false the drive
 *           phase (TIMING_CON0)
 * @degrees: requested phase; wrapped into 0..359 before use, the same
 *           sanity check clk_set_phase() applies on the clk based path
 *
 * Returns 0 on success, or -EINVAL when the ciu clock rate is too low to
 * derive a delay from.
 */
static int rockchip_mmc_set_internal_phase(struct dw_mci *host, bool sample, int degrees)
{
	unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV;
	unsigned long divisor;
	u8 nineties, remainder;
	u8 delay_num;
	u32 raw_value;
	u32 delay;

	/*
	 * The below calculation is based on the output clock from
	 * MMC host to the card, which expects the phase clock inherits
	 * the clock rate from its parent, namely the output clock
	 * provider of MMC host. However, things may go wrong if
	 * (1) It is orphan.
	 * (2) It is assigned to the wrong parent.
	 *
	 * This check helps debug case (1), which seems to be the
	 * most likely problem we often face and which makes it difficult
	 * for people to debug unstable mmc tuning results.
	 *
	 * The divisor used further down is checked rather than the bare
	 * rate, so that a non-zero rate below 1 kHz cannot lead to a
	 * division by zero either.
	 */
	divisor = (rate / 1000) * 36 * (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10);
	if (!divisor) {
		dev_err(host->dev, "%s: invalid clk rate\n", __func__);
		return -EINVAL;
	}

	/*
	 * Wrap the request into 0..359. Without this, a value of 360 or more
	 * would make "nineties" exceed its 2-bit field and spill into the
	 * delay number bits of the register.
	 */
	degrees %= 360;
	if (degrees < 0)
		degrees += 360;

	nineties = degrees / 90;
	remainder = (degrees % 90);

	/*
	 * Due to the inexact nature of the "fine" delay, we might
	 * actually go non-monotonic. We don't go _too_ non-monotonic
	 * though, so we should be OK. Here are options of how we may
	 * work:
	 *
	 * Ideally we end up with:
	 * 1.0, 2.0, ..., 69.0, 70.0, ..., 89.0, 90.0
	 *
	 * On one extreme (if delay is actually 44ps):
	 * .73, 1.5, ..., 50.6, 51.3, ..., 65.3, 90.0
	 * The other (if delay is actually 77ps):
	 * 1.3, 2.6, ..., 88.6, 89.8, ..., 114.0, 90
	 *
	 * It's possible we might make a delay that is up to 25
	 * degrees off from what we think we're making. That's OK
	 * though because we should be REALLY far from any bad range.
	 */

	/*
	 * Convert to delay; do a little extra work to make sure we
	 * don't overflow 32-bit / 64-bit numbers.
	 */
	delay = 10000000; /* PSECS_PER_SEC / 10000 / 10 */
	delay *= remainder;
	delay = DIV_ROUND_CLOSEST(delay, divisor);

	/* The delay number field is 8 bits wide */
	delay_num = (u8) min_t(u32, delay, 255);

	/* Only switch the fine delay line in when it is actually needed */
	raw_value = delay_num ? ROCKCHIP_MMC_DELAY_SEL : 0;
	raw_value |= delay_num << ROCKCHIP_MMC_DELAYNUM_OFFSET;
	raw_value |= nineties;

	if (sample)
		mci_writel(host, TIMING_CON1,
			   FIELD_PREP_WM16(GENMASK(11, 1), raw_value));
	else
		mci_writel(host, TIMING_CON0,
			   FIELD_PREP_WM16(GENMASK(11, 1), raw_value));

	dev_dbg(host->dev, "set %s_phase(%d) delay_nums=%u actual_degrees=%d\n",
		sample ? "sample" : "drv", degrees, delay_num,
		rockchip_mmc_get_phase(host, sample)
	);

	return 0;
}
163
/*
 * Set the sample (@sample true) or drive (@sample false) phase to @degrees,
 * either through the controller's timing registers or through the matching
 * phase clock, depending on priv->internal_phase. Returns whatever the
 * selected backend returns.
 */
static int rockchip_mmc_set_phase(struct dw_mci *host, bool sample, int degrees)
{
	struct dw_mci_rockchip_priv_data *priv = host->priv;

	if (!priv->internal_phase)
		return clk_set_phase(sample ? priv->sample_clk : priv->drv_clk,
				     degrees);

	return rockchip_mmc_set_internal_phase(host, sample, degrees);
}
174
dw_mci_rk3288_set_ios(struct dw_mci * host,struct mmc_ios * ios)175 static void dw_mci_rk3288_set_ios(struct dw_mci *host, struct mmc_ios *ios)
176 {
177 struct dw_mci_rockchip_priv_data *priv = host->priv;
178 int ret;
179 unsigned int cclkin;
180 u32 bus_hz;
181
182 if (ios->clock == 0)
183 return;
184
185 /*
186 * cclkin: source clock of mmc controller
187 * bus_hz: card interface clock generated by CLKGEN
188 * bus_hz = cclkin / RK3288_CLKGEN_DIV
189 * ios->clock = (div == 0) ? bus_hz : (bus_hz / (2 * div))
190 *
191 * Note: div can only be 0 or 1, but div must be set to 1 for eMMC
192 * DDR52 8-bit mode.
193 */
194 if (ios->bus_width == MMC_BUS_WIDTH_8 &&
195 ios->timing == MMC_TIMING_MMC_DDR52)
196 cclkin = 2 * ios->clock * RK3288_CLKGEN_DIV;
197 else
198 cclkin = ios->clock * RK3288_CLKGEN_DIV;
199
200 ret = clk_set_rate(host->ciu_clk, cclkin);
201 if (ret)
202 dev_warn(host->dev, "failed to set rate %uHz err: %d\n", cclkin, ret);
203
204 bus_hz = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV;
205 if (bus_hz != host->bus_hz) {
206 host->bus_hz = bus_hz;
207 /* force dw_mci_setup_bus() */
208 host->current_speed = 0;
209 }
210
211 /* Make sure we use phases which we can enumerate with */
212 if (!IS_ERR(priv->sample_clk) && ios->timing <= MMC_TIMING_SD_HS)
213 rockchip_mmc_set_phase(host, true, priv->default_sample_phase);
214
215 /*
216 * Set the drive phase offset based on speed mode to achieve hold times.
217 *
218 * NOTE: this is _not_ a value that is dynamically tuned and is also
219 * _not_ a value that will vary from board to board. It is a value
220 * that could vary between different SoC models if they had massively
221 * different output clock delays inside their dw_mmc IP block (delay_o),
222 * but since it's OK to overshoot a little we don't need to do complex
223 * calculations and can pick values that will just work for everyone.
224 *
225 * When picking values we'll stick with picking 0/90/180/270 since
226 * those can be made very accurately on all known Rockchip SoCs.
227 *
228 * Note that these values match values from the DesignWare Databook
229 * tables for the most part except for SDR12 and "ID mode". For those
230 * two modes the databook calculations assume a clock in of 50MHz. As
231 * seen above, we always use a clock in rate that is exactly the
232 * card's input clock (times RK3288_CLKGEN_DIV, but that gets divided
233 * back out before the controller sees it).
234 *
235 * From measurement of a single device, it appears that delay_o is
236 * about .5 ns. Since we try to leave a bit of margin, it's expected
237 * that numbers here will be fine even with much larger delay_o
238 * (the 1.4 ns assumed by the DesignWare Databook would result in the
239 * same results, for instance).
240 */
241 if (!IS_ERR(priv->drv_clk)) {
242 int phase;
243
244 /*
245 * In almost all cases a 90 degree phase offset will provide
246 * sufficient hold times across all valid input clock rates
247 * assuming delay_o is not absurd for a given SoC. We'll use
248 * that as a default.
249 */
250 phase = 90;
251
252 switch (ios->timing) {
253 case MMC_TIMING_MMC_DDR52:
254 /*
255 * Since clock in rate with MMC_DDR52 is doubled when
256 * bus width is 8 we need to double the phase offset
257 * to get the same timings.
258 */
259 if (ios->bus_width == MMC_BUS_WIDTH_8)
260 phase = 180;
261 break;
262 case MMC_TIMING_UHS_SDR104:
263 case MMC_TIMING_MMC_HS200:
264 /*
265 * In the case of 150 MHz clock (typical max for
266 * Rockchip SoCs), 90 degree offset will add a delay
267 * of 1.67 ns. That will meet min hold time of .8 ns
268 * as long as clock output delay is < .87 ns. On
269 * SoCs measured this seems to be OK, but it doesn't
270 * hurt to give margin here, so we use 180.
271 */
272 phase = 180;
273 break;
274 }
275
276 rockchip_mmc_set_phase(host, false, phase);
277 }
278 }
279
/*
 * Map tuning step @i out of @num_phases steps onto a phase in degrees,
 * rounding up. Both arguments are parenthesised so that arbitrary
 * expressions can be passed.
 */
#define TUNING_ITERATION_TO_PHASE(i, num_phases) \
	(DIV_ROUND_UP((i) * 360, (num_phases)))
282
/*
 * execute_tuning hook: sweep the sample phase, collect the ranges of phases
 * for which the tuning command succeeds and settle on the middle of the
 * longest one.
 *
 * @slot:   slot being tuned
 * @opcode: tuning command passed on to mmc_send_tuning()
 *
 * Returns 0 on success, -EIO when there is no sample clock or no phase
 * works, and -ENOMEM when the range table cannot be allocated.
 */
static int dw_mci_rk3288_execute_tuning(struct dw_mci_slot *slot, u32 opcode)
{
	struct dw_mci *host = slot->host;
	struct dw_mci_rockchip_priv_data *priv = host->priv;
	struct mmc_host *mmc = slot->mmc;
	int ret = 0;
	int i;
	/* Initialised so nothing indeterminate is read if the loop never runs */
	bool v = false, prev_v = false, first_v = false;
	struct range_t {
		int start;
		int end; /* inclusive */
	};
	struct range_t *ranges;
	int range_count = 0;
	int longest_range_len = -1;
	int longest_range = -1;
	int middle_phase;
	int phase;

	if (IS_ERR(priv->sample_clk)) {
		dev_err(host->dev, "Tuning clock (sample_clk) not defined.\n");
		return -EIO;
	}

	/* Two ranges are always separated by at least one bad phase */
	ranges = kmalloc_array(priv->num_phases / 2 + 1,
			       sizeof(*ranges), GFP_KERNEL);
	if (!ranges)
		return -ENOMEM;

	/* Try each phase and extract good ranges */
	for (i = 0; i < priv->num_phases; ) {
		rockchip_mmc_set_phase(host, true,
				       TUNING_ITERATION_TO_PHASE(
						i,
						priv->num_phases));

		v = !mmc_send_tuning(mmc, opcode, NULL);

		if (i == 0)
			first_v = v;

		/* A bad -> good transition opens a new range */
		if ((!prev_v) && v) {
			range_count++;
			ranges[range_count-1].start = i;
		}
		if (v) {
			ranges[range_count-1].end = i;
			i++;
		} else if (i == priv->num_phases - 1) {
			/* No extra skipping rules if we're at the end */
			i++;
		} else {
			/*
			 * No need to check too close to an invalid
			 * one since testing bad phases is slow. Skip
			 * 20 degrees.
			 */
			i += DIV_ROUND_UP(20 * priv->num_phases, 360);

			/* Always test the last one */
			if (i >= priv->num_phases)
				i = priv->num_phases - 1;
		}

		prev_v = v;
	}

	if (range_count == 0) {
		dev_warn(host->dev, "All phases bad!\n");
		ret = -EIO;
		goto free;
	}

	/* wrap around case, merge the end points */
	if ((range_count > 1) && first_v && v) {
		ranges[0].start = ranges[range_count-1].start;
		range_count--;
	}

	if (ranges[0].start == 0 && ranges[0].end == priv->num_phases - 1) {
		rockchip_mmc_set_phase(host, true, priv->default_sample_phase);

		dev_info(host->dev, "All phases work, using default phase %d.\n",
			 priv->default_sample_phase);
		goto free;
	}

	/* Find the longest range */
	for (i = 0; i < range_count; i++) {
		int len = (ranges[i].end - ranges[i].start + 1);

		/* A merged wrap-around range ends before it starts */
		if (len < 0)
			len += priv->num_phases;

		if (longest_range_len < len) {
			longest_range_len = len;
			longest_range = i;
		}

		dev_dbg(host->dev, "Good phase range %d-%d (%d len)\n",
			TUNING_ITERATION_TO_PHASE(ranges[i].start,
						  priv->num_phases),
			TUNING_ITERATION_TO_PHASE(ranges[i].end,
						  priv->num_phases),
			len
		);
	}

	dev_dbg(host->dev, "Best phase range %d-%d (%d len)\n",
		TUNING_ITERATION_TO_PHASE(ranges[longest_range].start,
					  priv->num_phases),
		TUNING_ITERATION_TO_PHASE(ranges[longest_range].end,
					  priv->num_phases),
		longest_range_len
	);

	/* Aim for the centre of the longest range, wrapping if needed */
	middle_phase = ranges[longest_range].start + longest_range_len / 2;
	middle_phase %= priv->num_phases;
	phase = TUNING_ITERATION_TO_PHASE(middle_phase, priv->num_phases);
	dev_info(host->dev, "Successfully tuned phase to %d\n", phase);

	rockchip_mmc_set_phase(host, true, phase);

free:
	kfree(ranges);
	return ret;
}
410
dw_mci_common_parse_dt(struct dw_mci * host)411 static int dw_mci_common_parse_dt(struct dw_mci *host)
412 {
413 struct device_node *np = host->dev->of_node;
414 struct dw_mci_rockchip_priv_data *priv;
415
416 priv = devm_kzalloc(host->dev, sizeof(*priv), GFP_KERNEL);
417 if (!priv)
418 return -ENOMEM;
419
420 if (of_property_read_u32(np, "rockchip,desired-num-phases",
421 &priv->num_phases))
422 priv->num_phases = 360;
423
424 if (of_property_read_u32(np, "rockchip,default-sample-phase",
425 &priv->default_sample_phase))
426 priv->default_sample_phase = 0;
427
428 host->priv = priv;
429
430 return 0;
431 }
432
dw_mci_rk3288_parse_dt(struct dw_mci * host)433 static int dw_mci_rk3288_parse_dt(struct dw_mci *host)
434 {
435 struct dw_mci_rockchip_priv_data *priv;
436 int err;
437
438 err = dw_mci_common_parse_dt(host);
439 if (err)
440 return err;
441
442 priv = host->priv;
443
444 priv->drv_clk = devm_clk_get(host->dev, "ciu-drive");
445 if (IS_ERR(priv->drv_clk))
446 dev_dbg(host->dev, "ciu-drive not available\n");
447
448 priv->sample_clk = devm_clk_get(host->dev, "ciu-sample");
449 if (IS_ERR(priv->sample_clk))
450 dev_dbg(host->dev, "ciu-sample not available\n");
451
452 priv->internal_phase = false;
453
454 return 0;
455 }
456
dw_mci_rk3576_parse_dt(struct dw_mci * host)457 static int dw_mci_rk3576_parse_dt(struct dw_mci *host)
458 {
459 struct dw_mci_rockchip_priv_data *priv;
460 int err = dw_mci_common_parse_dt(host);
461 if (err)
462 return err;
463
464 priv = host->priv;
465
466 priv->internal_phase = true;
467
468 return 0;
469 }
470
dw_mci_rockchip_init(struct dw_mci * host)471 static int dw_mci_rockchip_init(struct dw_mci *host)
472 {
473 int ret, i;
474
475 /* It is slot 8 on Rockchip SoCs */
476 host->sdio_id0 = 8;
477
478 if (of_device_is_compatible(host->dev->of_node, "rockchip,rk3288-dw-mshc")) {
479 host->bus_hz /= RK3288_CLKGEN_DIV;
480
481 /* clock driver will fail if the clock is less than the lowest source clock
482 * divided by the internal clock divider. Test for the lowest available
483 * clock and set the minimum freq to clock / clock divider.
484 */
485
486 for (i = 0; i < ARRAY_SIZE(freqs); i++) {
487 ret = clk_round_rate(host->ciu_clk, freqs[i] * RK3288_CLKGEN_DIV);
488 if (ret > 0) {
489 host->minimum_speed = ret / RK3288_CLKGEN_DIV;
490 break;
491 }
492 }
493 if (ret < 0)
494 dev_warn(host->dev, "no valid minimum freq: %d\n", ret);
495 }
496
497 return 0;
498 }
499
500 static const struct dw_mci_drv_data rk2928_drv_data = {
501 .init = dw_mci_rockchip_init,
502 };
503
504 static const struct dw_mci_drv_data rk3288_drv_data = {
505 .common_caps = MMC_CAP_CMD23,
506 .set_ios = dw_mci_rk3288_set_ios,
507 .execute_tuning = dw_mci_rk3288_execute_tuning,
508 .parse_dt = dw_mci_rk3288_parse_dt,
509 .init = dw_mci_rockchip_init,
510 };
511
512 static const struct dw_mci_drv_data rk3576_drv_data = {
513 .common_caps = MMC_CAP_CMD23,
514 .set_ios = dw_mci_rk3288_set_ios,
515 .execute_tuning = dw_mci_rk3288_execute_tuning,
516 .parse_dt = dw_mci_rk3576_parse_dt,
517 .init = dw_mci_rockchip_init,
518 };
519
520 static const struct of_device_id dw_mci_rockchip_match[] = {
521 { .compatible = "rockchip,rk2928-dw-mshc",
522 .data = &rk2928_drv_data },
523 { .compatible = "rockchip,rk3288-dw-mshc",
524 .data = &rk3288_drv_data },
525 { .compatible = "rockchip,rk3576-dw-mshc",
526 .data = &rk3576_drv_data },
527 {},
528 };
529 MODULE_DEVICE_TABLE(of, dw_mci_rockchip_match);
530
dw_mci_rockchip_probe(struct platform_device * pdev)531 static int dw_mci_rockchip_probe(struct platform_device *pdev)
532 {
533 const struct dw_mci_drv_data *drv_data;
534 const struct of_device_id *match;
535 int ret;
536
537 if (!pdev->dev.of_node)
538 return -ENODEV;
539
540 match = of_match_node(dw_mci_rockchip_match, pdev->dev.of_node);
541 drv_data = match->data;
542
543 pm_runtime_get_noresume(&pdev->dev);
544 pm_runtime_set_active(&pdev->dev);
545 pm_runtime_enable(&pdev->dev);
546 pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
547 pm_runtime_use_autosuspend(&pdev->dev);
548
549 ret = dw_mci_pltfm_register(pdev, drv_data);
550 if (ret) {
551 pm_runtime_disable(&pdev->dev);
552 pm_runtime_set_suspended(&pdev->dev);
553 pm_runtime_put_noidle(&pdev->dev);
554 return ret;
555 }
556
557 pm_runtime_put_autosuspend(&pdev->dev);
558
559 return 0;
560 }
561
dw_mci_rockchip_remove(struct platform_device * pdev)562 static void dw_mci_rockchip_remove(struct platform_device *pdev)
563 {
564 pm_runtime_get_sync(&pdev->dev);
565 pm_runtime_disable(&pdev->dev);
566 pm_runtime_put_noidle(&pdev->dev);
567
568 dw_mci_pltfm_remove(pdev);
569 }
570
571 static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = {
572 SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
573 RUNTIME_PM_OPS(dw_mci_runtime_suspend, dw_mci_runtime_resume, NULL)
574 };
575
576 static struct platform_driver dw_mci_rockchip_pltfm_driver = {
577 .probe = dw_mci_rockchip_probe,
578 .remove = dw_mci_rockchip_remove,
579 .driver = {
580 .name = "dwmmc_rockchip",
581 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
582 .of_match_table = dw_mci_rockchip_match,
583 .pm = pm_ptr(&dw_mci_rockchip_dev_pm_ops),
584 },
585 };
586
587 module_platform_driver(dw_mci_rockchip_pltfm_driver);
588
589 MODULE_AUTHOR("Addy Ke <addy.ke@rock-chips.com>");
590 MODULE_DESCRIPTION("Rockchip Specific DW-MSHC Driver Extension");
591 MODULE_ALIAS("platform:dwmmc_rockchip");
592 MODULE_LICENSE("GPL v2");
593