1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd 4 */ 5 6 #include <linux/module.h> 7 #include <linux/platform_device.h> 8 #include <linux/clk.h> 9 #include <linux/hw_bitfield.h> 10 #include <linux/mmc/host.h> 11 #include <linux/of_address.h> 12 #include <linux/mmc/slot-gpio.h> 13 #include <linux/pm_runtime.h> 14 #include <linux/slab.h> 15 16 #include "dw_mmc.h" 17 #include "dw_mmc-pltfm.h" 18 19 #define RK3288_CLKGEN_DIV 2 20 #define SDMMC_TIMING_CON0 0x130 21 #define SDMMC_TIMING_CON1 0x134 22 #define SDMMC_MISC_CON 0x138 23 #define MEM_CLK_AUTOGATE_ENABLE BIT(5) 24 #define ROCKCHIP_MMC_DELAY_SEL BIT(10) 25 #define ROCKCHIP_MMC_DEGREE_MASK 0x3 26 #define ROCKCHIP_MMC_DEGREE_OFFSET 1 27 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2 28 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET) 29 #define ROCKCHIP_MMC_DELAY_ELEMENT_PSEC 60 30 31 static const unsigned int freqs[] = { 100000, 200000, 300000, 400000 }; 32 33 struct dw_mci_rockchip_priv_data { 34 struct clk *drv_clk; 35 struct clk *sample_clk; 36 int default_sample_phase; 37 int num_phases; 38 bool internal_phase; 39 }; 40 41 /* 42 * Each fine delay is between 44ps-77ps. Assume each fine delay is 60ps to 43 * simplify calculations. So 45degs could be anywhere between 33deg and 57.8deg. 44 */ 45 static int rockchip_mmc_get_internal_phase(struct dw_mci *host, bool sample) 46 { 47 unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; 48 u32 raw_value; 49 u16 degrees; 50 u32 delay_num = 0; 51 52 /* Constant signal, no measurable phase shift */ 53 if (!rate) 54 return 0; 55 56 if (sample) 57 raw_value = mci_readl(host, TIMING_CON1); 58 else 59 raw_value = mci_readl(host, TIMING_CON0); 60 61 raw_value >>= ROCKCHIP_MMC_DEGREE_OFFSET; 62 degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90; 63 64 if (raw_value & ROCKCHIP_MMC_DELAY_SEL) { 65 /* degrees/delaynum * 1000000 */ 66 unsigned long factor = (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10) * 67 36 * (rate / 10000); 68 69 delay_num = (raw_value & ROCKCHIP_MMC_DELAYNUM_MASK); 70 delay_num >>= ROCKCHIP_MMC_DELAYNUM_OFFSET; 71 degrees += DIV_ROUND_CLOSEST(delay_num * factor, 1000000); 72 } 73 74 return degrees % 360; 75 } 76 77 static int rockchip_mmc_get_phase(struct dw_mci *host, bool sample) 78 { 79 struct dw_mci_rockchip_priv_data *priv = host->priv; 80 struct clk *clock = sample ? priv->sample_clk : priv->drv_clk; 81 82 if (priv->internal_phase) 83 return rockchip_mmc_get_internal_phase(host, sample); 84 else 85 return clk_get_phase(clock); 86 } 87 88 static int rockchip_mmc_set_internal_phase(struct dw_mci *host, bool sample, int degrees) 89 { 90 unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; 91 u8 nineties, remainder; 92 u8 delay_num; 93 u32 raw_value; 94 u32 delay; 95 96 /* 97 * The below calculation is based on the output clock from 98 * MMC host to the card, which expects the phase clock inherits 99 * the clock rate from its parent, namely the output clock 100 * provider of MMC host. However, things may go wrong if 101 * (1) It is orphan. 102 * (2) It is assigned to the wrong parent. 103 * 104 * This check help debug the case (1), which seems to be the 105 * most likely problem we often face and which makes it difficult 106 * for people to debug unstable mmc tuning results. 107 */ 108 if (!rate) { 109 dev_err(host->dev, "%s: invalid clk rate\n", __func__); 110 return -EINVAL; 111 } 112 113 nineties = degrees / 90; 114 remainder = (degrees % 90); 115 116 /* 117 * Due to the inexact nature of the "fine" delay, we might 118 * actually go non-monotonic. We don't go _too_ monotonic 119 * though, so we should be OK. Here are options of how we may 120 * work: 121 * 122 * Ideally we end up with: 123 * 1.0, 2.0, ..., 69.0, 70.0, ..., 89.0, 90.0 124 * 125 * On one extreme (if delay is actually 44ps): 126 * .73, 1.5, ..., 50.6, 51.3, ..., 65.3, 90.0 127 * The other (if delay is actually 77ps): 128 * 1.3, 2.6, ..., 88.6. 89.8, ..., 114.0, 90 129 * 130 * It's possible we might make a delay that is up to 25 131 * degrees off from what we think we're making. That's OK 132 * though because we should be REALLY far from any bad range. 133 */ 134 135 /* 136 * Convert to delay; do a little extra work to make sure we 137 * don't overflow 32-bit / 64-bit numbers. 138 */ 139 delay = 10000000; /* PSECS_PER_SEC / 10000 / 10 */ 140 delay *= remainder; 141 delay = DIV_ROUND_CLOSEST(delay, 142 (rate / 1000) * 36 * 143 (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10)); 144 145 delay_num = (u8) min_t(u32, delay, 255); 146 147 raw_value = delay_num ? ROCKCHIP_MMC_DELAY_SEL : 0; 148 raw_value |= delay_num << ROCKCHIP_MMC_DELAYNUM_OFFSET; 149 raw_value |= nineties; 150 151 if (sample) 152 mci_writel(host, TIMING_CON1, 153 FIELD_PREP_WM16(GENMASK(11, 1), raw_value)); 154 else 155 mci_writel(host, TIMING_CON0, 156 FIELD_PREP_WM16(GENMASK(11, 1), raw_value)); 157 158 dev_dbg(host->dev, "set %s_phase(%d) delay_nums=%u actual_degrees=%d\n", 159 sample ? "sample" : "drv", degrees, delay_num, 160 rockchip_mmc_get_phase(host, sample) 161 ); 162 163 return 0; 164 } 165 166 static int rockchip_mmc_set_phase(struct dw_mci *host, bool sample, int degrees) 167 { 168 struct dw_mci_rockchip_priv_data *priv = host->priv; 169 struct clk *clock = sample ? priv->sample_clk : priv->drv_clk; 170 171 if (priv->internal_phase) 172 return rockchip_mmc_set_internal_phase(host, sample, degrees); 173 else 174 return clk_set_phase(clock, degrees); 175 } 176 177 static void dw_mci_rk3288_set_ios(struct dw_mci *host, struct mmc_ios *ios) 178 { 179 struct dw_mci_rockchip_priv_data *priv = host->priv; 180 int ret; 181 unsigned int cclkin; 182 u32 bus_hz; 183 184 if (ios->clock == 0) 185 return; 186 187 /* 188 * cclkin: source clock of mmc controller 189 * bus_hz: card interface clock generated by CLKGEN 190 * bus_hz = cclkin / RK3288_CLKGEN_DIV 191 * ios->clock = (div == 0) ? bus_hz : (bus_hz / (2 * div)) 192 * 193 * Note: div can only be 0 or 1, but div must be set to 1 for eMMC 194 * DDR52 8-bit mode. 195 */ 196 if (ios->bus_width == MMC_BUS_WIDTH_8 && 197 ios->timing == MMC_TIMING_MMC_DDR52) 198 cclkin = 2 * ios->clock * RK3288_CLKGEN_DIV; 199 else 200 cclkin = ios->clock * RK3288_CLKGEN_DIV; 201 202 ret = clk_set_rate(host->ciu_clk, cclkin); 203 if (ret) 204 dev_warn(host->dev, "failed to set rate %uHz err: %d\n", cclkin, ret); 205 206 bus_hz = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; 207 if (bus_hz != host->bus_hz) { 208 host->bus_hz = bus_hz; 209 /* force dw_mci_setup_bus() */ 210 host->current_speed = 0; 211 } 212 213 /* Make sure we use phases which we can enumerate with */ 214 if (!IS_ERR(priv->sample_clk) && ios->timing <= MMC_TIMING_SD_HS) 215 rockchip_mmc_set_phase(host, true, priv->default_sample_phase); 216 217 /* 218 * Set the drive phase offset based on speed mode to achieve hold times. 219 * 220 * NOTE: this is _not_ a value that is dynamically tuned and is also 221 * _not_ a value that will vary from board to board. It is a value 222 * that could vary between different SoC models if they had massively 223 * different output clock delays inside their dw_mmc IP block (delay_o), 224 * but since it's OK to overshoot a little we don't need to do complex 225 * calculations and can pick values that will just work for everyone. 226 * 227 * When picking values we'll stick with picking 0/90/180/270 since 228 * those can be made very accurately on all known Rockchip SoCs. 229 * 230 * Note that these values match values from the DesignWare Databook 231 * tables for the most part except for SDR12 and "ID mode". For those 232 * two modes the databook calculations assume a clock in of 50MHz. As 233 * seen above, we always use a clock in rate that is exactly the 234 * card's input clock (times RK3288_CLKGEN_DIV, but that gets divided 235 * back out before the controller sees it). 236 * 237 * From measurement of a single device, it appears that delay_o is 238 * about .5 ns. Since we try to leave a bit of margin, it's expected 239 * that numbers here will be fine even with much larger delay_o 240 * (the 1.4 ns assumed by the DesignWare Databook would result in the 241 * same results, for instance). 242 */ 243 if (!IS_ERR(priv->drv_clk)) { 244 int phase; 245 246 /* 247 * In almost all cases a 90 degree phase offset will provide 248 * sufficient hold times across all valid input clock rates 249 * assuming delay_o is not absurd for a given SoC. We'll use 250 * that as a default. 251 */ 252 phase = 90; 253 254 switch (ios->timing) { 255 case MMC_TIMING_MMC_DDR52: 256 /* 257 * Since clock in rate with MMC_DDR52 is doubled when 258 * bus width is 8 we need to double the phase offset 259 * to get the same timings. 260 */ 261 if (ios->bus_width == MMC_BUS_WIDTH_8) 262 phase = 180; 263 break; 264 case MMC_TIMING_UHS_SDR104: 265 case MMC_TIMING_MMC_HS200: 266 /* 267 * In the case of 150 MHz clock (typical max for 268 * Rockchip SoCs), 90 degree offset will add a delay 269 * of 1.67 ns. That will meet min hold time of .8 ns 270 * as long as clock output delay is < .87 ns. On 271 * SoCs measured this seems to be OK, but it doesn't 272 * hurt to give margin here, so we use 180. 273 */ 274 phase = 180; 275 break; 276 } 277 278 rockchip_mmc_set_phase(host, false, phase); 279 } 280 } 281 282 #define TUNING_ITERATION_TO_PHASE(i, num_phases) \ 283 (DIV_ROUND_UP((i) * 360, num_phases)) 284 285 static int dw_mci_rk3288_execute_tuning(struct dw_mci_slot *slot, u32 opcode) 286 { 287 struct dw_mci *host = slot->host; 288 struct dw_mci_rockchip_priv_data *priv = host->priv; 289 struct mmc_host *mmc = slot->mmc; 290 int ret = 0; 291 int i; 292 bool v, prev_v = 0, first_v; 293 struct range_t { 294 int start; 295 int end; /* inclusive */ 296 }; 297 struct range_t *ranges; 298 unsigned int range_count = 0; 299 int longest_range_len = -1; 300 int longest_range = -1; 301 int middle_phase; 302 int phase; 303 304 if (IS_ERR(priv->sample_clk)) { 305 dev_err(host->dev, "Tuning clock (sample_clk) not defined.\n"); 306 return -EIO; 307 } 308 309 ranges = kmalloc_array(priv->num_phases / 2 + 1, 310 sizeof(*ranges), GFP_KERNEL); 311 if (!ranges) 312 return -ENOMEM; 313 314 /* Try each phase and extract good ranges */ 315 for (i = 0; i < priv->num_phases; ) { 316 rockchip_mmc_set_phase(host, true, 317 TUNING_ITERATION_TO_PHASE( 318 i, 319 priv->num_phases)); 320 321 v = !mmc_send_tuning(mmc, opcode, NULL); 322 323 if (i == 0) 324 first_v = v; 325 326 if ((!prev_v) && v) { 327 range_count++; 328 ranges[range_count-1].start = i; 329 } 330 if (v) { 331 ranges[range_count-1].end = i; 332 i++; 333 } else if (i == priv->num_phases - 1) { 334 /* No extra skipping rules if we're at the end */ 335 i++; 336 } else { 337 /* 338 * No need to check too close to an invalid 339 * one since testing bad phases is slow. Skip 340 * 20 degrees. 341 */ 342 i += DIV_ROUND_UP(20 * priv->num_phases, 360); 343 344 /* Always test the last one */ 345 if (i >= priv->num_phases) 346 i = priv->num_phases - 1; 347 } 348 349 prev_v = v; 350 } 351 352 if (range_count == 0) { 353 dev_warn(host->dev, "All phases bad!"); 354 ret = -EIO; 355 goto free; 356 } 357 358 /* wrap around case, merge the end points */ 359 if ((range_count > 1) && first_v && v) { 360 ranges[0].start = ranges[range_count-1].start; 361 range_count--; 362 } 363 364 if (ranges[0].start == 0 && ranges[0].end == priv->num_phases - 1) { 365 rockchip_mmc_set_phase(host, true, priv->default_sample_phase); 366 367 dev_info(host->dev, "All phases work, using default phase %d.", 368 priv->default_sample_phase); 369 goto free; 370 } 371 372 /* Find the longest range */ 373 for (i = 0; i < range_count; i++) { 374 int len = (ranges[i].end - ranges[i].start + 1); 375 376 if (len < 0) 377 len += priv->num_phases; 378 379 if (longest_range_len < len) { 380 longest_range_len = len; 381 longest_range = i; 382 } 383 384 dev_dbg(host->dev, "Good phase range %d-%d (%d len)\n", 385 TUNING_ITERATION_TO_PHASE(ranges[i].start, 386 priv->num_phases), 387 TUNING_ITERATION_TO_PHASE(ranges[i].end, 388 priv->num_phases), 389 len 390 ); 391 } 392 393 dev_dbg(host->dev, "Best phase range %d-%d (%d len)\n", 394 TUNING_ITERATION_TO_PHASE(ranges[longest_range].start, 395 priv->num_phases), 396 TUNING_ITERATION_TO_PHASE(ranges[longest_range].end, 397 priv->num_phases), 398 longest_range_len 399 ); 400 401 middle_phase = ranges[longest_range].start + longest_range_len / 2; 402 middle_phase %= priv->num_phases; 403 phase = TUNING_ITERATION_TO_PHASE(middle_phase, priv->num_phases); 404 dev_info(host->dev, "Successfully tuned phase to %d\n", phase); 405 406 rockchip_mmc_set_phase(host, true, phase); 407 408 free: 409 kfree(ranges); 410 return ret; 411 } 412 413 static int dw_mci_common_parse_dt(struct dw_mci *host) 414 { 415 struct device_node *np = host->dev->of_node; 416 struct dw_mci_rockchip_priv_data *priv; 417 418 priv = devm_kzalloc(host->dev, sizeof(*priv), GFP_KERNEL); 419 if (!priv) 420 return -ENOMEM; 421 422 if (of_property_read_u32(np, "rockchip,desired-num-phases", 423 &priv->num_phases)) 424 priv->num_phases = 360; 425 426 if (of_property_read_u32(np, "rockchip,default-sample-phase", 427 &priv->default_sample_phase)) 428 priv->default_sample_phase = 0; 429 430 host->priv = priv; 431 432 return 0; 433 } 434 435 static int dw_mci_rk3288_parse_dt(struct dw_mci *host) 436 { 437 struct dw_mci_rockchip_priv_data *priv; 438 int err; 439 440 err = dw_mci_common_parse_dt(host); 441 if (err) 442 return err; 443 444 priv = host->priv; 445 446 priv->drv_clk = devm_clk_get(host->dev, "ciu-drive"); 447 if (IS_ERR(priv->drv_clk)) 448 dev_dbg(host->dev, "ciu-drive not available\n"); 449 450 priv->sample_clk = devm_clk_get(host->dev, "ciu-sample"); 451 if (IS_ERR(priv->sample_clk)) 452 dev_dbg(host->dev, "ciu-sample not available\n"); 453 454 priv->internal_phase = false; 455 456 return 0; 457 } 458 459 static int dw_mci_rk3576_parse_dt(struct dw_mci *host) 460 { 461 struct dw_mci_rockchip_priv_data *priv; 462 int err = dw_mci_common_parse_dt(host); 463 if (err) 464 return err; 465 466 priv = host->priv; 467 468 priv->internal_phase = true; 469 470 return 0; 471 } 472 473 static int dw_mci_rockchip_init(struct dw_mci *host) 474 { 475 struct dw_mci_rockchip_priv_data *priv = host->priv; 476 int ret, i; 477 478 /* It is slot 8 on Rockchip SoCs */ 479 host->sdio_id0 = 8; 480 481 if (of_device_is_compatible(host->dev->of_node, "rockchip,rk3288-dw-mshc")) { 482 host->bus_hz /= RK3288_CLKGEN_DIV; 483 484 /* clock driver will fail if the clock is less than the lowest source clock 485 * divided by the internal clock divider. Test for the lowest available 486 * clock and set the minimum freq to clock / clock divider. 487 */ 488 489 for (i = 0; i < ARRAY_SIZE(freqs); i++) { 490 ret = clk_round_rate(host->ciu_clk, freqs[i] * RK3288_CLKGEN_DIV); 491 if (ret > 0) { 492 host->minimum_speed = ret / RK3288_CLKGEN_DIV; 493 break; 494 } 495 } 496 if (ret < 0) 497 dev_warn(host->dev, "no valid minimum freq: %d\n", ret); 498 } 499 500 if (priv->internal_phase) 501 mci_writel(host, MISC_CON, MEM_CLK_AUTOGATE_ENABLE); 502 503 return 0; 504 } 505 506 static const struct dw_mci_drv_data rk2928_drv_data = { 507 .init = dw_mci_rockchip_init, 508 }; 509 510 static const struct dw_mci_drv_data rk3288_drv_data = { 511 .common_caps = MMC_CAP_CMD23, 512 .set_ios = dw_mci_rk3288_set_ios, 513 .execute_tuning = dw_mci_rk3288_execute_tuning, 514 .parse_dt = dw_mci_rk3288_parse_dt, 515 .init = dw_mci_rockchip_init, 516 }; 517 518 static const struct dw_mci_drv_data rk3576_drv_data = { 519 .common_caps = MMC_CAP_CMD23, 520 .set_ios = dw_mci_rk3288_set_ios, 521 .execute_tuning = dw_mci_rk3288_execute_tuning, 522 .parse_dt = dw_mci_rk3576_parse_dt, 523 .init = dw_mci_rockchip_init, 524 }; 525 526 static const struct of_device_id dw_mci_rockchip_match[] = { 527 { .compatible = "rockchip,rk2928-dw-mshc", 528 .data = &rk2928_drv_data }, 529 { .compatible = "rockchip,rk3288-dw-mshc", 530 .data = &rk3288_drv_data }, 531 { .compatible = "rockchip,rk3576-dw-mshc", 532 .data = &rk3576_drv_data }, 533 {}, 534 }; 535 MODULE_DEVICE_TABLE(of, dw_mci_rockchip_match); 536 537 static int dw_mci_rockchip_probe(struct platform_device *pdev) 538 { 539 const struct dw_mci_drv_data *drv_data; 540 const struct of_device_id *match; 541 int ret; 542 543 if (!pdev->dev.of_node) 544 return -ENODEV; 545 546 match = of_match_node(dw_mci_rockchip_match, pdev->dev.of_node); 547 drv_data = match->data; 548 549 pm_runtime_get_noresume(&pdev->dev); 550 pm_runtime_set_active(&pdev->dev); 551 pm_runtime_enable(&pdev->dev); 552 pm_runtime_set_autosuspend_delay(&pdev->dev, 50); 553 pm_runtime_use_autosuspend(&pdev->dev); 554 555 ret = dw_mci_pltfm_register(pdev, drv_data); 556 if (ret) { 557 pm_runtime_disable(&pdev->dev); 558 pm_runtime_set_suspended(&pdev->dev); 559 pm_runtime_put_noidle(&pdev->dev); 560 return ret; 561 } 562 563 pm_runtime_put_autosuspend(&pdev->dev); 564 565 return 0; 566 } 567 568 static void dw_mci_rockchip_remove(struct platform_device *pdev) 569 { 570 pm_runtime_get_sync(&pdev->dev); 571 pm_runtime_disable(&pdev->dev); 572 pm_runtime_put_noidle(&pdev->dev); 573 574 dw_mci_pltfm_remove(pdev); 575 } 576 577 static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = { 578 SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) 579 RUNTIME_PM_OPS(dw_mci_runtime_suspend, dw_mci_runtime_resume, NULL) 580 }; 581 582 static struct platform_driver dw_mci_rockchip_pltfm_driver = { 583 .probe = dw_mci_rockchip_probe, 584 .remove = dw_mci_rockchip_remove, 585 .driver = { 586 .name = "dwmmc_rockchip", 587 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 588 .of_match_table = dw_mci_rockchip_match, 589 .pm = pm_ptr(&dw_mci_rockchip_dev_pm_ops), 590 }, 591 }; 592 593 module_platform_driver(dw_mci_rockchip_pltfm_driver); 594 595 MODULE_AUTHOR("Addy Ke <addy.ke@rock-chips.com>"); 596 MODULE_DESCRIPTION("Rockchip Specific DW-MSHC Driver Extension"); 597 MODULE_ALIAS("platform:dwmmc_rockchip"); 598 MODULE_LICENSE("GPL v2"); 599