1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd 4 */ 5 6 #include <linux/module.h> 7 #include <linux/platform_device.h> 8 #include <linux/clk.h> 9 #include <linux/mmc/host.h> 10 #include <linux/of_address.h> 11 #include <linux/mmc/slot-gpio.h> 12 #include <linux/pm_runtime.h> 13 #include <linux/slab.h> 14 15 #include "dw_mmc.h" 16 #include "dw_mmc-pltfm.h" 17 18 #define RK3288_CLKGEN_DIV 2 19 #define SDMMC_TIMING_CON0 0x130 20 #define SDMMC_TIMING_CON1 0x134 21 #define ROCKCHIP_MMC_DELAY_SEL BIT(10) 22 #define ROCKCHIP_MMC_DEGREE_MASK 0x3 23 #define ROCKCHIP_MMC_DEGREE_OFFSET 1 24 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2 25 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET) 26 #define ROCKCHIP_MMC_DELAY_ELEMENT_PSEC 60 27 #define HIWORD_UPDATE(val, mask, shift) \ 28 ((val) << (shift) | (mask) << ((shift) + 16)) 29 30 static const unsigned int freqs[] = { 100000, 200000, 300000, 400000 }; 31 32 struct dw_mci_rockchip_priv_data { 33 struct clk *drv_clk; 34 struct clk *sample_clk; 35 int default_sample_phase; 36 int num_phases; 37 bool internal_phase; 38 }; 39 40 /* 41 * Each fine delay is between 44ps-77ps. Assume each fine delay is 60ps to 42 * simplify calculations. So 45degs could be anywhere between 33deg and 57.8deg. 43 */ 44 static int rockchip_mmc_get_internal_phase(struct dw_mci *host, bool sample) 45 { 46 unsigned long rate = clk_get_rate(host->ciu_clk); 47 u32 raw_value; 48 u16 degrees; 49 u32 delay_num = 0; 50 51 /* Constant signal, no measurable phase shift */ 52 if (!rate) 53 return 0; 54 55 if (sample) 56 raw_value = mci_readl(host, TIMING_CON1); 57 else 58 raw_value = mci_readl(host, TIMING_CON0); 59 60 raw_value >>= ROCKCHIP_MMC_DEGREE_OFFSET; 61 degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90; 62 63 if (raw_value & ROCKCHIP_MMC_DELAY_SEL) { 64 /* degrees/delaynum * 1000000 */ 65 unsigned long factor = (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10) * 66 36 * (rate / 10000); 67 68 delay_num = (raw_value & ROCKCHIP_MMC_DELAYNUM_MASK); 69 delay_num >>= ROCKCHIP_MMC_DELAYNUM_OFFSET; 70 degrees += DIV_ROUND_CLOSEST(delay_num * factor, 1000000); 71 } 72 73 return degrees % 360; 74 } 75 76 static int rockchip_mmc_get_phase(struct dw_mci *host, bool sample) 77 { 78 struct dw_mci_rockchip_priv_data *priv = host->priv; 79 struct clk *clock = sample ? priv->sample_clk : priv->drv_clk; 80 81 if (priv->internal_phase) 82 return rockchip_mmc_get_internal_phase(host, sample); 83 else 84 return clk_get_phase(clock); 85 } 86 87 static int rockchip_mmc_set_internal_phase(struct dw_mci *host, bool sample, int degrees) 88 { 89 unsigned long rate = clk_get_rate(host->ciu_clk); 90 u8 nineties, remainder; 91 u8 delay_num; 92 u32 raw_value; 93 u32 delay; 94 95 /* 96 * The below calculation is based on the output clock from 97 * MMC host to the card, which expects the phase clock inherits 98 * the clock rate from its parent, namely the output clock 99 * provider of MMC host. However, things may go wrong if 100 * (1) It is orphan. 101 * (2) It is assigned to the wrong parent. 102 * 103 * This check help debug the case (1), which seems to be the 104 * most likely problem we often face and which makes it difficult 105 * for people to debug unstable mmc tuning results. 106 */ 107 if (!rate) { 108 dev_err(host->dev, "%s: invalid clk rate\n", __func__); 109 return -EINVAL; 110 } 111 112 nineties = degrees / 90; 113 remainder = (degrees % 90); 114 115 /* 116 * Due to the inexact nature of the "fine" delay, we might 117 * actually go non-monotonic. We don't go _too_ monotonic 118 * though, so we should be OK. Here are options of how we may 119 * work: 120 * 121 * Ideally we end up with: 122 * 1.0, 2.0, ..., 69.0, 70.0, ..., 89.0, 90.0 123 * 124 * On one extreme (if delay is actually 44ps): 125 * .73, 1.5, ..., 50.6, 51.3, ..., 65.3, 90.0 126 * The other (if delay is actually 77ps): 127 * 1.3, 2.6, ..., 88.6. 89.8, ..., 114.0, 90 128 * 129 * It's possible we might make a delay that is up to 25 130 * degrees off from what we think we're making. That's OK 131 * though because we should be REALLY far from any bad range. 132 */ 133 134 /* 135 * Convert to delay; do a little extra work to make sure we 136 * don't overflow 32-bit / 64-bit numbers. 137 */ 138 delay = 10000000; /* PSECS_PER_SEC / 10000 / 10 */ 139 delay *= remainder; 140 delay = DIV_ROUND_CLOSEST(delay, 141 (rate / 1000) * 36 * 142 (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10)); 143 144 delay_num = (u8) min_t(u32, delay, 255); 145 146 raw_value = delay_num ? ROCKCHIP_MMC_DELAY_SEL : 0; 147 raw_value |= delay_num << ROCKCHIP_MMC_DELAYNUM_OFFSET; 148 raw_value |= nineties; 149 150 if (sample) 151 mci_writel(host, TIMING_CON1, HIWORD_UPDATE(raw_value, 0x07ff, 1)); 152 else 153 mci_writel(host, TIMING_CON0, HIWORD_UPDATE(raw_value, 0x07ff, 1)); 154 155 dev_dbg(host->dev, "set %s_phase(%d) delay_nums=%u actual_degrees=%d\n", 156 sample ? "sample" : "drv", degrees, delay_num, 157 rockchip_mmc_get_phase(host, sample) 158 ); 159 160 return 0; 161 } 162 163 static int rockchip_mmc_set_phase(struct dw_mci *host, bool sample, int degrees) 164 { 165 struct dw_mci_rockchip_priv_data *priv = host->priv; 166 struct clk *clock = sample ? priv->sample_clk : priv->drv_clk; 167 168 if (priv->internal_phase) 169 return rockchip_mmc_set_internal_phase(host, sample, degrees); 170 else 171 return clk_set_phase(clock, degrees); 172 } 173 174 static void dw_mci_rk3288_set_ios(struct dw_mci *host, struct mmc_ios *ios) 175 { 176 struct dw_mci_rockchip_priv_data *priv = host->priv; 177 int ret; 178 unsigned int cclkin; 179 u32 bus_hz; 180 181 if (ios->clock == 0) 182 return; 183 184 /* 185 * cclkin: source clock of mmc controller 186 * bus_hz: card interface clock generated by CLKGEN 187 * bus_hz = cclkin / RK3288_CLKGEN_DIV 188 * ios->clock = (div == 0) ? bus_hz : (bus_hz / (2 * div)) 189 * 190 * Note: div can only be 0 or 1, but div must be set to 1 for eMMC 191 * DDR52 8-bit mode. 192 */ 193 if (ios->bus_width == MMC_BUS_WIDTH_8 && 194 ios->timing == MMC_TIMING_MMC_DDR52) 195 cclkin = 2 * ios->clock * RK3288_CLKGEN_DIV; 196 else 197 cclkin = ios->clock * RK3288_CLKGEN_DIV; 198 199 ret = clk_set_rate(host->ciu_clk, cclkin); 200 if (ret) 201 dev_warn(host->dev, "failed to set rate %uHz err: %d\n", cclkin, ret); 202 203 bus_hz = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; 204 if (bus_hz != host->bus_hz) { 205 host->bus_hz = bus_hz; 206 /* force dw_mci_setup_bus() */ 207 host->current_speed = 0; 208 } 209 210 /* Make sure we use phases which we can enumerate with */ 211 if (!IS_ERR(priv->sample_clk) && ios->timing <= MMC_TIMING_SD_HS) 212 rockchip_mmc_set_phase(host, true, priv->default_sample_phase); 213 214 /* 215 * Set the drive phase offset based on speed mode to achieve hold times. 216 * 217 * NOTE: this is _not_ a value that is dynamically tuned and is also 218 * _not_ a value that will vary from board to board. It is a value 219 * that could vary between different SoC models if they had massively 220 * different output clock delays inside their dw_mmc IP block (delay_o), 221 * but since it's OK to overshoot a little we don't need to do complex 222 * calculations and can pick values that will just work for everyone. 223 * 224 * When picking values we'll stick with picking 0/90/180/270 since 225 * those can be made very accurately on all known Rockchip SoCs. 226 * 227 * Note that these values match values from the DesignWare Databook 228 * tables for the most part except for SDR12 and "ID mode". For those 229 * two modes the databook calculations assume a clock in of 50MHz. As 230 * seen above, we always use a clock in rate that is exactly the 231 * card's input clock (times RK3288_CLKGEN_DIV, but that gets divided 232 * back out before the controller sees it). 233 * 234 * From measurement of a single device, it appears that delay_o is 235 * about .5 ns. Since we try to leave a bit of margin, it's expected 236 * that numbers here will be fine even with much larger delay_o 237 * (the 1.4 ns assumed by the DesignWare Databook would result in the 238 * same results, for instance). 239 */ 240 if (!IS_ERR(priv->drv_clk)) { 241 int phase; 242 243 /* 244 * In almost all cases a 90 degree phase offset will provide 245 * sufficient hold times across all valid input clock rates 246 * assuming delay_o is not absurd for a given SoC. We'll use 247 * that as a default. 248 */ 249 phase = 90; 250 251 switch (ios->timing) { 252 case MMC_TIMING_MMC_DDR52: 253 /* 254 * Since clock in rate with MMC_DDR52 is doubled when 255 * bus width is 8 we need to double the phase offset 256 * to get the same timings. 257 */ 258 if (ios->bus_width == MMC_BUS_WIDTH_8) 259 phase = 180; 260 break; 261 case MMC_TIMING_UHS_SDR104: 262 case MMC_TIMING_MMC_HS200: 263 /* 264 * In the case of 150 MHz clock (typical max for 265 * Rockchip SoCs), 90 degree offset will add a delay 266 * of 1.67 ns. That will meet min hold time of .8 ns 267 * as long as clock output delay is < .87 ns. On 268 * SoCs measured this seems to be OK, but it doesn't 269 * hurt to give margin here, so we use 180. 270 */ 271 phase = 180; 272 break; 273 } 274 275 rockchip_mmc_set_phase(host, false, phase); 276 } 277 } 278 279 #define TUNING_ITERATION_TO_PHASE(i, num_phases) \ 280 (DIV_ROUND_UP((i) * 360, num_phases)) 281 282 static int dw_mci_rk3288_execute_tuning(struct dw_mci_slot *slot, u32 opcode) 283 { 284 struct dw_mci *host = slot->host; 285 struct dw_mci_rockchip_priv_data *priv = host->priv; 286 struct mmc_host *mmc = slot->mmc; 287 int ret = 0; 288 int i; 289 bool v, prev_v = 0, first_v; 290 struct range_t { 291 int start; 292 int end; /* inclusive */ 293 }; 294 struct range_t *ranges; 295 unsigned int range_count = 0; 296 int longest_range_len = -1; 297 int longest_range = -1; 298 int middle_phase; 299 int phase; 300 301 if (IS_ERR(priv->sample_clk)) { 302 dev_err(host->dev, "Tuning clock (sample_clk) not defined.\n"); 303 return -EIO; 304 } 305 306 ranges = kmalloc_array(priv->num_phases / 2 + 1, 307 sizeof(*ranges), GFP_KERNEL); 308 if (!ranges) 309 return -ENOMEM; 310 311 /* Try each phase and extract good ranges */ 312 for (i = 0; i < priv->num_phases; ) { 313 rockchip_mmc_set_phase(host, true, 314 TUNING_ITERATION_TO_PHASE( 315 i, 316 priv->num_phases)); 317 318 v = !mmc_send_tuning(mmc, opcode, NULL); 319 320 if (i == 0) 321 first_v = v; 322 323 if ((!prev_v) && v) { 324 range_count++; 325 ranges[range_count-1].start = i; 326 } 327 if (v) { 328 ranges[range_count-1].end = i; 329 i++; 330 } else if (i == priv->num_phases - 1) { 331 /* No extra skipping rules if we're at the end */ 332 i++; 333 } else { 334 /* 335 * No need to check too close to an invalid 336 * one since testing bad phases is slow. Skip 337 * 20 degrees. 338 */ 339 i += DIV_ROUND_UP(20 * priv->num_phases, 360); 340 341 /* Always test the last one */ 342 if (i >= priv->num_phases) 343 i = priv->num_phases - 1; 344 } 345 346 prev_v = v; 347 } 348 349 if (range_count == 0) { 350 dev_warn(host->dev, "All phases bad!"); 351 ret = -EIO; 352 goto free; 353 } 354 355 /* wrap around case, merge the end points */ 356 if ((range_count > 1) && first_v && v) { 357 ranges[0].start = ranges[range_count-1].start; 358 range_count--; 359 } 360 361 if (ranges[0].start == 0 && ranges[0].end == priv->num_phases - 1) { 362 rockchip_mmc_set_phase(host, true, priv->default_sample_phase); 363 364 dev_info(host->dev, "All phases work, using default phase %d.", 365 priv->default_sample_phase); 366 goto free; 367 } 368 369 /* Find the longest range */ 370 for (i = 0; i < range_count; i++) { 371 int len = (ranges[i].end - ranges[i].start + 1); 372 373 if (len < 0) 374 len += priv->num_phases; 375 376 if (longest_range_len < len) { 377 longest_range_len = len; 378 longest_range = i; 379 } 380 381 dev_dbg(host->dev, "Good phase range %d-%d (%d len)\n", 382 TUNING_ITERATION_TO_PHASE(ranges[i].start, 383 priv->num_phases), 384 TUNING_ITERATION_TO_PHASE(ranges[i].end, 385 priv->num_phases), 386 len 387 ); 388 } 389 390 dev_dbg(host->dev, "Best phase range %d-%d (%d len)\n", 391 TUNING_ITERATION_TO_PHASE(ranges[longest_range].start, 392 priv->num_phases), 393 TUNING_ITERATION_TO_PHASE(ranges[longest_range].end, 394 priv->num_phases), 395 longest_range_len 396 ); 397 398 middle_phase = ranges[longest_range].start + longest_range_len / 2; 399 middle_phase %= priv->num_phases; 400 phase = TUNING_ITERATION_TO_PHASE(middle_phase, priv->num_phases); 401 dev_info(host->dev, "Successfully tuned phase to %d\n", phase); 402 403 rockchip_mmc_set_phase(host, true, phase); 404 405 free: 406 kfree(ranges); 407 return ret; 408 } 409 410 static int dw_mci_common_parse_dt(struct dw_mci *host) 411 { 412 struct device_node *np = host->dev->of_node; 413 struct dw_mci_rockchip_priv_data *priv; 414 415 priv = devm_kzalloc(host->dev, sizeof(*priv), GFP_KERNEL); 416 if (!priv) 417 return -ENOMEM; 418 419 if (of_property_read_u32(np, "rockchip,desired-num-phases", 420 &priv->num_phases)) 421 priv->num_phases = 360; 422 423 if (of_property_read_u32(np, "rockchip,default-sample-phase", 424 &priv->default_sample_phase)) 425 priv->default_sample_phase = 0; 426 427 host->priv = priv; 428 429 return 0; 430 } 431 432 static int dw_mci_rk3288_parse_dt(struct dw_mci *host) 433 { 434 struct dw_mci_rockchip_priv_data *priv; 435 int err; 436 437 err = dw_mci_common_parse_dt(host); 438 if (err) 439 return err; 440 441 priv = host->priv; 442 443 priv->drv_clk = devm_clk_get(host->dev, "ciu-drive"); 444 if (IS_ERR(priv->drv_clk)) 445 dev_dbg(host->dev, "ciu-drive not available\n"); 446 447 priv->sample_clk = devm_clk_get(host->dev, "ciu-sample"); 448 if (IS_ERR(priv->sample_clk)) 449 dev_dbg(host->dev, "ciu-sample not available\n"); 450 451 priv->internal_phase = false; 452 453 return 0; 454 } 455 456 static int dw_mci_rk3576_parse_dt(struct dw_mci *host) 457 { 458 struct dw_mci_rockchip_priv_data *priv; 459 int err = dw_mci_common_parse_dt(host); 460 if (err) 461 return err; 462 463 priv = host->priv; 464 465 priv->internal_phase = true; 466 467 return 0; 468 } 469 470 static int dw_mci_rockchip_init(struct dw_mci *host) 471 { 472 int ret, i; 473 474 /* It is slot 8 on Rockchip SoCs */ 475 host->sdio_id0 = 8; 476 477 if (of_device_is_compatible(host->dev->of_node, "rockchip,rk3288-dw-mshc")) { 478 host->bus_hz /= RK3288_CLKGEN_DIV; 479 480 /* clock driver will fail if the clock is less than the lowest source clock 481 * divided by the internal clock divider. Test for the lowest available 482 * clock and set the minimum freq to clock / clock divider. 483 */ 484 485 for (i = 0; i < ARRAY_SIZE(freqs); i++) { 486 ret = clk_round_rate(host->ciu_clk, freqs[i] * RK3288_CLKGEN_DIV); 487 if (ret > 0) { 488 host->minimum_speed = ret / RK3288_CLKGEN_DIV; 489 break; 490 } 491 } 492 if (ret < 0) 493 dev_warn(host->dev, "no valid minimum freq: %d\n", ret); 494 } 495 496 return 0; 497 } 498 499 static const struct dw_mci_drv_data rk2928_drv_data = { 500 .init = dw_mci_rockchip_init, 501 }; 502 503 static const struct dw_mci_drv_data rk3288_drv_data = { 504 .common_caps = MMC_CAP_CMD23, 505 .set_ios = dw_mci_rk3288_set_ios, 506 .execute_tuning = dw_mci_rk3288_execute_tuning, 507 .parse_dt = dw_mci_rk3288_parse_dt, 508 .init = dw_mci_rockchip_init, 509 }; 510 511 static const struct dw_mci_drv_data rk3576_drv_data = { 512 .common_caps = MMC_CAP_CMD23, 513 .set_ios = dw_mci_rk3288_set_ios, 514 .execute_tuning = dw_mci_rk3288_execute_tuning, 515 .parse_dt = dw_mci_rk3576_parse_dt, 516 .init = dw_mci_rockchip_init, 517 }; 518 519 static const struct of_device_id dw_mci_rockchip_match[] = { 520 { .compatible = "rockchip,rk2928-dw-mshc", 521 .data = &rk2928_drv_data }, 522 { .compatible = "rockchip,rk3288-dw-mshc", 523 .data = &rk3288_drv_data }, 524 { .compatible = "rockchip,rk3576-dw-mshc", 525 .data = &rk3576_drv_data }, 526 {}, 527 }; 528 MODULE_DEVICE_TABLE(of, dw_mci_rockchip_match); 529 530 static int dw_mci_rockchip_probe(struct platform_device *pdev) 531 { 532 const struct dw_mci_drv_data *drv_data; 533 const struct of_device_id *match; 534 int ret; 535 536 if (!pdev->dev.of_node) 537 return -ENODEV; 538 539 match = of_match_node(dw_mci_rockchip_match, pdev->dev.of_node); 540 drv_data = match->data; 541 542 pm_runtime_get_noresume(&pdev->dev); 543 pm_runtime_set_active(&pdev->dev); 544 pm_runtime_enable(&pdev->dev); 545 pm_runtime_set_autosuspend_delay(&pdev->dev, 50); 546 pm_runtime_use_autosuspend(&pdev->dev); 547 548 ret = dw_mci_pltfm_register(pdev, drv_data); 549 if (ret) { 550 pm_runtime_disable(&pdev->dev); 551 pm_runtime_set_suspended(&pdev->dev); 552 pm_runtime_put_noidle(&pdev->dev); 553 return ret; 554 } 555 556 pm_runtime_put_autosuspend(&pdev->dev); 557 558 return 0; 559 } 560 561 static void dw_mci_rockchip_remove(struct platform_device *pdev) 562 { 563 pm_runtime_get_sync(&pdev->dev); 564 pm_runtime_disable(&pdev->dev); 565 pm_runtime_put_noidle(&pdev->dev); 566 567 dw_mci_pltfm_remove(pdev); 568 } 569 570 static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = { 571 SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, 572 pm_runtime_force_resume) 573 SET_RUNTIME_PM_OPS(dw_mci_runtime_suspend, 574 dw_mci_runtime_resume, 575 NULL) 576 }; 577 578 static struct platform_driver dw_mci_rockchip_pltfm_driver = { 579 .probe = dw_mci_rockchip_probe, 580 .remove = dw_mci_rockchip_remove, 581 .driver = { 582 .name = "dwmmc_rockchip", 583 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 584 .of_match_table = dw_mci_rockchip_match, 585 .pm = &dw_mci_rockchip_dev_pm_ops, 586 }, 587 }; 588 589 module_platform_driver(dw_mci_rockchip_pltfm_driver); 590 591 MODULE_AUTHOR("Addy Ke <addy.ke@rock-chips.com>"); 592 MODULE_DESCRIPTION("Rockchip Specific DW-MSHC Driver Extension"); 593 MODULE_ALIAS("platform:dwmmc_rockchip"); 594 MODULE_LICENSE("GPL v2"); 595