xref: /linux/drivers/net/dsa/sja1105/sja1105_tas.c (revision 1d1997db870f4058676439ef7014390ba9e24eb2)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
3  */
4 #include "sja1105.h"
5 
/* Values for the clock source (CLKSRC) field of the schedule entry points
 * parameters table. This driver programs SJA1105_TAS_CLKSRC_PTP, see
 * sja1105_init_scheduling().
 */
#define SJA1105_TAS_CLKSRC_DISABLED	0
#define SJA1105_TAS_CLKSRC_STANDALONE	1
#define SJA1105_TAS_CLKSRC_AS6802	2
#define SJA1105_TAS_CLKSRC_PTP		3
/* Exclusive upper bound for a gate control list interval, expressed in
 * 200 ns TAS deltas (see ns_to_sja1105_delta).
 */
#define SJA1105_TAS_MAX_DELTA		BIT(19)
/* Mask with one bit set per traffic class (gate) of a port */
#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)

/* Recover the sja1105_tas_data from its embedded work_struct */
#define work_to_sja1105_tas(d) \
	container_of((d), struct sja1105_tas_data, tas_work)
/* Recover the sja1105_private from its embedded sja1105_tas_data */
#define tas_to_sja1105(d) \
	container_of((d), struct sja1105_private, tas_data)
17 
18 /* This is not a preprocessor macro because the "ns" argument may or may not be
19  * s64 at caller side. This ensures it is properly type-cast before div_s64.
20  */
21 static s64 ns_to_sja1105_delta(s64 ns)
22 {
23 	return div_s64(ns, 200);
24 }
25 
26 static s64 sja1105_delta_to_ns(s64 delta)
27 {
28 	return delta * 200;
29 }
30 
31 /* Calculate the first base_time in the future that satisfies this
32  * relationship:
33  *
34  * future_base_time = base_time + N x cycle_time >= now, or
35  *
36  *      now - base_time
37  * N >= ---------------
38  *         cycle_time
39  *
40  * Because N is an integer, the ceiling value of the above "a / b" ratio
41  * is in fact precisely the floor value of "(a + b - 1) / b", which is
42  * easier to calculate only having integer division tools.
43  */
44 static s64 future_base_time(s64 base_time, s64 cycle_time, s64 now)
45 {
46 	s64 a, b, n;
47 
48 	if (base_time >= now)
49 		return base_time;
50 
51 	a = now - base_time;
52 	b = cycle_time;
53 	n = div_s64(a + b - 1, b);
54 
55 	return base_time + n * cycle_time;
56 }
57 
58 static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
59 {
60 	struct sja1105_tas_data *tas_data = &priv->tas_data;
61 	struct dsa_switch *ds = priv->ds;
62 	s64 earliest_base_time = S64_MAX;
63 	s64 latest_base_time = 0;
64 	s64 its_cycle_time = 0;
65 	s64 max_cycle_time = 0;
66 	int port;
67 
68 	tas_data->enabled = false;
69 
70 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
71 		const struct tc_taprio_qopt_offload *offload;
72 
73 		offload = tas_data->offload[port];
74 		if (!offload)
75 			continue;
76 
77 		tas_data->enabled = true;
78 
79 		if (max_cycle_time < offload->cycle_time)
80 			max_cycle_time = offload->cycle_time;
81 		if (latest_base_time < offload->base_time)
82 			latest_base_time = offload->base_time;
83 		if (earliest_base_time > offload->base_time) {
84 			earliest_base_time = offload->base_time;
85 			its_cycle_time = offload->cycle_time;
86 		}
87 	}
88 
89 	if (!tas_data->enabled)
90 		return 0;
91 
92 	/* Roll the earliest base time over until it is in a comparable
93 	 * time base with the latest, then compare their deltas.
94 	 * We want to enforce that all ports' base times are within
95 	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
96 	 */
97 	earliest_base_time = future_base_time(earliest_base_time,
98 					      its_cycle_time,
99 					      latest_base_time);
100 	while (earliest_base_time > latest_base_time)
101 		earliest_base_time -= its_cycle_time;
102 	if (latest_base_time - earliest_base_time >
103 	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
104 		dev_err(ds->dev,
105 			"Base times too far apart: min %llu max %llu\n",
106 			earliest_base_time, latest_base_time);
107 		return -ERANGE;
108 	}
109 
110 	tas_data->earliest_base_time = earliest_base_time;
111 	tas_data->max_cycle_time = max_cycle_time;
112 
113 	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
114 	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
115 	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
116 
117 	return 0;
118 }
119 
120 /* Lo and behold: the egress scheduler from hell.
121  *
122  * At the hardware level, the Time-Aware Shaper holds a global linear arrray of
123  * all schedule entries for all ports. These are the Gate Control List (GCL)
124  * entries, let's call them "timeslots" for short. This linear array of
125  * timeslots is held in BLK_IDX_SCHEDULE.
126  *
127  * Then there are a maximum of 8 "execution threads" inside the switch, which
128  * iterate cyclically through the "schedule". Each "cycle" has an entry point
129  * and an exit point, both being timeslot indices in the schedule table. The
130  * hardware calls each cycle a "subschedule".
131  *
132  * Subschedule (cycle) i starts when
133  *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
134  *
135  * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
136  *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
137  *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
138  *
139  * For each schedule entry (timeslot) k, the engine executes the gate control
140  * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
141  *
142  *         +---------+
143  *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
144  *         +---------+
145  *              |
146  *              +-----------------+
147  *                                | .actsubsch
148  *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
149  *                 +-------+-------+
150  *                 |cycle 0|cycle 1|
151  *                 +-------+-------+
152  *                   |  |      |  |
153  *  +----------------+  |      |  +-------------------------------------+
154  *  |   .subschindx     |      |             .subschindx                |
155  *  |                   |      +---------------+                        |
156  *  |          .address |        .address      |                        |
157  *  |                   |                      |                        |
158  *  |                   |                      |                        |
159  *  |  BLK_IDX_SCHEDULE v                      v                        |
160  *  |              +-------+-------+-------+-------+-------+------+     |
161  *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
162  *  |              +-------+-------+-------+-------+-------+------+     |
163  *  |                                  ^                    ^  ^  ^     |
164  *  |                                  |                    |  |  |     |
165  *  |        +-------------------------+                    |  |  |     |
166  *  |        |              +-------------------------------+  |  |     |
167  *  |        |              |              +-------------------+  |     |
168  *  |        |              |              |                      |     |
169  *  | +---------------------------------------------------------------+ |
170  *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
171  *  | +---------------------------------------------------------------+ |
172  *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
173  *  |        |              |                                           |
174  *  +--------+              +-------------------------------------------+
175  *
176  *  In the above picture there are two subschedules (cycles):
177  *
178  *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
179  *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
180  *
181  *  All other possible execution threads must be marked as unused by making
182  *  their "subschedule end index" (subscheind) equal to the last valid
183  *  subschedule's end index (in this case 5).
184  */
185 static int sja1105_init_scheduling(struct sja1105_private *priv)
186 {
187 	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
188 	struct sja1105_schedule_entry_points_params_entry
189 					*schedule_entry_points_params;
190 	struct sja1105_schedule_params_entry *schedule_params;
191 	struct sja1105_tas_data *tas_data = &priv->tas_data;
192 	struct sja1105_schedule_entry *schedule;
193 	struct sja1105_table *table;
194 	int schedule_start_idx;
195 	s64 entry_point_delta;
196 	int schedule_end_idx;
197 	int num_entries = 0;
198 	int num_cycles = 0;
199 	int cycle = 0;
200 	int i, k = 0;
201 	int port, rc;
202 
203 	rc = sja1105_tas_set_runtime_params(priv);
204 	if (rc < 0)
205 		return rc;
206 
207 	/* Discard previous Schedule Table */
208 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
209 	if (table->entry_count) {
210 		kfree(table->entries);
211 		table->entry_count = 0;
212 	}
213 
214 	/* Discard previous Schedule Entry Points Parameters Table */
215 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
216 	if (table->entry_count) {
217 		kfree(table->entries);
218 		table->entry_count = 0;
219 	}
220 
221 	/* Discard previous Schedule Parameters Table */
222 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
223 	if (table->entry_count) {
224 		kfree(table->entries);
225 		table->entry_count = 0;
226 	}
227 
228 	/* Discard previous Schedule Entry Points Table */
229 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
230 	if (table->entry_count) {
231 		kfree(table->entries);
232 		table->entry_count = 0;
233 	}
234 
235 	/* Figure out the dimensioning of the problem */
236 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
237 		if (tas_data->offload[port]) {
238 			num_entries += tas_data->offload[port]->num_entries;
239 			num_cycles++;
240 		}
241 	}
242 
243 	/* Nothing to do */
244 	if (!num_cycles)
245 		return 0;
246 
247 	/* Pre-allocate space in the static config tables */
248 
249 	/* Schedule Table */
250 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
251 	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
252 				 GFP_KERNEL);
253 	if (!table->entries)
254 		return -ENOMEM;
255 	table->entry_count = num_entries;
256 	schedule = table->entries;
257 
258 	/* Schedule Points Parameters Table */
259 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
260 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
261 				 table->ops->unpacked_entry_size, GFP_KERNEL);
262 	if (!table->entries)
263 		/* Previously allocated memory will be freed automatically in
264 		 * sja1105_static_config_free. This is true for all early
265 		 * returns below.
266 		 */
267 		return -ENOMEM;
268 	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
269 	schedule_entry_points_params = table->entries;
270 
271 	/* Schedule Parameters Table */
272 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
273 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
274 				 table->ops->unpacked_entry_size, GFP_KERNEL);
275 	if (!table->entries)
276 		return -ENOMEM;
277 	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
278 	schedule_params = table->entries;
279 
280 	/* Schedule Entry Points Table */
281 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
282 	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
283 				 GFP_KERNEL);
284 	if (!table->entries)
285 		return -ENOMEM;
286 	table->entry_count = num_cycles;
287 	schedule_entry_points = table->entries;
288 
289 	/* Finally start populating the static config tables */
290 	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
291 	schedule_entry_points_params->actsubsch = num_cycles - 1;
292 
293 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
294 		const struct tc_taprio_qopt_offload *offload;
295 		/* Relative base time */
296 		s64 rbt;
297 
298 		offload = tas_data->offload[port];
299 		if (!offload)
300 			continue;
301 
302 		schedule_start_idx = k;
303 		schedule_end_idx = k + offload->num_entries - 1;
304 		/* This is the base time expressed as a number of TAS ticks
305 		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
306 		 * the operational base time.
307 		 */
308 		rbt = future_base_time(offload->base_time,
309 				       offload->cycle_time,
310 				       tas_data->earliest_base_time);
311 		rbt -= tas_data->earliest_base_time;
312 		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
313 		 * delta cannot be zero, which is shitty. Advance all relative
314 		 * base times by 1 TAS delta, so that even the earliest base
315 		 * time becomes 1 in relative terms. Then start the operational
316 		 * base time (PTPSCHTM) one TAS delta earlier than planned.
317 		 */
318 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
319 
320 		schedule_entry_points[cycle].subschindx = cycle;
321 		schedule_entry_points[cycle].delta = entry_point_delta;
322 		schedule_entry_points[cycle].address = schedule_start_idx;
323 
324 		/* The subschedule end indices need to be
325 		 * monotonically increasing.
326 		 */
327 		for (i = cycle; i < 8; i++)
328 			schedule_params->subscheind[i] = schedule_end_idx;
329 
330 		for (i = 0; i < offload->num_entries; i++, k++) {
331 			s64 delta_ns = offload->entries[i].interval;
332 
333 			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
334 			schedule[k].destports = BIT(port);
335 			schedule[k].resmedia_en = true;
336 			schedule[k].resmedia = SJA1105_GATE_MASK &
337 					~offload->entries[i].gate_mask;
338 		}
339 		cycle++;
340 	}
341 
342 	return 0;
343 }
344 
345 /* Be there 2 port subschedules, each executing an arbitrary number of gate
346  * open/close events cyclically.
347  * None of those gate events must ever occur at the exact same time, otherwise
348  * the switch is known to act in exotically strange ways.
349  * However the hardware doesn't bother performing these integrity checks.
350  * So here we are with the task of validating whether the new @admin offload
351  * has any conflict with the already established TAS configuration in
352  * tas_data->offload.  We already know the other ports are in harmony with one
353  * another, otherwise we wouldn't have saved them.
354  * Each gate event executes periodically, with a period of @cycle_time and a
355  * phase given by its cycle's @base_time plus its offset within the cycle
356  * (which in turn is given by the length of the events prior to it).
357  * There are two aspects to possible collisions:
358  * - Collisions within one cycle's (actually the longest cycle's) time frame.
359  *   For that, we need to compare the cartesian product of each possible
360  *   occurrence of each event within one cycle time.
361  * - Collisions in the future. Events may not collide within one cycle time,
362  *   but if two port schedules don't have the same periodicity (aka the cycle
363  *   times aren't multiples of one another), they surely will some time in the
364  *   future (actually they will collide an infinite amount of times).
365  */
/* Returns true if the new @admin schedule would make any gate event on
 * @port coincide in time with a gate event of the schedule already
 * offloaded on @port (or if the two cycle times aren't multiples of one
 * another, in which case a future collision is guaranteed). Returns false
 * when there is no conflict, including when @port has no offload at all.
 * NOTE(review): complexity is O(i x j x occurrences^2), acceptable for the
 * small GCLs this hardware supports.
 */
static bool
sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
			    const struct tc_taprio_qopt_offload *admin)
{
	struct sja1105_tas_data *tas_data = &priv->tas_data;
	const struct tc_taprio_qopt_offload *offload;
	s64 max_cycle_time, min_cycle_time;
	s64 delta1, delta2;
	s64 rbt1, rbt2;
	s64 stop_time;
	s64 t1, t2;
	int i, j;
	s32 rem;

	offload = tas_data->offload[port];
	if (!offload)
		return false;

	/* Check if the two cycle times are multiples of one another.
	 * If they aren't, then they will surely collide.
	 */
	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
	if (rem)
		return true;

	/* Calculate the "reduced" base time of each of the two cycles
	 * (transposed back as close to 0 as possible) by dividing to
	 * the cycle time.
	 */
	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
	rbt1 = rem;

	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
	rbt2 = rem;

	/* Scanning up to stop_time covers every relative phase the two
	 * periodic schedules can ever be in, since the cycle times divide
	 * one another.
	 */
	stop_time = max_cycle_time + max(rbt1, rbt2);

	/* delta1 is the relative base time of each GCL entry within
	 * the established ports' TAS config.
	 */
	for (i = 0, delta1 = 0;
	     i < offload->num_entries;
	     delta1 += offload->entries[i].interval, i++) {
		/* delta2 is the relative base time of each GCL entry
		 * within the newly added TAS config.
		 */
		for (j = 0, delta2 = 0;
		     j < admin->num_entries;
		     delta2 += admin->entries[j].interval, j++) {
			/* t1 follows all possible occurrences of the
			 * established ports' GCL entry i within the
			 * first cycle time.
			 */
			for (t1 = rbt1 + delta1;
			     t1 <= stop_time;
			     t1 += offload->cycle_time) {
				/* t2 follows all possible occurrences
				 * of the newly added GCL entry j
				 * within the first cycle time.
				 */
				for (t2 = rbt2 + delta2;
				     t2 <= stop_time;
				     t2 += admin->cycle_time) {
					if (t1 == t2) {
						dev_warn(priv->ds->dev,
							 "GCL entry %d collides with entry %d of port %d\n",
							 j, i, port);
						return true;
					}
				}
			}
		}
	}

	return false;
}
444 
445 int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
446 			    struct tc_taprio_qopt_offload *admin)
447 {
448 	struct sja1105_private *priv = ds->priv;
449 	struct sja1105_tas_data *tas_data = &priv->tas_data;
450 	int other_port, rc, i;
451 
452 	/* Can't change an already configured port (must delete qdisc first).
453 	 * Can't delete the qdisc from an unconfigured port.
454 	 */
455 	if (!!tas_data->offload[port] == admin->enable)
456 		return -EINVAL;
457 
458 	if (!admin->enable) {
459 		taprio_offload_free(tas_data->offload[port]);
460 		tas_data->offload[port] = NULL;
461 
462 		rc = sja1105_init_scheduling(priv);
463 		if (rc < 0)
464 			return rc;
465 
466 		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
467 	}
468 
469 	/* The cycle time extension is the amount of time the last cycle from
470 	 * the old OPER needs to be extended in order to phase-align with the
471 	 * base time of the ADMIN when that becomes the new OPER.
472 	 * But of course our switch needs to be reset to switch-over between
473 	 * the ADMIN and the OPER configs - so much for a seamless transition.
474 	 * So don't add insult over injury and just say we don't support cycle
475 	 * time extension.
476 	 */
477 	if (admin->cycle_time_extension)
478 		return -ENOTSUPP;
479 
480 	if (!ns_to_sja1105_delta(admin->base_time)) {
481 		dev_err(ds->dev, "A base time of zero is not hardware-allowed\n");
482 		return -ERANGE;
483 	}
484 
485 	for (i = 0; i < admin->num_entries; i++) {
486 		s64 delta_ns = admin->entries[i].interval;
487 		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
488 		bool too_long, too_short;
489 
490 		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
491 		too_short = (delta_cycles == 0);
492 		if (too_long || too_short) {
493 			dev_err(priv->ds->dev,
494 				"Interval %llu too %s for GCL entry %d\n",
495 				delta_ns, too_long ? "long" : "short", i);
496 			return -ERANGE;
497 		}
498 	}
499 
500 	for (other_port = 0; other_port < SJA1105_NUM_PORTS; other_port++) {
501 		if (other_port == port)
502 			continue;
503 
504 		if (sja1105_tas_check_conflicts(priv, other_port, admin))
505 			return -ERANGE;
506 	}
507 
508 	tas_data->offload[port] = taprio_offload_get(admin);
509 
510 	rc = sja1105_init_scheduling(priv);
511 	if (rc < 0)
512 		return rc;
513 
514 	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
515 }
516 
517 static int sja1105_tas_check_running(struct sja1105_private *priv)
518 {
519 	struct sja1105_tas_data *tas_data = &priv->tas_data;
520 	struct dsa_switch *ds = priv->ds;
521 	struct sja1105_ptp_cmd cmd = {0};
522 	int rc;
523 
524 	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
525 	if (rc < 0)
526 		return rc;
527 
528 	if (cmd.ptpstrtsch == 1)
529 		/* Schedule successfully started */
530 		tas_data->state = SJA1105_TAS_STATE_RUNNING;
531 	else if (cmd.ptpstopsch == 1)
532 		/* Schedule is stopped */
533 		tas_data->state = SJA1105_TAS_STATE_DISABLED;
534 	else
535 		/* Schedule is probably not configured with PTP clock source */
536 		rc = -EINVAL;
537 
538 	return rc;
539 }
540 
541 /* Write to PTPCLKCORP */
542 static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
543 				    u64 correction)
544 {
545 	const struct sja1105_regs *regs = priv->info->regs;
546 	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
547 
548 	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
549 				&ptpclkcorp, NULL);
550 }
551 
552 /* Write to PTPSCHTM */
553 static int sja1105_tas_set_base_time(struct sja1105_private *priv,
554 				     u64 base_time)
555 {
556 	const struct sja1105_regs *regs = priv->info->regs;
557 	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
558 
559 	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
560 				&ptpschtm, NULL);
561 }
562 
563 static int sja1105_tas_start(struct sja1105_private *priv)
564 {
565 	struct sja1105_tas_data *tas_data = &priv->tas_data;
566 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
567 	struct dsa_switch *ds = priv->ds;
568 	int rc;
569 
570 	dev_dbg(ds->dev, "Starting the TAS\n");
571 
572 	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
573 	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
574 		dev_err(ds->dev, "TAS already started\n");
575 		return -EINVAL;
576 	}
577 
578 	cmd->ptpstrtsch = 1;
579 	cmd->ptpstopsch = 0;
580 
581 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
582 	if (rc < 0)
583 		return rc;
584 
585 	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
586 
587 	return 0;
588 }
589 
590 static int sja1105_tas_stop(struct sja1105_private *priv)
591 {
592 	struct sja1105_tas_data *tas_data = &priv->tas_data;
593 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
594 	struct dsa_switch *ds = priv->ds;
595 	int rc;
596 
597 	dev_dbg(ds->dev, "Stopping the TAS\n");
598 
599 	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
600 		dev_err(ds->dev, "TAS already disabled\n");
601 		return -EINVAL;
602 	}
603 
604 	cmd->ptpstopsch = 1;
605 	cmd->ptpstrtsch = 0;
606 
607 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
608 	if (rc < 0)
609 		return rc;
610 
611 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
612 
613 	return 0;
614 }
615 
616 /* The schedule engine and the PTP clock are driven by the same oscillator, and
617  * they run in parallel. But whilst the PTP clock can keep an absolute
618  * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
619  * up a delta, which is 200ns), and wrapping around at the end of each cycle.
620  * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
621  * (in PTP domain).
622  * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
623  * a software servo, and the schedule engine clock runs in parallel to the PTP
624  * clock, there is logic internal to the switch that periodically keeps the
625  * schedule engine from drifting away. The frequency with which this internal
626  * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
627  * a value also in the PTP clock domain, and is also rate-corrected.
628  * To be precise, during a correction period, there is logic to determine by
629  * how many scheduler clock ticks has the PTP clock drifted. At the end of each
630  * correction period/beginning of new one, the length of a delta is shrunk or
631  * expanded with an integer number of ticks, compared with the typical 25.
632  * So a delta lasts for 200ns (or 25 ticks) only on average.
633  * Sometimes it is longer, sometimes it is shorter. The internal syntonization
634  * logic can adjust for at most 5 ticks each 20 ticks.
635  *
636  * The first implication is that you should choose your schedule correction
637  * period to be an integer multiple of the schedule length. Preferably one.
638  * In case there are schedules of multiple ports active, then the correction
639  * period needs to be a multiple of them all. Given the restriction that the
640  * cycle times have to be multiples of one another anyway, this means the
641  * correction period can simply be the largest cycle time, hence the current
642  * choice. This way, the updates are always synchronous to the transmission
643  * cycle, and therefore predictable.
644  *
645  * The second implication is that at the beginning of a correction period, the
646  * first few deltas will be modulated in time, until the schedule engine is
647  * properly phase-aligned with the PTP clock. For this reason, you should place
648  * your best-effort traffic at the beginning of a cycle, and your
649  * time-triggered traffic afterwards.
650  *
651  * The third implication is that once the schedule engine is started, it can
652  * only adjust for so much drift within a correction period. In the servo you
653  * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
654  * want to do the latter, you need to stop and restart the schedule engine,
655  * which is what the state machine handles.
656  */
/* TAS state machine, run from the tas_work work item (scheduled by
 * sja1105_tas_clockstep / sja1105_tas_adjfreq). Serialized against other
 * PTP operations via ptp_data->lock. Drives the DISABLED ->
 * ENABLED_NOT_RUNNING -> RUNNING transitions, restarting the schedule
 * whenever the PTP clock was stepped (see the block comment above for why
 * a step cannot be absorbed by the hardware syntonization logic).
 */
static void sja1105_tas_state_machine(struct work_struct *work)
{
	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
	struct sja1105_private *priv = tas_to_sja1105(tas_data);
	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
	struct timespec64 base_time_ts, now_ts;
	struct dsa_switch *ds = priv->ds;
	struct timespec64 diff;
	s64 base_time, now;
	int rc = 0;

	mutex_lock(&ptp_data->lock);

	switch (tas_data->state) {
	case SJA1105_TAS_STATE_DISABLED:
		/* Can't do anything at all if clock is still being stepped */
		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
			break;

		/* Correction period = longest cycle time, see the block
		 * comment above for the rationale.
		 */
		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
		if (rc < 0)
			break;

		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
		if (rc < 0)
			break;

		/* Plan to start the earliest schedule first. The others
		 * will be started in hardware, by way of their respective
		 * entry points delta.
		 * Try our best to avoid fringe cases (race condition between
		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
		 * least one second in the future from now. This is not ideal,
		 * but this only needs to buy us time until the
		 * sja1105_tas_start command below gets executed.
		 */
		base_time = future_base_time(tas_data->earliest_base_time,
					     tas_data->max_cycle_time,
					     now + 1ull * NSEC_PER_SEC);
		/* Compensate for the +1 delta added to every entry point in
		 * sja1105_init_scheduling (delta of zero is not allowed).
		 */
		base_time -= sja1105_delta_to_ns(1);

		rc = sja1105_tas_set_base_time(priv, base_time);
		if (rc < 0)
			break;

		tas_data->oper_base_time = base_time;

		rc = sja1105_tas_start(priv);
		if (rc < 0)
			break;

		base_time_ts = ns_to_timespec64(base_time);
		now_ts = ns_to_timespec64(now);

		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
			base_time_ts.tv_sec, base_time_ts.tv_nsec,
			now_ts.tv_sec, now_ts.tv_nsec);

		break;

	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
			/* Clock was stepped.. bad news for TAS */
			sja1105_tas_stop(priv);
			break;
		}

		/* Check if TAS has actually started, by comparing the
		 * scheduled start time with the SJA1105 PTP clock
		 */
		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
		if (rc < 0)
			break;

		if (now < tas_data->oper_base_time) {
			/* TAS has not started yet */
			diff = ns_to_timespec64(tas_data->oper_base_time - now);
			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
				diff.tv_sec, diff.tv_nsec);
			break;
		}

		/* Time elapsed, what happened? */
		rc = sja1105_tas_check_running(priv);
		if (rc < 0)
			break;

		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
			/* TAS has started */
			dev_err(ds->dev,
				"TAS not started despite time elapsed\n");

		break;

	case SJA1105_TAS_STATE_RUNNING:
		/* Clock was stepped.. bad news for TAS */
		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
			sja1105_tas_stop(priv);
			break;
		}

		rc = sja1105_tas_check_running(priv);
		if (rc < 0)
			break;

		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
			dev_err(ds->dev, "TAS surprisingly stopped\n");

		break;

	default:
		if (net_ratelimit())
			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
	}

	if (rc && net_ratelimit())
		dev_err(ds->dev, "An operation returned %d\n", rc);

	mutex_unlock(&ptp_data->lock);
}
777 
778 void sja1105_tas_clockstep(struct dsa_switch *ds)
779 {
780 	struct sja1105_private *priv = ds->priv;
781 	struct sja1105_tas_data *tas_data = &priv->tas_data;
782 
783 	if (!tas_data->enabled)
784 		return;
785 
786 	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
787 	schedule_work(&tas_data->tas_work);
788 }
789 
790 void sja1105_tas_adjfreq(struct dsa_switch *ds)
791 {
792 	struct sja1105_private *priv = ds->priv;
793 	struct sja1105_tas_data *tas_data = &priv->tas_data;
794 
795 	if (!tas_data->enabled)
796 		return;
797 
798 	/* No reason to schedule the workqueue, nothing changed */
799 	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
800 		return;
801 
802 	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
803 	schedule_work(&tas_data->tas_work);
804 }
805 
806 void sja1105_tas_setup(struct dsa_switch *ds)
807 {
808 	struct sja1105_private *priv = ds->priv;
809 	struct sja1105_tas_data *tas_data = &priv->tas_data;
810 
811 	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
812 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
813 	tas_data->last_op = SJA1105_PTP_NONE;
814 }
815 
816 void sja1105_tas_teardown(struct dsa_switch *ds)
817 {
818 	struct sja1105_private *priv = ds->priv;
819 	struct tc_taprio_qopt_offload *offload;
820 	int port;
821 
822 	cancel_work_sync(&priv->tas_data.tas_work);
823 
824 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
825 		offload = priv->tas_data.offload[port];
826 		if (!offload)
827 			continue;
828 
829 		taprio_offload_free(offload);
830 	}
831 }
832