xref: /freebsd/sys/dev/sound/pcm/feeder_rate.c (revision f9218d3d4fd34f082473b3a021c6d4d109fb47cf)
1 /*
2  * Copyright (c) 2003 Orion Hodson <orion@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * MAINTAINER: Orion Hodson <orion@freebsd.org>
27  *
28  * This rate conversion code uses linear interpolation without any
29  * pre- or post- interpolation filtering to combat aliasing.  This
30  * greatly limits the sound quality and should be addressed at some
31  * stage in the future.
32  *
33  * Since this accuracy of interpolation is sensitive and examination
34  * of the algorithm output is harder from the kernel, th code is
35  * designed to be compiled in the kernel and in a userland test
36  * harness.  This is done by selectively including and excluding code
37  * with several portions based on whether _KERNEL is defined.  It's a
38  * little ugly, but exceedingly useful.  The testsuite and its
39  * revisions can be found at:
40  *		http://people.freebsd.org/~orion/feedrate/
41  *
42  * Special thanks to Ken Marx for exposing flaws in the code and for
43  * testing revisions.
44  */
45 
46 #ifdef _KERNEL
47 
48 #include <dev/sound/pcm/sound.h>
49 #include "feeder_if.h"
50 
51 SND_DECLARE_FILE("$FreeBSD$");
52 
53 #endif /* _KERNEL */
54 
55 MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder");
56 
57 #ifndef RATE_ASSERT
58 #define RATE_ASSERT(x, y) /* KASSERT(x) */
59 #endif /* RATE_ASSERT */
60 
61 #ifndef RATE_TRACE
62 #define RATE_TRACE(x...)  /* printf(x) */
63 #endif
64 
65 /*****************************************************************************/
66 
67 /* The following coefficients are coupled.  They are chosen to be
68  * guarantee calculable factors for the interpolation routine.  They
69  * have been tested over the range of RATEMIN-RATEMAX Hz.  Decreasing
70  * the granularity increases the required buffer size and affects the
71  * gain values at different points in the space.  These values were
72  * found by running the test program with -p (probe) and some trial
73  * and error.
74  *
75  * ROUNDHZ	the granularity of sample rates (fits n*11025 and n*8000).
76  * FEEDBUFSZ	the amount of buffer space.
77  * MINGAIN	the minimum acceptable gain in coefficients search.
78  */
79 #define ROUNDHZ			   25
80 #define FEEDBUFSZ 		 8192
81 #define MINGAIN			   92
82 
83 #define RATEMIN  		 4000
84 #define RATEMAX 		48000
85 
86 struct feed_rate_info;
87 
88 typedef int (*rate_convert_method)(struct feed_rate_info *,
89 				   uint32_t, uint32_t, int16_t *);
90 
91 static int
92 convert_stereo_up(struct feed_rate_info *info,
93 		  uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
94 
95 static int
96 convert_stereo_down(struct feed_rate_info *info,
97 		    uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
98 
99 struct feed_rate_info {
100 	uint32_t src, dst;	/* source and destination rates */
101 	uint16_t buffer_ticks;	/* number of available samples in buffer */
102 	uint16_t buffer_pos;	/* next available sample in buffer */
103 	uint16_t rounds; 	/* maximum number of cycle rounds w buffer */
104 	uint16_t alpha;		/* interpolation distance */
105         uint16_t sscale;        /* src clock scale */
106         uint16_t dscale;        /* dst clock scale */
107         uint16_t mscale;        /* scale factor to avoid divide per sample */
108         uint16_t mroll;         /* roll to again avoid divide per sample */
109 	uint16_t channels;	/* 1 = mono, 2 = stereo */
110 
111 	rate_convert_method convert;
112     	int16_t  buffer[FEEDBUFSZ];
113 };
114 
115 #define bytes_per_sample		2
116 #define src_ticks_per_cycle(info)	(info->dscale * info->rounds)
117 #define dst_ticks_per_cycle(info)	(info->sscale * info->rounds)
118 #define bytes_per_tick(info)		(info->channels * bytes_per_sample)
119 #define src_bytes_per_cycle(info) 					      \
120         		(src_ticks_per_cycle(info) * bytes_per_tick(info))
121 #define dst_bytes_per_cycle(info) 					      \
122         		(dst_ticks_per_cycle(info) * bytes_per_tick(info))
123 
124 static uint32_t
125 gcd(uint32_t x, uint32_t y)
126 {
127 	uint32_t w;
128 	while (y != 0) {
129 		w = x % y;
130 		x = y;
131 		y = w;
132 	}
133 	return x;
134 }
135 
136 static int
137 feed_rate_setup(struct pcm_feeder *f)
138 {
139 	struct feed_rate_info *info = f->data;
140         uint32_t mscale, mroll, l, r, g;
141 
142 	/* Beat sample rates down by greatest common divisor */
143 	g = gcd(info->src, info->dst);
144 	info->sscale = info->dst / g;
145 	info->dscale = info->src / g;
146 
147 	info->alpha = 0;
148 	info->buffer_ticks = 0;
149 	info->buffer_pos = 0;
150 
151 	/* Pick suitable conversion routine */
152 	if (info->src > info->dst) {
153 		info->convert = convert_stereo_down;
154 	} else {
155 		info->convert = convert_stereo_up;
156 	}
157 
158 	/*
159 	 * Determine number of conversion rounds that will fit into
160 	 * buffer.  NB Must set info->rounds to one before using
161 	 * src_ticks_per_cycle here since it used by src_ticks_per_cycle.
162 	 */
163 	info->rounds = 1;
164 	r = (FEEDBUFSZ - bytes_per_tick(info)) /
165 		(src_ticks_per_cycle(info) * bytes_per_tick(info));
166 	if (r == 0) {
167 		RATE_TRACE("Insufficient buffer space for conversion %d -> %d "
168 			   "(%d < %d)\n", info->src, info->dst, FEEDBUFSZ,
169 			   src_ticks_per_cycle(info) * bytes_per_tick(info));
170 		return -1;
171 	}
172 	info->rounds = r;
173 
174 	/*
175 	 * Find scale and roll combination that allows us to trade
176 	 * costly divide operations in the main loop for multiply-rolls.
177 	 */
178         for (l = 96; l >= MINGAIN; l -= 3) {
179 		for (mroll = 0; mroll < 16; mroll ++) {
180 			mscale = (1 << mroll) / info->sscale;
181 
182                         r = (mscale * info->sscale * 100) >> mroll;
183                         if (r > l && r <= 100) {
184                                 info->mscale = mscale;
185                                 info->mroll = mroll;
186                                 RATE_TRACE("Converting %d to %d with "
187 					   "mscale = %d and mroll = %d "
188 					   "(gain = %d / 100)\n",
189 					   info->src, info->dst,
190 					   info->mscale, info->mroll, r);
191                                 return 0;
192                         }
193                 }
194         }
195 
196 	RATE_TRACE("Failed to find a converter within %d%% gain for "
197 		   "%d to %d.\n", l, info->src, info->dst);
198 
199         return -2;
200 }
201 
202 static int
203 feed_rate_set(struct pcm_feeder *f, int what, int value)
204 {
205 	struct feed_rate_info *info = f->data;
206 	int rvalue;
207 
208 	if (value < RATEMIN || value > RATEMAX) {
209 		return -1;
210 	}
211 
212 	rvalue = (value / ROUNDHZ) * ROUNDHZ;
213 	if (value - rvalue > ROUNDHZ / 2) {
214 	    rvalue += ROUNDHZ;
215 	}
216 
217 	switch(what) {
218 	case FEEDRATE_SRC:
219 		info->src = rvalue;
220 		break;
221 	case FEEDRATE_DST:
222 		info->dst = rvalue;
223 		break;
224 	default:
225 		return -1;
226 	}
227 
228 	return feed_rate_setup(f);
229 }
230 
231 static int
232 feed_rate_get(struct pcm_feeder *f, int what)
233 {
234 	struct feed_rate_info *info = f->data;
235 
236 	switch(what) {
237 	case FEEDRATE_SRC:
238 		return info->src;
239 	case FEEDRATE_DST:
240 		return info->dst;
241 	default:
242 		return -1;
243 	}
244 	return -1;
245 }
246 
247 static int
248 feed_rate_init(struct pcm_feeder *f)
249 {
250 	struct feed_rate_info *info;
251 
252 	info = malloc(sizeof(*info), M_RATEFEEDER, M_WAITOK | M_ZERO);
253 	info->src = DSP_DEFAULT_SPEED;
254 	info->dst = DSP_DEFAULT_SPEED;
255 	info->channels = 2;
256 
257 	f->data = info;
258 	return 0;
259 }
260 
261 static int
262 feed_rate_free(struct pcm_feeder *f)
263 {
264 	struct feed_rate_info *info = f->data;
265 
266 	if (info) {
267 		free(info, M_RATEFEEDER);
268 	}
269 	f->data = NULL;
270 	return 0;
271 }
272 
273 static int
274 convert_stereo_up(struct feed_rate_info *info,
275 		  uint32_t		 src_ticks,
276 		  uint32_t		 dst_ticks,
277 		  int16_t		*dst)
278 {
279 	uint32_t max_dst_ticks;
280 	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o;
281 	int16_t *src;
282 
283 	sp = info->buffer_pos * 2;
284 	se = sp + src_ticks * 2;
285 
286 	src = info->buffer;
287 	alpha = info->alpha * info->mscale;
288 	dalpha = info->dscale * info->mscale; /* Alpha increment */
289 	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
290 	mroll = info->mroll;
291 
292 	/*
293 	 * For efficiency the main conversion loop should only depend on
294 	 * one variable.  We use the state to work out the maximum number
295 	 * of output samples that are available and eliminate the checking of
296 	 * sp from the loop.
297 	 */
298 	max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha;
299 	if (max_dst_ticks < dst_ticks) {
300 		dst_ticks = max_dst_ticks;
301 	}
302 
303 	dp = 0;
304 	de = dst_ticks * 2;
305 	/*
306 	 * Unrolling this loop manually does not help much here because
307 	 * of the alpha, malpha comparison.
308 	 */
309 	while (dp < de) {
310 		o = malpha - alpha;
311 		x = alpha * src[sp + 2] + o * src[sp];
312 		dst[dp++] = x >> mroll;
313 		x = alpha * src[sp + 3] + o * src[sp + 1];
314 		dst[dp++] = x >> mroll;
315 		alpha += dalpha;
316 		if (alpha >= malpha) {
317 			alpha -= malpha;
318 			sp += 2;
319 		}
320 	}
321 	RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__));
322 
323 	info->buffer_pos = sp / info->channels;
324 	info->alpha = alpha / info->mscale;
325 
326 	return dp / info->channels;
327 }
328 
329 static int
330 convert_stereo_down(struct feed_rate_info *info,
331 		    uint32_t		   src_ticks,
332 		    uint32_t		   dst_ticks,
333 		    int16_t		  *dst)
334 {
335 	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m,
336 		mdalpha, mstep;
337 	int16_t *src;
338 
339 	sp = info->buffer_pos * 2;
340 	se = sp + src_ticks * 2;
341 
342 	src = info->buffer;
343 	alpha = info->alpha * info->mscale;
344 	dalpha = info->dscale * info->mscale; /* Alpha increment */
345 	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
346 	mroll = info->mroll;
347 
348 	dp = 0;
349 	de = dst_ticks * 2;
350 
351 	m = dalpha / malpha;
352 	mstep = m * 2;
353 	mdalpha = dalpha - m * malpha;
354 
355 	/*
356 	 * TODO: eliminate sp or dp from this loop comparison for a few
357 	 * extra % performance.
358 	 */
359 	while (sp < se && dp < de) {
360 		o = malpha - alpha;
361 		x = alpha * src[sp + 2] + o * src[sp];
362 		dst[dp++] = x >> mroll;
363 		x = alpha * src[sp + 3] + o * src[sp + 1];
364 		dst[dp++] = x >> mroll;
365 
366 		alpha += mdalpha;
367 		sp += mstep;
368 		if (alpha >= malpha) {
369 			alpha -= malpha;
370 			sp += 2;
371 		}
372 	}
373 
374 	info->buffer_pos = sp / 2;
375 	info->alpha = alpha / info->mscale;
376 
377 	RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
378 		    ("%s: Source overrun\n", __func__));
379 
380 	return dp / 2;
381 }
382 
383 static int
384 feed_rate(struct pcm_feeder	*f,
385 	  struct pcm_channel	*c,
386 	  uint8_t		*b,
387 	  uint32_t		 count,
388 	  void			*source)
389 {
390 	struct feed_rate_info *info = f->data;
391 
392 	uint32_t done, s_ticks, d_ticks;
393 	done = 0;
394 
395 	RATE_ASSERT(info->channels == 2,
396 		    ("%s: channels (%d) != 2", __func__, info->channels));
397 
398 	while (done < count) {
399 		/* Slurp in more data if input buffer is not full */
400 		while (info->buffer_ticks < src_ticks_per_cycle(info)) {
401 			uint8_t *u8b;
402 			int	 fetch;
403 			fetch = src_bytes_per_cycle(info) -
404 				info->buffer_ticks * bytes_per_tick(info);
405 			u8b = (uint8_t*)info->buffer +
406 				(info->buffer_ticks + 1) *
407 				bytes_per_tick(info);
408 			fetch = FEEDER_FEED(f->source, c, u8b, fetch, source);
409 			RATE_ASSERT(fetch % bytes_per_tick(info) == 0,
410 				    ("%s: fetched unaligned bytes (%d)",
411 				     __func__, fetch));
412 			info->buffer_ticks += fetch / bytes_per_tick(info);
413 			RATE_ASSERT(src_ticks_per_cycle(info) >=
414 				    info->buffer_ticks,
415 				    ("%s: buffer overfilled (%d > %d).",
416 				     __func__, info->buffer_ticks,
417 				 src_ticks_per_cycle(info)));
418 			if (fetch == 0)
419 				break;
420 		}
421 
422 		/* Find amount of input buffer data that should be processed */
423 		d_ticks = (count - done) / bytes_per_tick(info);
424 		s_ticks = info->buffer_ticks - info->buffer_pos;
425 		if (info->buffer_ticks != src_ticks_per_cycle(info)) {
426 			if (s_ticks > 8)
427 				s_ticks -= 8;
428 			else
429 				s_ticks = 0;
430 		}
431 
432 		d_ticks = info->convert(info, s_ticks, d_ticks,
433 					(int16_t*)(b + done));
434 		if (d_ticks == 0)
435 			break;
436 		done += d_ticks * bytes_per_tick(info);
437 
438 		RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
439 			    ("%s: buffer_ticks too big\n", __func__));
440 		RATE_ASSERT(info->buffer_ticks <= src_ticks_per_cycle(info),
441 			    ("too many ticks %d /  %d\n",
442 			     info->buffer_ticks, src_ticks_per_cycle(info)));
443 		RATE_TRACE("%s: ticks %5d / %d pos %d\n", __func__,
444 			   info->buffer_ticks, src_ticks_per_cycle(info),
445 			   info->buffer_pos);
446 
447 		if (src_ticks_per_cycle(info) <= info->buffer_pos) {
448 			/* End of cycle reached, copy last samples to start */
449 			uint8_t *u8b;
450 			u8b = (uint8_t*)info->buffer;
451 			bcopy(u8b + src_bytes_per_cycle(info), u8b,
452 			      bytes_per_tick(info));
453 
454 			RATE_ASSERT(info->alpha == 0,
455 				    ("%s: completed cycle with "
456 				     "alpha non-zero", __func__, info->alpha));
457 
458 			info->buffer_pos = 0;
459 			info->buffer_ticks = 0;
460 		}
461 	}
462 
463 	RATE_ASSERT(count >= done,
464 		    ("%s: generated too many bytes of data (%d > %d).",
465 		     __func__, done, count));
466 
467 	if (done != count) {
468 		RATE_TRACE("Only did %d of %d\n", done, count);
469 	}
470 
471 	return done;
472 }
473 
474 static struct pcm_feederdesc feeder_rate_desc[] = {
475 	{FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0},
476 	{0, 0, 0, 0},
477 };
478 static kobj_method_t feeder_rate_methods[] = {
479     	KOBJMETHOD(feeder_init,		feed_rate_init),
480     	KOBJMETHOD(feeder_free,		feed_rate_free),
481     	KOBJMETHOD(feeder_set,		feed_rate_set),
482     	KOBJMETHOD(feeder_get,		feed_rate_get),
483     	KOBJMETHOD(feeder_feed,		feed_rate),
484 	{0, 0}
485 };
486 FEEDER_DECLARE(feeder_rate, 2, NULL);
487 
488