1 /* 2 * Copyright (c) 2003 Orion Hodson <orion@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * MAINTAINER: Orion Hodson <orion@freebsd.org> 27 * 28 * This rate conversion code uses linear interpolation without any 29 * pre- or post- interpolation filtering to combat aliasing. This 30 * greatly limits the sound quality and should be addressed at some 31 * stage in the future. 32 * 33 * Since this accuracy of interpolation is sensitive and examination 34 * of the algorithm output is harder from the kernel, th code is 35 * designed to be compiled in the kernel and in a userland test 36 * harness. This is done by selectively including and excluding code 37 * with several portions based on whether _KERNEL is defined. It's a 38 * little ugly, but exceedingly useful. The testsuite and its 39 * revisions can be found at: 40 * http://people.freebsd.org/~orion/feedrate/ 41 * 42 * Special thanks to Ken Marx for exposing flaws in the code and for 43 * testing revisions. 44 */ 45 46 #ifdef _KERNEL 47 48 #include <dev/sound/pcm/sound.h> 49 #include "feeder_if.h" 50 51 SND_DECLARE_FILE("$FreeBSD$"); 52 53 #endif /* _KERNEL */ 54 55 MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder"); 56 57 #ifndef RATE_ASSERT 58 #define RATE_ASSERT(x, y) /* KASSERT(x) */ 59 #endif /* RATE_ASSERT */ 60 61 #ifndef RATE_TRACE 62 #define RATE_TRACE(x...) /* printf(x) */ 63 #endif 64 65 /*****************************************************************************/ 66 67 /* The following coefficients are coupled. They are chosen to be 68 * guarantee calculable factors for the interpolation routine. They 69 * have been tested over the range of RATEMIN-RATEMAX Hz. Decreasing 70 * the granularity increases the required buffer size and affects the 71 * gain values at different points in the space. These values were 72 * found by running the test program with -p (probe) and some trial 73 * and error. 74 * 75 * ROUNDHZ the granularity of sample rates (fits n*11025 and n*8000). 76 * FEEDBUFSZ the amount of buffer space. 77 * MINGAIN the minimum acceptable gain in coefficients search. 78 */ 79 #define ROUNDHZ 25 80 #define FEEDBUFSZ 8192 81 #define MINGAIN 92 82 83 #define RATEMIN 4000 84 #define RATEMAX 48000 85 86 struct feed_rate_info; 87 88 typedef int (*rate_convert_method)(struct feed_rate_info *, 89 uint32_t, uint32_t, int16_t *); 90 91 static int 92 convert_stereo_up(struct feed_rate_info *info, 93 uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst); 94 95 static int 96 convert_stereo_down(struct feed_rate_info *info, 97 uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst); 98 99 struct feed_rate_info { 100 uint32_t src, dst; /* source and destination rates */ 101 uint16_t buffer_ticks; /* number of available samples in buffer */ 102 uint16_t buffer_pos; /* next available sample in buffer */ 103 uint16_t rounds; /* maximum number of cycle rounds w buffer */ 104 uint16_t alpha; /* interpolation distance */ 105 uint16_t sscale; /* src clock scale */ 106 uint16_t dscale; /* dst clock scale */ 107 uint16_t mscale; /* scale factor to avoid divide per sample */ 108 uint16_t mroll; /* roll to again avoid divide per sample */ 109 uint16_t channels; /* 1 = mono, 2 = stereo */ 110 111 rate_convert_method convert; 112 int16_t buffer[FEEDBUFSZ]; 113 }; 114 115 #define bytes_per_sample 2 116 #define src_ticks_per_cycle(info) (info->dscale * info->rounds) 117 #define dst_ticks_per_cycle(info) (info->sscale * info->rounds) 118 #define bytes_per_tick(info) (info->channels * bytes_per_sample) 119 #define src_bytes_per_cycle(info) \ 120 (src_ticks_per_cycle(info) * bytes_per_tick(info)) 121 #define dst_bytes_per_cycle(info) \ 122 (dst_ticks_per_cycle(info) * bytes_per_tick(info)) 123 124 static uint32_t 125 gcd(uint32_t x, uint32_t y) 126 { 127 uint32_t w; 128 while (y != 0) { 129 w = x % y; 130 x = y; 131 y = w; 132 } 133 return x; 134 } 135 136 static int 137 feed_rate_setup(struct pcm_feeder *f) 138 { 139 struct feed_rate_info *info = f->data; 140 uint32_t mscale, mroll, l, r, g; 141 142 /* Beat sample rates down by greatest common divisor */ 143 g = gcd(info->src, info->dst); 144 info->sscale = info->dst / g; 145 info->dscale = info->src / g; 146 147 info->alpha = 0; 148 info->buffer_ticks = 0; 149 info->buffer_pos = 0; 150 151 /* Pick suitable conversion routine */ 152 if (info->src > info->dst) { 153 info->convert = convert_stereo_down; 154 } else { 155 info->convert = convert_stereo_up; 156 } 157 158 /* 159 * Determine number of conversion rounds that will fit into 160 * buffer. NB Must set info->rounds to one before using 161 * src_ticks_per_cycle here since it used by src_ticks_per_cycle. 162 */ 163 info->rounds = 1; 164 r = (FEEDBUFSZ - bytes_per_tick(info)) / 165 (src_ticks_per_cycle(info) * bytes_per_tick(info)); 166 if (r == 0) { 167 RATE_TRACE("Insufficient buffer space for conversion %d -> %d " 168 "(%d < %d)\n", info->src, info->dst, FEEDBUFSZ, 169 src_ticks_per_cycle(info) * bytes_per_tick(info)); 170 return -1; 171 } 172 info->rounds = r; 173 174 /* 175 * Find scale and roll combination that allows us to trade 176 * costly divide operations in the main loop for multiply-rolls. 177 */ 178 for (l = 96; l >= MINGAIN; l -= 3) { 179 for (mroll = 0; mroll < 16; mroll ++) { 180 mscale = (1 << mroll) / info->sscale; 181 182 r = (mscale * info->sscale * 100) >> mroll; 183 if (r > l && r <= 100) { 184 info->mscale = mscale; 185 info->mroll = mroll; 186 RATE_TRACE("Converting %d to %d with " 187 "mscale = %d and mroll = %d " 188 "(gain = %d / 100)\n", 189 info->src, info->dst, 190 info->mscale, info->mroll, r); 191 return 0; 192 } 193 } 194 } 195 196 RATE_TRACE("Failed to find a converter within %d%% gain for " 197 "%d to %d.\n", l, info->src, info->dst); 198 199 return -2; 200 } 201 202 static int 203 feed_rate_set(struct pcm_feeder *f, int what, int value) 204 { 205 struct feed_rate_info *info = f->data; 206 int rvalue; 207 208 if (value < RATEMIN || value > RATEMAX) { 209 return -1; 210 } 211 212 rvalue = (value / ROUNDHZ) * ROUNDHZ; 213 if (value - rvalue > ROUNDHZ / 2) { 214 rvalue += ROUNDHZ; 215 } 216 217 switch(what) { 218 case FEEDRATE_SRC: 219 info->src = rvalue; 220 break; 221 case FEEDRATE_DST: 222 info->dst = rvalue; 223 break; 224 default: 225 return -1; 226 } 227 228 return feed_rate_setup(f); 229 } 230 231 static int 232 feed_rate_get(struct pcm_feeder *f, int what) 233 { 234 struct feed_rate_info *info = f->data; 235 236 switch(what) { 237 case FEEDRATE_SRC: 238 return info->src; 239 case FEEDRATE_DST: 240 return info->dst; 241 default: 242 return -1; 243 } 244 return -1; 245 } 246 247 static int 248 feed_rate_init(struct pcm_feeder *f) 249 { 250 struct feed_rate_info *info; 251 252 info = malloc(sizeof(*info), M_RATEFEEDER, M_NOWAIT | M_ZERO); 253 info->src = DSP_DEFAULT_SPEED; 254 info->dst = DSP_DEFAULT_SPEED; 255 info->channels = 2; 256 257 f->data = info; 258 return 0; 259 } 260 261 static int 262 feed_rate_free(struct pcm_feeder *f) 263 { 264 struct feed_rate_info *info = f->data; 265 266 if (info) { 267 free(info, M_RATEFEEDER); 268 } 269 f->data = NULL; 270 return 0; 271 } 272 273 static int 274 convert_stereo_up(struct feed_rate_info *info, 275 uint32_t src_ticks, 276 uint32_t dst_ticks, 277 int16_t *dst) 278 { 279 uint32_t max_dst_ticks; 280 int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o; 281 int16_t *src; 282 283 sp = info->buffer_pos * 2; 284 se = sp + src_ticks * 2; 285 286 src = info->buffer; 287 alpha = info->alpha * info->mscale; 288 dalpha = info->dscale * info->mscale; /* Alpha increment */ 289 malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */ 290 mroll = info->mroll; 291 292 /* 293 * For efficiency the main conversion loop should only depend on 294 * one variable. We use the state to work out the maximum number 295 * of output samples that are available and eliminate the checking of 296 * sp from the loop. 297 */ 298 max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha; 299 if (max_dst_ticks < dst_ticks) { 300 dst_ticks = max_dst_ticks; 301 } 302 303 dp = 0; 304 de = dst_ticks * 2; 305 /* 306 * Unrolling this loop manually does not help much here because 307 * of the alpha, malpha comparison. 308 */ 309 while (dp < de) { 310 o = malpha - alpha; 311 x = alpha * src[sp + 2] + o * src[sp]; 312 dst[dp++] = x >> mroll; 313 x = alpha * src[sp + 3] + o * src[sp + 1]; 314 dst[dp++] = x >> mroll; 315 alpha += dalpha; 316 if (alpha >= malpha) { 317 alpha -= malpha; 318 sp += 2; 319 } 320 } 321 RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__)); 322 323 info->buffer_pos = sp / info->channels; 324 info->alpha = alpha / info->mscale; 325 326 return dp / info->channels; 327 } 328 329 static int 330 convert_stereo_down(struct feed_rate_info *info, 331 uint32_t src_ticks, 332 uint32_t dst_ticks, 333 int16_t *dst) 334 { 335 int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m, 336 mdalpha, mstep; 337 int16_t *src; 338 339 sp = info->buffer_pos * 2; 340 se = sp + src_ticks * 2; 341 342 src = info->buffer; 343 alpha = info->alpha * info->mscale; 344 dalpha = info->dscale * info->mscale; /* Alpha increment */ 345 malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */ 346 mroll = info->mroll; 347 348 dp = 0; 349 de = dst_ticks * 2; 350 351 m = dalpha / malpha; 352 mstep = m * 2; 353 mdalpha = dalpha - m * malpha; 354 355 /* 356 * TODO: eliminate sp or dp from this loop comparison for a few 357 * extra % performance. 358 */ 359 while (sp < se && dp < de) { 360 o = malpha - alpha; 361 x = alpha * src[sp + 2] + o * src[sp]; 362 dst[dp++] = x >> mroll; 363 x = alpha * src[sp + 3] + o * src[sp + 1]; 364 dst[dp++] = x >> mroll; 365 366 alpha += mdalpha; 367 sp += mstep; 368 if (alpha >= malpha) { 369 alpha -= malpha; 370 sp += 2; 371 } 372 } 373 374 info->buffer_pos = sp / 2; 375 info->alpha = alpha / info->mscale; 376 377 RATE_ASSERT(info->buffer_pos <= info->buffer_ticks, 378 ("%s: Source overrun\n", __func__)); 379 380 return dp / 2; 381 } 382 383 static int 384 feed_rate(struct pcm_feeder *f, 385 struct pcm_channel *c, 386 uint8_t *b, 387 uint32_t count, 388 void *source) 389 { 390 struct feed_rate_info *info = f->data; 391 392 uint32_t done, s_ticks, d_ticks; 393 done = 0; 394 395 RATE_ASSERT(info->channels == 2, 396 ("%s: channels (%d) != 2", __func__, info->channels)); 397 398 while (done < count) { 399 /* Slurp in more data if input buffer is not full */ 400 while (info->buffer_ticks < src_ticks_per_cycle(info)) { 401 uint8_t *u8b; 402 int fetch; 403 fetch = src_bytes_per_cycle(info) - 404 info->buffer_ticks * bytes_per_tick(info); 405 u8b = (uint8_t*)info->buffer + 406 (info->buffer_ticks + 1) * 407 bytes_per_tick(info); 408 fetch = FEEDER_FEED(f->source, c, u8b, fetch, source); 409 RATE_ASSERT(fetch % bytes_per_tick(info) == 0, 410 ("%s: fetched unaligned bytes (%d)", 411 __func__, fetch)); 412 info->buffer_ticks += fetch / bytes_per_tick(info); 413 RATE_ASSERT(src_ticks_per_cycle(info) >= 414 info->buffer_ticks, 415 ("%s: buffer overfilled (%d > %d).", 416 __func__, info->buffer_ticks, 417 src_ticks_per_cycle(info))); 418 if (fetch == 0) 419 break; 420 } 421 422 /* Find amount of input buffer data that should be processed */ 423 d_ticks = (count - done) / bytes_per_tick(info); 424 s_ticks = info->buffer_ticks - info->buffer_pos; 425 if (info->buffer_ticks != src_ticks_per_cycle(info)) { 426 if (s_ticks > 8) 427 s_ticks -= 8; 428 else 429 s_ticks = 0; 430 } 431 432 d_ticks = info->convert(info, s_ticks, d_ticks, 433 (int16_t*)(b + done)); 434 if (d_ticks == 0) 435 break; 436 done += d_ticks * bytes_per_tick(info); 437 438 RATE_ASSERT(info->buffer_pos <= info->buffer_ticks, 439 ("%s: buffer_ticks too big\n", __func__)); 440 RATE_ASSERT(info->buffer_ticks <= src_ticks_per_cycle(info), 441 ("too many ticks %d / %d\n", 442 info->buffer_ticks, src_ticks_per_cycle(info))); 443 RATE_TRACE("%s: ticks %5d / %d pos %d\n", __func__, 444 info->buffer_ticks, src_ticks_per_cycle(info), 445 info->buffer_pos); 446 447 if (src_ticks_per_cycle(info) <= info->buffer_pos) { 448 /* End of cycle reached, copy last samples to start */ 449 uint8_t *u8b; 450 u8b = (uint8_t*)info->buffer; 451 bcopy(u8b + src_bytes_per_cycle(info), u8b, 452 bytes_per_tick(info)); 453 454 RATE_ASSERT(info->alpha == 0, 455 ("%s: completed cycle with " 456 "alpha non-zero", __func__, info->alpha)); 457 458 info->buffer_pos = 0; 459 info->buffer_ticks = 0; 460 } 461 } 462 463 RATE_ASSERT(count >= done, 464 ("%s: generated too many bytes of data (%d > %d).", 465 __func__, done, count)); 466 467 if (done != count) { 468 RATE_TRACE("Only did %d of %d\n", done, count); 469 } 470 471 return done; 472 } 473 474 static struct pcm_feederdesc feeder_rate_desc[] = { 475 {FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0}, 476 {0, 0, 0, 0}, 477 }; 478 static kobj_method_t feeder_rate_methods[] = { 479 KOBJMETHOD(feeder_init, feed_rate_init), 480 KOBJMETHOD(feeder_free, feed_rate_free), 481 KOBJMETHOD(feeder_set, feed_rate_set), 482 KOBJMETHOD(feeder_get, feed_rate_get), 483 KOBJMETHOD(feeder_feed, feed_rate), 484 {0, 0} 485 }; 486 FEEDER_DECLARE(feeder_rate, 2, NULL); 487 488