Line data Source code
1 : ////////////////////////////////////////////////////////////////////////////////
2 : ///
3 : /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
4 : /// while maintaining the original pitch by using a time domain WSOLA-like
5 : /// method with several performance-increasing tweaks.
6 : ///
7 : /// Note : MMX optimized functions reside in a separate, platform-specific
8 : /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
9 : ///
10 : /// Author : Copyright (c) Olli Parviainen
11 : /// Author e-mail : oparviai 'at' iki.fi
12 : /// SoundTouch WWW: http://www.surina.net/soundtouch
13 : ///
14 : ////////////////////////////////////////////////////////////////////////////////
15 : //
16 : // Last changed : $Date: 2015-02-22 15:07:12 +0000 (Sun, 22 Feb 2015) $
17 : // File revision : $Revision: 1.12 $
18 : //
19 : // $Id: TDStretch.cpp 205 2015-02-22 15:07:12Z oparviai $
20 : //
21 : ////////////////////////////////////////////////////////////////////////////////
22 : //
23 : // License :
24 : //
25 : // SoundTouch audio processing library
26 : // Copyright (c) Olli Parviainen
27 : //
28 : // This library is free software; you can redistribute it and/or
29 : // modify it under the terms of the GNU Lesser General Public
30 : // License as published by the Free Software Foundation; either
31 : // version 2.1 of the License, or (at your option) any later version.
32 : //
33 : // This library is distributed in the hope that it will be useful,
34 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
35 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36 : // Lesser General Public License for more details.
37 : //
38 : // You should have received a copy of the GNU Lesser General Public
39 : // License along with this library; if not, write to the Free Software
40 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 : //
42 : ////////////////////////////////////////////////////////////////////////////////
43 :
44 : #include <string.h>
45 : #include <limits.h>
46 : #include <assert.h>
47 : #include <math.h>
48 : #include <float.h>
49 :
50 : #include "STTypes.h"
51 : #include "cpu_detect.h"
52 : #include "TDStretch.h"
53 :
54 : using namespace soundtouch;
55 :
56 : #define max(x, y) (((x) > (y)) ? (x) : (y))
57 :
58 :
59 : /*****************************************************************************
60 : *
61 : * Constant definitions
62 : *
63 : *****************************************************************************/
64 :
65 : // Table for the hierarchical mixing position seeking algorithm
66 : static const short _scanOffsets[5][24]={
67 : { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
68 : 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
69 : {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
70 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
71 : { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
72 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
73 : { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
74 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
75 : { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111,
76 : 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}};
77 :
78 : /*****************************************************************************
79 : *
80 : * Implementation of the class 'TDStretch'
81 : *
82 : *****************************************************************************/
83 :
84 :
85 0 : TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
86 : {
87 0 : bQuickSeek = false;
88 0 : channels = 2;
89 :
90 0 : pMidBuffer = NULL;
91 0 : pMidBufferUnaligned = NULL;
92 0 : overlapLength = 0;
93 :
94 0 : bAutoSeqSetting = true;
95 0 : bAutoSeekSetting = true;
96 :
97 : // outDebt = 0;
98 0 : skipFract = 0;
99 :
100 0 : tempo = 1.0f;
101 0 : setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
102 0 : setTempo(1.0f);
103 :
104 0 : clear();
105 0 : }
106 :
107 :
108 :
109 0 : TDStretch::~TDStretch()
110 : {
111 0 : delete[] pMidBufferUnaligned;
112 0 : }
113 :
114 :
115 :
116 : // Sets routine control parameters. These control are certain time constants
117 : // defining how the sound is stretched to the desired duration.
118 : //
119 : // 'sampleRate' = sample rate of the sound
120 : // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
121 : // 'seekwindowMS' = seeking window length for scanning the best overlapping
122 : // position (default = 28 ms)
123 : // 'overlapMS' = overlapping length (default = 12 ms)
124 :
125 0 : void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
126 : int aSeekWindowMS, int aOverlapMS)
127 : {
128 : // accept only positive parameter values - if zero or negative, use old values instead
129 0 : if (aSampleRate > 0) this->sampleRate = aSampleRate;
130 0 : if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
131 :
132 0 : if (aSequenceMS > 0)
133 : {
134 0 : this->sequenceMs = aSequenceMS;
135 0 : bAutoSeqSetting = false;
136 : }
137 0 : else if (aSequenceMS == 0)
138 : {
139 : // if zero, use automatic setting
140 0 : bAutoSeqSetting = true;
141 : }
142 :
143 0 : if (aSeekWindowMS > 0)
144 : {
145 0 : this->seekWindowMs = aSeekWindowMS;
146 0 : bAutoSeekSetting = false;
147 : }
148 0 : else if (aSeekWindowMS == 0)
149 : {
150 : // if zero, use automatic setting
151 0 : bAutoSeekSetting = true;
152 : }
153 :
154 0 : calcSeqParameters();
155 :
156 0 : calculateOverlapLength(overlapMs);
157 :
158 : // set tempo to recalculate 'sampleReq'
159 0 : setTempo(tempo);
160 0 : }
161 :
162 :
163 :
164 : /// Get routine control parameters, see setParameters() function.
165 : /// Any of the parameters to this function can be NULL, in such case corresponding parameter
166 : /// value isn't returned.
167 0 : void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
168 : {
169 0 : if (pSampleRate)
170 : {
171 0 : *pSampleRate = sampleRate;
172 : }
173 :
174 0 : if (pSequenceMs)
175 : {
176 0 : *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
177 : }
178 :
179 0 : if (pSeekWindowMs)
180 : {
181 0 : *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
182 : }
183 :
184 0 : if (pOverlapMs)
185 : {
186 0 : *pOverlapMs = overlapMs;
187 : }
188 0 : }
189 :
190 :
191 : // Overlaps samples in 'midBuffer' with the samples in 'pInput'
192 0 : void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
193 : {
194 : int i;
195 : SAMPLETYPE m1, m2;
196 :
197 0 : m1 = (SAMPLETYPE)0;
198 0 : m2 = (SAMPLETYPE)overlapLength;
199 :
200 0 : for (i = 0; i < overlapLength ; i ++)
201 : {
202 0 : pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
203 0 : m1 += 1;
204 0 : m2 -= 1;
205 : }
206 0 : }
207 :
208 :
209 :
210 0 : void TDStretch::clearMidBuffer()
211 : {
212 0 : memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
213 0 : }
214 :
215 :
216 0 : void TDStretch::clearInput()
217 : {
218 0 : inputBuffer.clear();
219 0 : clearMidBuffer();
220 0 : }
221 :
222 :
223 : // Clears the sample buffers
224 0 : void TDStretch::clear()
225 : {
226 0 : outputBuffer.clear();
227 0 : clearInput();
228 0 : }
229 :
230 :
231 :
232 : // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
233 : // to enable
234 0 : void TDStretch::enableQuickSeek(bool enable)
235 : {
236 0 : bQuickSeek = enable;
237 0 : }
238 :
239 :
240 : // Returns nonzero if the quick seeking algorithm is enabled.
241 0 : bool TDStretch::isQuickSeekEnabled() const
242 : {
243 0 : return bQuickSeek;
244 : }
245 :
246 :
247 : // Seeks for the optimal overlap-mixing position.
248 0 : int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
249 : {
250 0 : if (bQuickSeek)
251 : {
252 0 : return seekBestOverlapPositionQuick(refPos);
253 : }
254 : else
255 : {
256 0 : return seekBestOverlapPositionFull(refPos);
257 : }
258 : }
259 :
260 :
261 : // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
262 : // of 'ovlPos'.
263 0 : inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
264 : {
265 : #ifndef USE_MULTICH_ALWAYS
266 0 : if (channels == 1)
267 : {
268 : // mono sound.
269 0 : overlapMono(pOutput, pInput + ovlPos);
270 : }
271 0 : else if (channels == 2)
272 : {
273 : // stereo sound
274 0 : overlapStereo(pOutput, pInput + 2 * ovlPos);
275 : }
276 : else
277 : #endif // USE_MULTICH_ALWAYS
278 : {
279 0 : assert(channels > 0);
280 0 : overlapMulti(pOutput, pInput + channels * ovlPos);
281 : }
282 0 : }
283 :
284 :
285 :
286 : // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
287 : // routine
288 : //
289 : // The best position is determined as the position where the two overlapped
290 : // sample sequences are 'most alike', in terms of the highest cross-correlation
291 : // value over the overlapping period
292 0 : int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
293 : {
294 : int bestOffs;
295 : double bestCorr;
296 : int i;
297 : double norm;
298 :
299 0 : bestCorr = FLT_MIN;
300 0 : bestOffs = 0;
301 :
302 : // Scans for the best correlation value by testing each possible position
303 : // over the permitted range.
304 0 : bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
305 :
306 : #pragma omp parallel for
307 0 : for (i = 1; i < seekLength; i ++)
308 : {
309 : double corr;
310 : // Calculates correlation value for the mixing position corresponding to 'i'
311 : #ifdef _OPENMP
312 : // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
313 : // iterate the loop in sequential order
314 : corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
315 : #else
316 : // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
317 : // as "calcCrossCorr", but saves time by reusing & updating previously stored
318 : // "norm" value
319 0 : corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
320 : #endif
321 : // heuristic rule to slightly favour values close to mid of the range
322 0 : double tmp = (double)(2 * i - seekLength) / (double)seekLength;
323 0 : corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
324 :
325 : // Checks for the highest correlation value
326 0 : if (corr > bestCorr)
327 : {
328 : // For optimal performance, enter critical section only in case that best value found.
329 : // in such case repeat 'if' condition as it's possible that parallel execution may have
330 : // updated the bestCorr value in the mean time
331 : #pragma omp critical
332 0 : if (corr > bestCorr)
333 : {
334 0 : bestCorr = corr;
335 0 : bestOffs = i;
336 : }
337 : }
338 : }
339 : // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
340 0 : clearCrossCorrState();
341 :
342 0 : return bestOffs;
343 : }
344 :
345 :
346 : // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
347 : // routine
348 : //
349 : // The best position is determined as the position where the two overlapped
350 : // sample sequences are 'most alike', in terms of the highest cross-correlation
351 : // value over the overlapping period
352 0 : int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
353 : {
354 : int j;
355 : int bestOffs;
356 : double bestCorr, corr;
357 : int scanCount, corrOffset, tempOffset;
358 :
359 0 : bestCorr = FLT_MIN;
360 0 : bestOffs = _scanOffsets[0][0];
361 0 : corrOffset = 0;
362 0 : tempOffset = 0;
363 :
364 : // Scans for the best correlation value using four-pass hierarchical search.
365 : //
366 : // The look-up table 'scans' has hierarchical position adjusting steps.
367 : // In first pass the routine searhes for the highest correlation with
368 : // relatively coarse steps, then rescans the neighbourhood of the highest
369 : // correlation with better resolution and so on.
370 0 : for (scanCount = 0;scanCount < 4; scanCount ++)
371 : {
372 0 : j = 0;
373 0 : while (_scanOffsets[scanCount][j])
374 : {
375 : double norm;
376 0 : tempOffset = corrOffset + _scanOffsets[scanCount][j];
377 0 : if (tempOffset >= seekLength) break;
378 :
379 : // Calculates correlation value for the mixing position corresponding
380 : // to 'tempOffset'
381 0 : corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm);
382 : // heuristic rule to slightly favour values close to mid of the range
383 0 : double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
384 0 : corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
385 :
386 : // Checks for the highest correlation value
387 0 : if (corr > bestCorr)
388 : {
389 0 : bestCorr = corr;
390 0 : bestOffs = tempOffset;
391 : }
392 0 : j ++;
393 : }
394 0 : corrOffset = bestOffs;
395 : }
396 : // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
397 0 : clearCrossCorrState();
398 :
399 0 : return bestOffs;
400 : }
401 :
402 :
403 :
404 : /// clear cross correlation routine state if necessary
405 0 : void TDStretch::clearCrossCorrState()
406 : {
407 : // default implementation is empty.
408 0 : }
409 :
410 :
411 : /// Calculates processing sequence length according to tempo setting
412 0 : void TDStretch::calcSeqParameters()
413 : {
414 : // Adjust tempo param according to tempo, so that variating processing sequence length is used
415 : // at varius tempo settings, between the given low...top limits
416 : #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
417 : #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
418 :
419 : // sequence-ms setting values at above low & top tempo
420 : #define AUTOSEQ_AT_MIN 125.0
421 : #define AUTOSEQ_AT_MAX 50.0
422 : #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
423 : #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
424 :
425 : // seek-window-ms setting values at above low & top tempo
426 : #define AUTOSEEK_AT_MIN 25.0
427 : #define AUTOSEEK_AT_MAX 15.0
428 : #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
429 : #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
430 :
431 : #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
432 :
433 : double seq, seek;
434 :
435 0 : if (bAutoSeqSetting)
436 : {
437 0 : seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
438 0 : seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
439 0 : sequenceMs = (int)(seq + 0.5);
440 : }
441 :
442 0 : if (bAutoSeekSetting)
443 : {
444 0 : seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
445 0 : seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
446 0 : seekWindowMs = (int)(seek + 0.5);
447 : }
448 :
449 : // Update seek window lengths
450 0 : seekWindowLength = (sampleRate * sequenceMs) / 1000;
451 0 : if (seekWindowLength < 2 * overlapLength)
452 : {
453 0 : seekWindowLength = 2 * overlapLength;
454 : }
455 0 : seekLength = (sampleRate * seekWindowMs) / 1000;
456 0 : }
457 :
458 :
459 :
460 : // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
461 : // tempo, larger faster tempo.
462 0 : void TDStretch::setTempo(float newTempo)
463 : {
464 : int intskip;
465 :
466 0 : tempo = newTempo;
467 :
468 : // Calculate new sequence duration
469 0 : calcSeqParameters();
470 :
471 : // Calculate ideal skip length (according to tempo value)
472 0 : nominalSkip = tempo * (seekWindowLength - overlapLength);
473 0 : intskip = (int)(nominalSkip + 0.5f);
474 :
475 : // Calculate how many samples are needed in the 'inputBuffer' to
476 : // process another batch of samples
477 : //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
478 0 : sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
479 0 : }
480 :
481 :
482 :
483 : // Sets the number of channels, 1 = mono, 2 = stereo
484 0 : void TDStretch::setChannels(int numChannels)
485 : {
486 0 : assert(numChannels > 0);
487 0 : if (channels == numChannels) return;
488 : // assert(numChannels == 1 || numChannels == 2);
489 :
490 0 : channels = numChannels;
491 0 : inputBuffer.setChannels(channels);
492 0 : outputBuffer.setChannels(channels);
493 :
494 : // re-init overlap/buffer
495 0 : overlapLength=0;
496 0 : setParameters(sampleRate);
497 : }
498 :
499 :
500 : // nominal tempo, no need for processing, just pass the samples through
501 : // to outputBuffer
502 : /*
503 : void TDStretch::processNominalTempo()
504 : {
505 : assert(tempo == 1.0f);
506 :
507 : if (bMidBufferDirty)
508 : {
509 : // If there are samples in pMidBuffer waiting for overlapping,
510 : // do a single sliding overlapping with them in order to prevent a
511 : // clicking distortion in the output sound
512 : if (inputBuffer.numSamples() < overlapLength)
513 : {
514 : // wait until we've got overlapLength input samples
515 : return;
516 : }
517 : // Mix the samples in the beginning of 'inputBuffer' with the
518 : // samples in 'midBuffer' using sliding overlapping
519 : overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
520 : outputBuffer.putSamples(overlapLength);
521 : inputBuffer.receiveSamples(overlapLength);
522 : clearMidBuffer();
523 : // now we've caught the nominal sample flow and may switch to
524 : // bypass mode
525 : }
526 :
527 : // Simply bypass samples from input to output
528 : outputBuffer.moveSamples(inputBuffer);
529 : }
530 : */
531 :
532 :
533 : // Processes as many processing frames of the samples 'inputBuffer', store
534 : // the result into 'outputBuffer'
535 0 : void TDStretch::processSamples()
536 : {
537 : int ovlSkip, offset;
538 : int temp;
539 :
540 : /* Removed this small optimization - can introduce a click to sound when tempo setting
541 : crosses the nominal value
542 : if (tempo == 1.0f)
543 : {
544 : // tempo not changed from the original, so bypass the processing
545 : processNominalTempo();
546 : return;
547 : }
548 : */
549 :
550 : // Process samples as long as there are enough samples in 'inputBuffer'
551 : // to form a processing frame.
552 0 : while ((int)inputBuffer.numSamples() >= sampleReq)
553 : {
554 : // If tempo differs from the normal ('SCALE'), scan for the best overlapping
555 : // position
556 0 : offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
557 :
558 : // Mix the samples in the 'inputBuffer' at position of 'offset' with the
559 : // samples in 'midBuffer' using sliding overlapping
560 : // ... first partially overlap with the end of the previous sequence
561 : // (that's in 'midBuffer')
562 0 : overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
563 0 : outputBuffer.putSamples((uint)overlapLength);
564 :
565 : // ... then copy sequence samples from 'inputBuffer' to output:
566 :
567 : // length of sequence
568 0 : temp = (seekWindowLength - 2 * overlapLength);
569 :
570 : // crosscheck that we don't have buffer overflow...
571 0 : if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
572 : {
573 0 : continue; // just in case, shouldn't really happen
574 : }
575 :
576 0 : outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
577 :
578 : // Copies the end of the current sequence from 'inputBuffer' to
579 : // 'midBuffer' for being mixed with the beginning of the next
580 : // processing sequence and so on
581 0 : assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
582 0 : memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength),
583 0 : channels * sizeof(SAMPLETYPE) * overlapLength);
584 :
585 : // Remove the processed samples from the input buffer. Update
586 : // the difference between integer & nominal skip step to 'skipFract'
587 : // in order to prevent the error from accumulating over time.
588 0 : skipFract += nominalSkip; // real skip size
589 0 : ovlSkip = (int)skipFract; // rounded to integer skip
590 0 : skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
591 0 : inputBuffer.receiveSamples((uint)ovlSkip);
592 : }
593 0 : }
594 :
595 :
596 : // Adds 'numsamples' pcs of samples from the 'samples' memory position into
597 : // the input of the object.
598 0 : void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
599 : {
600 : // Add the samples into the input buffer
601 0 : inputBuffer.putSamples(samples, nSamples);
602 : // Process the samples in input buffer
603 0 : processSamples();
604 0 : }
605 :
606 :
607 :
608 : /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
609 0 : void TDStretch::acceptNewOverlapLength(int newOverlapLength)
610 : {
611 : int prevOvl;
612 :
613 0 : assert(newOverlapLength >= 0);
614 0 : prevOvl = overlapLength;
615 0 : overlapLength = newOverlapLength;
616 :
617 0 : if (overlapLength > prevOvl)
618 : {
619 0 : delete[] pMidBufferUnaligned;
620 :
621 0 : pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
622 : // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
623 0 : pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
624 :
625 0 : clearMidBuffer();
626 : }
627 0 : }
628 :
629 :
630 : // Operator 'new' is overloaded so that it automatically creates a suitable instance
631 : // depending on if we've a MMX/SSE/etc-capable CPU available or not.
632 0 : void * TDStretch::operator new(size_t s)
633 : {
634 : // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
635 : ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
636 0 : return newInstance();
637 : }
638 :
639 :
640 0 : TDStretch * TDStretch::newInstance()
641 : {
642 : #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
643 : uint uExtensions;
644 :
645 0 : uExtensions = detectCPUextensions();
646 : #endif
647 :
648 : // Check if MMX/SSE instruction set extensions supported by CPU
649 :
650 : #ifdef SOUNDTOUCH_ALLOW_MMX
651 : // MMX routines available only with integer sample types
652 : if (uExtensions & SUPPORT_MMX)
653 : {
654 : return ::new TDStretchMMX;
655 : }
656 : else
657 : #endif // SOUNDTOUCH_ALLOW_MMX
658 :
659 :
660 : #ifdef SOUNDTOUCH_ALLOW_SSE
661 0 : if (uExtensions & SUPPORT_SSE)
662 : {
663 : // SSE support
664 0 : return ::new TDStretchSSE;
665 : }
666 : else
667 : #endif // SOUNDTOUCH_ALLOW_SSE
668 :
669 : {
670 : // ISA optimizations not supported, use plain C version
671 0 : return ::new TDStretch;
672 : }
673 : }
674 :
675 :
676 : //////////////////////////////////////////////////////////////////////////////
677 : //
678 : // Integer arithmetics specific algorithm implementations.
679 : //
680 : //////////////////////////////////////////////////////////////////////////////
681 :
682 : #ifdef SOUNDTOUCH_INTEGER_SAMPLES
683 :
684 : // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
685 : // version of the routine.
686 : void TDStretch::overlapStereo(short *poutput, const short *input) const
687 : {
688 : int i;
689 : short temp;
690 : int cnt2;
691 :
692 : for (i = 0; i < overlapLength ; i ++)
693 : {
694 : temp = (short)(overlapLength - i);
695 : cnt2 = 2 * i;
696 : poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
697 : poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
698 : }
699 : }
700 :
701 :
702 : // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
703 : // version of the routine.
704 : void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
705 : {
706 : SAMPLETYPE m1=(SAMPLETYPE)0;
707 : SAMPLETYPE m2;
708 : int i=0;
709 :
710 : for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
711 : {
712 : for (int c = 0; c < channels; c ++)
713 : {
714 : poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
715 : i++;
716 : }
717 :
718 : m1++;
719 : }
720 : }
721 :
722 : // Calculates the x having the closest 2^x value for the given value
723 : static int _getClosest2Power(double value)
724 : {
725 : return (int)(log(value) / log(2.0) + 0.5);
726 : }
727 :
728 :
729 : /// Calculates overlap period length in samples.
730 : /// Integer version rounds overlap length to closest power of 2
731 : /// for a divide scaling operation.
732 : void TDStretch::calculateOverlapLength(int aoverlapMs)
733 : {
734 : int newOvl;
735 :
736 : assert(aoverlapMs >= 0);
737 :
738 : // calculate overlap length so that it's power of 2 - thus it's easy to do
739 : // integer division by right-shifting. Term "-1" at end is to account for
740 : // the extra most significatnt bit left unused in result by signed multiplication
741 : overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
742 : if (overlapDividerBits > 9) overlapDividerBits = 9;
743 : if (overlapDividerBits < 3) overlapDividerBits = 3;
744 : newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above
745 :
746 : acceptNewOverlapLength(newOvl);
747 :
748 : // calculate sloping divider so that crosscorrelation operation won't
749 : // overflow 32-bit register. Max. sum of the crosscorrelation sum without
750 : // divider would be 2^30*(N^3-N)/3, where N = overlap length
751 : slopingDivider = (newOvl * newOvl - 1) / 3;
752 : }
753 :
754 :
755 : double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const
756 : {
757 : long corr;
758 : long lnorm;
759 : int i;
760 :
761 : corr = lnorm = 0;
762 : // Same routine for stereo and mono. For stereo, unroll loop for better
763 : // efficiency and gives slightly better resolution against rounding.
764 : // For mono it same routine, just unrolls loop by factor of 4
765 : for (i = 0; i < channels * overlapLength; i += 4)
766 : {
767 : corr += (mixingPos[i] * compare[i] +
768 : mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
769 : corr += (mixingPos[i + 2] * compare[i + 2] +
770 : mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
771 : lnorm += (mixingPos[i] * mixingPos[i] +
772 : mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
773 : lnorm += (mixingPos[i + 2] * mixingPos[i + 2] +
774 : mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
775 : }
776 :
777 : // Normalize result by dividing by sqrt(norm) - this step is easiest
778 : // done using floating point operation
779 : norm = (double)lnorm;
780 : return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
781 : }
782 :
783 :
784 : /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
785 : double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const
786 : {
787 : long corr;
788 : long lnorm;
789 : int i;
790 :
791 : // cancel first normalizer tap from previous round
792 : lnorm = 0;
793 : for (i = 1; i <= channels; i ++)
794 : {
795 : lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits;
796 : }
797 :
798 : corr = 0;
799 : // Same routine for stereo and mono. For stereo, unroll loop for better
800 : // efficiency and gives slightly better resolution against rounding.
801 : // For mono it same routine, just unrolls loop by factor of 4
802 : for (i = 0; i < channels * overlapLength; i += 4)
803 : {
804 : corr += (mixingPos[i] * compare[i] +
805 : mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
806 : corr += (mixingPos[i + 2] * compare[i + 2] +
807 : mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
808 : }
809 :
810 : // update normalizer with last samples of this round
811 : for (int j = 0; j < channels; j ++)
812 : {
813 : i --;
814 : lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
815 : }
816 : norm += (double)lnorm;
817 :
818 : // Normalize result by dividing by sqrt(norm) - this step is easiest
819 : // done using floating point operation
820 : return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
821 : }
822 :
823 : #endif // SOUNDTOUCH_INTEGER_SAMPLES
824 :
825 : //////////////////////////////////////////////////////////////////////////////
826 : //
827 : // Floating point arithmetics specific algorithm implementations.
828 : //
829 :
830 : #ifdef SOUNDTOUCH_FLOAT_SAMPLES
831 :
832 : // Overlaps samples in 'midBuffer' with the samples in 'pInput'
833 0 : void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
834 : {
835 : int i;
836 : float fScale;
837 : float f1;
838 : float f2;
839 :
840 0 : fScale = 1.0f / (float)overlapLength;
841 :
842 0 : f1 = 0;
843 0 : f2 = 1.0f;
844 :
845 0 : for (i = 0; i < 2 * (int)overlapLength ; i += 2)
846 : {
847 0 : pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
848 0 : pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
849 :
850 0 : f1 += fScale;
851 0 : f2 -= fScale;
852 : }
853 0 : }
854 :
855 :
856 : // Overlaps samples in 'midBuffer' with the samples in 'input'.
857 0 : void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
858 : {
859 : int i;
860 : float fScale;
861 : float f1;
862 : float f2;
863 :
864 0 : fScale = 1.0f / (float)overlapLength;
865 :
866 0 : f1 = 0;
867 0 : f2 = 1.0f;
868 :
869 0 : i=0;
870 0 : for (int i2 = 0; i2 < overlapLength; i2 ++)
871 : {
872 : // note: Could optimize this slightly by taking into account that always channels > 2
873 0 : for (int c = 0; c < channels; c ++)
874 : {
875 0 : pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
876 0 : i++;
877 : }
878 0 : f1 += fScale;
879 0 : f2 -= fScale;
880 : }
881 0 : }
882 :
883 :
884 : /// Calculates overlapInMsec period length in samples.
885 0 : void TDStretch::calculateOverlapLength(int overlapInMsec)
886 : {
887 : int newOvl;
888 :
889 0 : assert(overlapInMsec >= 0);
890 0 : newOvl = (sampleRate * overlapInMsec) / 1000;
891 0 : if (newOvl < 16) newOvl = 16;
892 :
893 : // must be divisible by 8
894 0 : newOvl -= newOvl % 8;
895 :
896 0 : acceptNewOverlapLength(newOvl);
897 0 : }
898 :
899 :
900 : /// Calculate cross-correlation
901 0 : double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) const
902 : {
903 : double corr;
904 : double norm;
905 : int i;
906 :
907 0 : corr = norm = 0;
908 : // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
909 : // For mono it's same routine yet unrollsd by factor of 4.
910 0 : for (i = 0; i < channels * overlapLength; i += 4)
911 : {
912 0 : corr += mixingPos[i] * compare[i] +
913 0 : mixingPos[i + 1] * compare[i + 1];
914 :
915 0 : norm += mixingPos[i] * mixingPos[i] +
916 0 : mixingPos[i + 1] * mixingPos[i + 1];
917 :
918 : // unroll the loop for better CPU efficiency:
919 0 : corr += mixingPos[i + 2] * compare[i + 2] +
920 0 : mixingPos[i + 3] * compare[i + 3];
921 :
922 0 : norm += mixingPos[i + 2] * mixingPos[i + 2] +
923 0 : mixingPos[i + 3] * mixingPos[i + 3];
924 : }
925 :
926 0 : anorm = norm;
927 0 : return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
928 : }
929 :
930 :
931 : /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
932 0 : double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const
933 : {
934 : double corr;
935 : int i;
936 :
937 0 : corr = 0;
938 :
939 : // cancel first normalizer tap from previous round
940 0 : for (i = 1; i <= channels; i ++)
941 : {
942 0 : norm -= mixingPos[-i] * mixingPos[-i];
943 : }
944 :
945 : // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
946 : // For mono it's same routine yet unrollsd by factor of 4.
947 0 : for (i = 0; i < channels * overlapLength; i += 4)
948 : {
949 0 : corr += mixingPos[i] * compare[i] +
950 0 : mixingPos[i + 1] * compare[i + 1] +
951 0 : mixingPos[i + 2] * compare[i + 2] +
952 0 : mixingPos[i + 3] * compare[i + 3];
953 : }
954 :
955 : // update normalizer with last samples of this round
956 0 : for (int j = 0; j < channels; j ++)
957 : {
958 0 : i --;
959 0 : norm += mixingPos[i] * mixingPos[i];
960 : }
961 :
962 0 : return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
963 : }
964 :
965 :
966 : #endif // SOUNDTOUCH_FLOAT_SAMPLES
|