AudioDescriptors.cxx
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <cmath>
00025 #include "AudioDescriptors.hxx"
00026 #include "Audio.hxx"
00027 #include "OSDefines.hxx"
00028
00029 namespace CLAM {
00030
00031
00032 const TData AudioDescriptors::mEpsilon = 1e-5;
00033
00034 AudioDescriptors::AudioDescriptors(Audio* pAudio): DescriptorAbs(eNumAttr)
00035 {
00036 MandatoryInit();
00037 mpAudio=pAudio;
00038 }
00039
00040 AudioDescriptors::AudioDescriptors(TData initVal):DescriptorAbs(eNumAttr)
00041 {
00042 MandatoryInit();
00043 AddAll();
00044 UpdateData();
00045 SetMean(initVal);
00046 SetTemporalCentroid(initVal);
00047 SetEnergy(initVal);
00048 SetVariance(initVal);
00049 SetZeroCrossingRate(initVal);
00050 SetRiseTime(initVal);
00051 SetLogAttackTime(initVal);
00052 SetDecrease(initVal);
00053 }
00054
00055 void AudioDescriptors::DefaultInit() {
00056 mpAudio=0;
00057 }
00058
00059
00060 void AudioDescriptors::CopyInit(const AudioDescriptors & copied) {
00061 mpAudio=copied.mpAudio;
00062 }
00063
00064 const Audio* AudioDescriptors::GetpAudio() const {
00065 return mpAudio;
00066 }
00067
00068
00069
00070 void AudioDescriptors::SetpAudio(Audio* pAudio) {
00071 mpAudio=pAudio;
00072
00073 InitStats(&mpAudio->GetBuffer());
00074 mIsAttackTimeComputed=false;
00075 }
00076
00077 void AudioDescriptors::ConcreteCompute()
00078 {
00079 if (HasMean())
00080 SetMean(mpStats->GetMean());
00081 if (HasTemporalCentroid())
00082 SetTemporalCentroid(mpStats->GetCentroid()/mpAudio->GetSampleRate());
00083 if (HasEnergy())
00084 SetEnergy(mpStats->GetEnergy());
00085 if(HasVariance())
00086 SetVariance(mpStats->GetVariance());
00087 if(HasZeroCrossingRate())
00088 SetZeroCrossingRate(ComputeZeroCrossingRate());
00089 if(HasRiseTime())
00090 SetRiseTime(ComputeAttackTime());
00091 if(HasLogAttackTime())
00092 SetLogAttackTime(ComputeLogAttackTime());
00093 if(HasDecrease())
00094 SetDecrease(ComputeDecrease());
00095 }
00096
00097 TData AudioDescriptors::ComputeZeroCrossingRate()
00098 {
00099 DataArray& data = mpAudio->GetBuffer();
00100
00101 int signChangeCount = 0;
00102 const TSize size = data.Size();
00103 bool wasPositive = data[0] > 0.0;
00104
00105 for (int i=1; i<size; i++)
00106 {
00107 const bool isPositive = (data[i] > 0.0);
00108 if (wasPositive == isPositive) continue;
00109
00110 signChangeCount++;
00111 wasPositive = isPositive;
00112 }
00113
00114 return ((TData)signChangeCount)/size;
00115 }
00116
00117 TData AudioDescriptors::ComputeAttackTime()
00118 {
00119 if(mIsAttackTimeComputed) return mComputedAttackTime;
00120
00121 const DataArray& data = mpAudio->GetBuffer();
00122 const TSize dataSize = mpAudio->GetSize();
00123
00124 DataArray energyEnv;
00125 energyEnv.Resize(dataSize);
00126 energyEnv.SetSize(dataSize);
00127
00128
00129 const TData omega_c = 2*PI*20/mpAudio->GetSampleRate();
00130 const TData alpha = (1-sin(omega_c)) / cos(omega_c);
00131
00132 const TData b0 = (1-alpha)/2;
00133 const TData a1 = -alpha;
00134
00135
00136 energyEnv[0] = b0*CLAM::Abs(data[0]);
00137 TData maxVal = energyEnv[0];
00138
00139 for (TIndex i=1; i<dataSize; i++) {
00140 energyEnv[i] = b0*(CLAM::Abs(data[i]) + CLAM::Abs(data[i-1])) - a1*energyEnv[i-1];
00141 if (energyEnv[i] > maxVal) maxVal = energyEnv[i];
00142 }
00143
00144
00145 const TData startThreshold = 0.02*maxVal;
00146 const TData stopThreshold = 0.80*maxVal;
00147
00148 TIndex startIdx;
00149 for (startIdx=0; startIdx<dataSize; startIdx++) {
00150 if (energyEnv[startIdx] > startThreshold) break;
00151 }
00152
00153 TIndex stopIdx;
00154 for (stopIdx=startIdx; stopIdx<dataSize; stopIdx++) {
00155 if (energyEnv[stopIdx] > stopThreshold) break;
00156 }
00157
00158 mComputedAttackTime=(stopIdx - startIdx) / mpAudio->GetSampleRate();
00159 mIsAttackTimeComputed=true;
00160 return mComputedAttackTime;
00161 }
00162
00163
00164 TData AudioDescriptors::ComputeLogAttackTime()
00165 {
00166 ComputeAttackTime();
00167 if (mComputedAttackTime==0)
00168 return log10(mEpsilon);
00169 return log10(mComputedAttackTime);
00170 }
00171
00172
00173 TData AudioDescriptors::ComputeDecrease()
00174 {
00175 const DataArray& data = mpAudio->GetBuffer();
00176 const TSize dataSize = mpAudio->GetSize();
00177
00178
00179 const double omega_c = 2*PI*20/mpAudio->GetSampleRate();
00180 const double alpha = (1-sin(omega_c)) / cos(omega_c);
00181
00182 const double b0 = (1-alpha)/2;
00183 const double a1 = -alpha;
00184
00185
00186 double y = b0*CLAM::Abs(data[0]);
00187 TData correctedY = y<mEpsilon ? mEpsilon : y;
00188 double logEnv = log10(correctedY);
00189
00190 TData maxVal = logEnv;
00191 TSize maxIdx = 0;
00192 double sumXX = 0;
00193 double sumY = 0;
00194 double sumXY = 0;
00195
00196 for (TIndex i=1; i<dataSize; i++)
00197 {
00198 y = b0*(CLAM::Abs(data[i-1]) + CLAM::Abs(data[i])) - a1*y;
00199 correctedY = y<mEpsilon ? mEpsilon : y;
00200 const double logEnv = log10(correctedY);
00201
00202 if (logEnv > maxVal)
00203 {
00204 maxVal = logEnv;
00205 maxIdx = i;
00206 sumXX = 0;
00207 sumY = 0;
00208 sumXY = 0;
00209 }
00210 sumY += logEnv;
00211 sumXY += i*logEnv;
00212 sumXX += i*i;
00213 }
00214
00215
00216 const long N = dataSize - maxIdx;
00217 TData sumX = N*(N + 2*maxIdx - 1)/2;
00218
00219 TData num = N * sumXY - sumX * sumY;
00220 TData den = N * sumXX - sumX * sumX;
00221
00222 return (num / den) * mpAudio->GetSampleRate();
00223 }
00224
00225
00226 AudioDescriptors operator * (const AudioDescriptors& a,TData mult)
00227 {
00228
00229 AudioDescriptors tmpD(a);
00230
00231 if (a.HasMean())
00232 {
00233 tmpD.SetMean(a.GetMean()*mult);
00234 }
00235 if (a.HasTemporalCentroid())
00236 {
00237 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*mult);
00238 }
00239 if (a.HasEnergy())
00240 {
00241 tmpD.SetEnergy(a.GetEnergy()*mult);
00242 }
00243 if(a.HasVariance())
00244 {
00245 tmpD.SetVariance(a.GetVariance()*mult);
00246 }
00247 if(a.HasZeroCrossingRate())
00248 {
00249 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*mult);
00250 }
00251 if(a.HasRiseTime())
00252 {
00253 tmpD.SetRiseTime(a.GetRiseTime()*mult);
00254 }
00255 if(a.HasLogAttackTime())
00256 {
00257 tmpD.SetLogAttackTime(a.GetLogAttackTime()*mult);
00258 }
00259 if(a.HasDecrease())
00260 {
00261 tmpD.SetDecrease(a.GetDecrease()*mult);
00262 }
00263 return tmpD;
00264 }
00265
00266 AudioDescriptors operator * (TData mult, const AudioDescriptors& a)
00267 {
00268 return a*mult;
00269 }
00270
00271 AudioDescriptors operator * (const AudioDescriptors& a,const AudioDescriptors& b)
00272 {
00273 AudioDescriptors tmpD;
00274
00275 if (a.HasMean() && b.HasMean() )
00276 {
00277 tmpD.AddMean();
00278 tmpD.UpdateData();
00279 tmpD.SetMean(a.GetMean()*b.GetMean() );
00280 }
00281 if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
00282 {
00283 tmpD.AddTemporalCentroid();
00284 tmpD.UpdateData();
00285 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*b.GetTemporalCentroid() );
00286 }
00287 if (a.HasEnergy() && b.HasEnergy() )
00288 {
00289 tmpD.AddEnergy();
00290 tmpD.UpdateData();
00291 tmpD.SetEnergy(a.GetEnergy()*b.GetEnergy() );
00292 }
00293 if(a.HasVariance() && b.HasVariance() )
00294 {
00295 tmpD.AddVariance();
00296 tmpD.UpdateData();
00297 tmpD.SetVariance(a.GetVariance()*b.GetVariance() );
00298 }
00299 if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
00300 {
00301 tmpD.AddZeroCrossingRate();
00302 tmpD.UpdateData();
00303 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*b.GetZeroCrossingRate() );
00304 }
00305 if(a.HasRiseTime() && b.HasRiseTime() )
00306 {
00307 tmpD.AddRiseTime();
00308 tmpD.UpdateData();
00309 tmpD.SetRiseTime(a.GetRiseTime()*b.GetRiseTime() );
00310 }
00311 if(a.HasLogAttackTime() && b.HasLogAttackTime() )
00312 {
00313 tmpD.AddLogAttackTime();
00314 tmpD.UpdateData();
00315 tmpD.SetLogAttackTime(a.GetLogAttackTime()*b.GetLogAttackTime() );
00316 }
00317 if(a.HasDecrease() && b.HasDecrease() )
00318 {
00319 tmpD.AddDecrease();
00320 tmpD.UpdateData();
00321 tmpD.SetDecrease(a.GetDecrease()*b.GetDecrease() );
00322 }
00323 return tmpD;
00324 }
00325
00326 AudioDescriptors operator + (const AudioDescriptors& a,const AudioDescriptors& b)
00327 {
00328 AudioDescriptors tmpD;
00329
00330 if (a.HasMean() && b.HasMean() )
00331 {
00332 tmpD.AddMean();
00333 tmpD.UpdateData();
00334 tmpD.SetMean(a.GetMean()+b.GetMean() );
00335 }
00336 if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
00337 {
00338 tmpD.AddTemporalCentroid();
00339 tmpD.UpdateData();
00340 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()+b.GetTemporalCentroid() );
00341 }
00342 if (a.HasEnergy() && b.HasEnergy() )
00343 {
00344 tmpD.AddEnergy();
00345 tmpD.UpdateData();
00346 tmpD.SetEnergy(a.GetEnergy()+b.GetEnergy() );
00347 }
00348 if(a.HasVariance() && b.HasVariance() )
00349 {
00350 tmpD.AddVariance();
00351 tmpD.UpdateData();
00352 tmpD.SetVariance(a.GetVariance()+b.GetVariance() );
00353 }
00354 if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
00355 {
00356 tmpD.AddZeroCrossingRate();
00357 tmpD.UpdateData();
00358 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()+b.GetZeroCrossingRate() );
00359 }
00360 if(a.HasRiseTime() && b.HasRiseTime() )
00361 {
00362 tmpD.AddRiseTime();
00363 tmpD.UpdateData();
00364 tmpD.SetRiseTime(a.GetRiseTime()+b.GetRiseTime() );
00365 }
00366 if(a.HasLogAttackTime() && b.HasLogAttackTime() )
00367 {
00368 tmpD.AddLogAttackTime();
00369 tmpD.UpdateData();
00370 tmpD.SetLogAttackTime(a.GetLogAttackTime()+b.GetLogAttackTime() );
00371 }
00372 if(a.HasDecrease() && b.HasDecrease() )
00373 {
00374 tmpD.AddDecrease();
00375 tmpD.UpdateData();
00376 tmpD.SetDecrease(a.GetDecrease()+b.GetDecrease() );
00377 }
00378 return tmpD;
00379
00380 }
00381
00382 AudioDescriptors operator - (const AudioDescriptors& a,const AudioDescriptors& b)
00383 {
00384 return a+((-1)*b);
00385 }
00386
00387 AudioDescriptors operator / (const AudioDescriptors& a,TData div)
00388 {
00389 return a*(1/div);
00390 }
00391
00392 }
00393