00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <cmath>
00025 #include "AudioDescriptors.hxx"
00026 #include "Audio.hxx"
00027 #include "OSDefines.hxx"
00028
00029 namespace CLAM {
00030
00031
00032 const TData AudioDescriptors::mEpsilon = 1e-5;
00033
00034 AudioDescriptors::AudioDescriptors(Audio* pAudio): DescriptorAbs(eNumAttr)
00035 {
00036 MandatoryInit();
00037 mpAudio=pAudio;
00038 }
00039
00040 AudioDescriptors::AudioDescriptors(TData initVal):DescriptorAbs(eNumAttr)
00041 {
00042 MandatoryInit();
00043 AddAll();
00044 UpdateData();
00045 SetMean(initVal);
00046 SetTemporalCentroid(initVal);
00047 SetEnergy(initVal);
00048 SetVariance(initVal);
00049 SetZeroCrossingRate(initVal);
00050 SetRiseTime(initVal);
00051 SetLogAttackTime(initVal);
00052 SetDecrease(initVal);
00053 }
00054
00055 void AudioDescriptors::DefaultInit() {
00056 mpAudio=0;
00057 }
00058
00059
00060 void AudioDescriptors::CopyInit(const AudioDescriptors & copied) {
00061 mpAudio=copied.mpAudio;
00062 }
00063
00064 const Audio* AudioDescriptors::GetpAudio() const {
00065 return mpAudio;
00066 }
00067
00068
00069
00070 void AudioDescriptors::SetpAudio(Audio* pAudio) {
00071 mpAudio=pAudio;
00072
00073 InitStats(&mpAudio->GetBuffer());
00074 mIsAttackTimeComputed=false;
00075 }
00076
00077 void AudioDescriptors::ConcreteCompute()
00078 {
00079 if (HasMean())
00080 SetMean(mpStats->GetMean());
00081 if (HasTemporalCentroid())
00082 SetTemporalCentroid(mpStats->GetCentroid()/mpAudio->GetSampleRate());
00083 if (HasEnergy())
00084 SetEnergy(mpStats->GetEnergy());
00085 if(HasVariance())
00086 SetVariance(mpStats->GetVariance());
00087 if(HasZeroCrossingRate())
00088 SetZeroCrossingRate(ComputeZeroCrossingRate());
00089 if(HasRiseTime())
00090 SetRiseTime(ComputeAttackTime());
00091 if(HasLogAttackTime())
00092 SetLogAttackTime(ComputeLogAttackTime());
00093 if(HasDecrease())
00094 SetDecrease(ComputeDecrease());
00095 }
00096
00097 TData AudioDescriptors::ComputeZeroCrossingRate()
00098 {
00099 DataArray& data = mpAudio->GetBuffer();
00100
00101 int signChangeCount = 0;
00102 const TSize size = data.Size();
00103 bool wasPositive = data[0] > 0.0;
00104
00105 for (int i=1; i<size; i++)
00106 {
00107 const bool isPositive = (data[i] > 0.0);
00108 if (wasPositive == isPositive) continue;
00109
00110 signChangeCount++;
00111 wasPositive = isPositive;
00112 }
00113
00114 return ((TData)signChangeCount)/size;
00115 }
00116
00117 TData AudioDescriptors::ComputeAttackTime()
00118 {
00119 if(mIsAttackTimeComputed) return mComputedAttackTime;
00120
00121 const DataArray& data = mpAudio->GetBuffer();
00122 const TSize dataSize = mpAudio->GetSize();
00123
00124 DataArray energyEnv;
00125 energyEnv.Resize(dataSize);
00126 energyEnv.SetSize(dataSize);
00127
00128
00129 const TData omega_c = 2*PI*20/mpAudio->GetSampleRate();
00130 const TData alpha = (1-sin(omega_c)) / cos(omega_c);
00131
00132 const TData b0 = (1-alpha)/2;
00133 const TData a1 = -alpha;
00134
00135
00136 energyEnv[0] = b0*CLAM::Abs(data[0]);
00137 TData maxVal = energyEnv[0];
00138
00139 for (TIndex i=1; i<dataSize; i++) {
00140 energyEnv[i] = b0*(CLAM::Abs(data[i]) + CLAM::Abs(data[i-1])) - a1*energyEnv[i-1];
00141 if (energyEnv[i] > maxVal) maxVal = energyEnv[i];
00142 }
00143
00144
00145 const TData startThreshold = 0.02*maxVal;
00146 const TData stopThreshold = 0.80*maxVal;
00147
00148 TIndex startIdx;
00149 for (startIdx=0; startIdx<dataSize; startIdx++) {
00150 if (energyEnv[startIdx] > startThreshold) break;
00151 }
00152
00153 TIndex stopIdx;
00154 for (stopIdx=startIdx; stopIdx<dataSize; stopIdx++) {
00155 if (energyEnv[stopIdx] > stopThreshold) break;
00156 }
00157
00158 mComputedAttackTime=(stopIdx - startIdx) / mpAudio->GetSampleRate();
00159 mIsAttackTimeComputed=true;
00160 return mComputedAttackTime;
00161 }
00162
00163
00164 TData AudioDescriptors::ComputeLogAttackTime()
00165 {
00166 ComputeAttackTime();
00167 if (mComputedAttackTime==0)
00168 return log10(mEpsilon);
00169 return log10(mComputedAttackTime);
00170 }
00171
00172
00173 TData AudioDescriptors::ComputeDecrease()
00174 {
00175 const DataArray& data = mpAudio->GetBuffer();
00176 const TSize dataSize = mpAudio->GetSize();
00177
00178
00179 const double omega_c = 2*PI*20/mpAudio->GetSampleRate();
00180 const double alpha = (1-sin(omega_c)) / cos(omega_c);
00181
00182 const double b0 = (1-alpha)/2;
00183 const double a1 = -alpha;
00184
00185
00186 double y = b0*CLAM::Abs(data[0]);
00187 TData correctedY = y<mEpsilon ? mEpsilon : y;
00188 double logEnv = log10(correctedY);
00189
00190 TData maxVal = logEnv;
00191 TSize maxIdx = 0;
00192 double sumXX = 0;
00193 double sumY = 0;
00194 double sumXY = 0;
00195
00196 for (TIndex i=1; i<dataSize; i++)
00197 {
00198 y = b0*(CLAM::Abs(data[i-1]) + CLAM::Abs(data[i])) - a1*y;
00199 correctedY = y<mEpsilon ? mEpsilon : y;
00200 const double logEnv = log10(correctedY);
00201
00202 if (logEnv > maxVal)
00203 {
00204 maxVal = logEnv;
00205 maxIdx = i;
00206 sumXX = 0;
00207 sumY = 0;
00208 sumXY = 0;
00209 }
00210 sumY += logEnv;
00211 sumXY += i*logEnv;
00212 sumXX += i*i;
00213 }
00214
00215
00216 const long N = dataSize - maxIdx;
00217 TData sumX = N*(N + 2*maxIdx - 1)/2;
00218
00219 TData num = N * sumXY - sumX * sumY;
00220 TData den = N * sumXX - sumX * sumX;
00221
00222 return (num / den) * mpAudio->GetSampleRate();
00223 }
00224
00225
00226 AudioDescriptors operator * (const AudioDescriptors& a,TData mult)
00227 {
00228
00229 AudioDescriptors tmpD(a);
00230
00231 if (a.HasMean())
00232 {
00233 tmpD.SetMean(a.GetMean()*mult);
00234 }
00235 if (a.HasTemporalCentroid())
00236 {
00237 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*mult);
00238 }
00239 if (a.HasEnergy())
00240 {
00241 tmpD.SetEnergy(a.GetEnergy()*mult);
00242 }
00243 if(a.HasVariance())
00244 {
00245 tmpD.SetVariance(a.GetVariance()*mult);
00246 }
00247 if(a.HasZeroCrossingRate())
00248 {
00249 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*mult);
00250 }
00251 if(a.HasRiseTime())
00252 {
00253 tmpD.SetRiseTime(a.GetRiseTime()*mult);
00254 }
00255 if(a.HasLogAttackTime())
00256 {
00257 tmpD.SetLogAttackTime(a.GetLogAttackTime()*mult);
00258 }
00259 if(a.HasDecrease())
00260 {
00261 tmpD.SetDecrease(a.GetDecrease()*mult);
00262 }
00263 return tmpD;
00264 }
00265
00266 AudioDescriptors operator * (TData mult, const AudioDescriptors& a)
00267 {
00268 return a*mult;
00269 }
00270
00271 AudioDescriptors operator * (const AudioDescriptors& a,const AudioDescriptors& b)
00272 {
00273 AudioDescriptors tmpD;
00274
00275 if (a.HasMean() && b.HasMean() )
00276 {
00277 tmpD.AddMean();
00278 tmpD.UpdateData();
00279 tmpD.SetMean(a.GetMean()*b.GetMean() );
00280 }
00281 if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
00282 {
00283 tmpD.AddTemporalCentroid();
00284 tmpD.UpdateData();
00285 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*b.GetTemporalCentroid() );
00286 }
00287 if (a.HasEnergy() && b.HasEnergy() )
00288 {
00289 tmpD.AddEnergy();
00290 tmpD.UpdateData();
00291 tmpD.SetEnergy(a.GetEnergy()*b.GetEnergy() );
00292 }
00293 if(a.HasVariance() && b.HasVariance() )
00294 {
00295 tmpD.AddVariance();
00296 tmpD.UpdateData();
00297 tmpD.SetVariance(a.GetVariance()*b.GetVariance() );
00298 }
00299 if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
00300 {
00301 tmpD.AddZeroCrossingRate();
00302 tmpD.UpdateData();
00303 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*b.GetZeroCrossingRate() );
00304 }
00305 if(a.HasRiseTime() && b.HasRiseTime() )
00306 {
00307 tmpD.AddRiseTime();
00308 tmpD.UpdateData();
00309 tmpD.SetRiseTime(a.GetRiseTime()*b.GetRiseTime() );
00310 }
00311 if(a.HasLogAttackTime() && b.HasLogAttackTime() )
00312 {
00313 tmpD.AddLogAttackTime();
00314 tmpD.UpdateData();
00315 tmpD.SetLogAttackTime(a.GetLogAttackTime()*b.GetLogAttackTime() );
00316 }
00317 if(a.HasDecrease() && b.HasDecrease() )
00318 {
00319 tmpD.AddDecrease();
00320 tmpD.UpdateData();
00321 tmpD.SetDecrease(a.GetDecrease()*b.GetDecrease() );
00322 }
00323 return tmpD;
00324 }
00325
00326 AudioDescriptors operator + (const AudioDescriptors& a,const AudioDescriptors& b)
00327 {
00328 AudioDescriptors tmpD;
00329
00330 if (a.HasMean() && b.HasMean() )
00331 {
00332 tmpD.AddMean();
00333 tmpD.UpdateData();
00334 tmpD.SetMean(a.GetMean()+b.GetMean() );
00335 }
00336 if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
00337 {
00338 tmpD.AddTemporalCentroid();
00339 tmpD.UpdateData();
00340 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()+b.GetTemporalCentroid() );
00341 }
00342 if (a.HasEnergy() && b.HasEnergy() )
00343 {
00344 tmpD.AddEnergy();
00345 tmpD.UpdateData();
00346 tmpD.SetEnergy(a.GetEnergy()+b.GetEnergy() );
00347 }
00348 if(a.HasVariance() && b.HasVariance() )
00349 {
00350 tmpD.AddVariance();
00351 tmpD.UpdateData();
00352 tmpD.SetVariance(a.GetVariance()+b.GetVariance() );
00353 }
00354 if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
00355 {
00356 tmpD.AddZeroCrossingRate();
00357 tmpD.UpdateData();
00358 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()+b.GetZeroCrossingRate() );
00359 }
00360 if(a.HasRiseTime() && b.HasRiseTime() )
00361 {
00362 tmpD.AddRiseTime();
00363 tmpD.UpdateData();
00364 tmpD.SetRiseTime(a.GetRiseTime()+b.GetRiseTime() );
00365 }
00366 if(a.HasLogAttackTime() && b.HasLogAttackTime() )
00367 {
00368 tmpD.AddLogAttackTime();
00369 tmpD.UpdateData();
00370 tmpD.SetLogAttackTime(a.GetLogAttackTime()+b.GetLogAttackTime() );
00371 }
00372 if(a.HasDecrease() && b.HasDecrease() )
00373 {
00374 tmpD.AddDecrease();
00375 tmpD.UpdateData();
00376 tmpD.SetDecrease(a.GetDecrease()+b.GetDecrease() );
00377 }
00378 return tmpD;
00379
00380 }
00381
00382 AudioDescriptors operator - (const AudioDescriptors& a,const AudioDescriptors& b)
00383 {
00384 return a+((-1)*b);
00385 }
00386
00387 AudioDescriptors operator / (const AudioDescriptors& a,TData div)
00388 {
00389 return a*(1/div);
00390 }
00391
00392 }
00393