CLAM: AudioDescriptors.cxx Source File

00001 /*
00002  * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG)
00003  *                         UNIVERSITAT POMPEU FABRA
00004  *
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 
00023 
00024 #include <cmath>
00025 #include "AudioDescriptors.hxx"
00026 #include "Audio.hxx"
00027 #include "OSDefines.hxx"
00028 
00029 namespace CLAM {
00030 
00031 
00032 const TData AudioDescriptors::mEpsilon = 1e-5;
00033 
00034 AudioDescriptors::AudioDescriptors(Audio* pAudio): DescriptorAbs(eNumAttr)
00035 {
00036         MandatoryInit();
00037         mpAudio=pAudio;
00038 }
00039 
00040 AudioDescriptors::AudioDescriptors(TData initVal):DescriptorAbs(eNumAttr)
00041 {
00042         MandatoryInit();
00043         AddAll();
00044         UpdateData();
00045         SetMean(initVal);
00046         SetTemporalCentroid(initVal);
00047         SetEnergy(initVal);
00048         SetVariance(initVal);
00049         SetZeroCrossingRate(initVal);
00050         SetRiseTime(initVal);
00051         SetLogAttackTime(initVal);
00052         SetDecrease(initVal);
00053 }
00054 
00055 void AudioDescriptors::DefaultInit() {
00056         mpAudio=0;
00057 }
00058 
00059 
00060 void AudioDescriptors::CopyInit(const AudioDescriptors & copied) {
00061         mpAudio=copied.mpAudio;
00062 }
00063 
00064 const Audio* AudioDescriptors::GetpAudio() const {
00065         return mpAudio;
00066 }
00067 
00068 
00069 
00070 void AudioDescriptors::SetpAudio(Audio* pAudio) {
00071         mpAudio=pAudio;
00072         //TODO: it may give problems because pointer passed
00073         InitStats(&mpAudio->GetBuffer());
00074         mIsAttackTimeComputed=false;
00075 }
00076 
00077 void AudioDescriptors::ConcreteCompute()
00078 {
00079         if (HasMean())
00080                 SetMean(mpStats->GetMean());
00081         if (HasTemporalCentroid())
00082                 SetTemporalCentroid(mpStats->GetCentroid()/mpAudio->GetSampleRate());
00083         if (HasEnergy())
00084                 SetEnergy(mpStats->GetEnergy());
00085         if(HasVariance())
00086                 SetVariance(mpStats->GetVariance());
00087         if(HasZeroCrossingRate())
00088                 SetZeroCrossingRate(ComputeZeroCrossingRate());
00089         if(HasRiseTime())
00090                 SetRiseTime(ComputeAttackTime());
00091         if(HasLogAttackTime())
00092                 SetLogAttackTime(ComputeLogAttackTime());
00093         if(HasDecrease())
00094                 SetDecrease(ComputeDecrease());
00095 }
00096 
00097 TData AudioDescriptors::ComputeZeroCrossingRate()
00098 {
00099         DataArray& data = mpAudio->GetBuffer();
00100 
00101         int signChangeCount = 0;
00102         const TSize size = data.Size();
00103         bool wasPositive = data[0] > 0.0;
00104 
00105         for (int i=1; i<size; i++)
00106         {
00107                 const bool isPositive = (data[i] > 0.0);
00108                 if (wasPositive ==  isPositive) continue;
00109 
00110                 signChangeCount++;
00111                 wasPositive = isPositive;
00112         }
00113         // Average
00114         return ((TData)signChangeCount)/size;
00115 }
00116 
00117 TData AudioDescriptors::ComputeAttackTime()
00118 {
00119         if(mIsAttackTimeComputed) return mComputedAttackTime;
00120 
00121         const DataArray& data     = mpAudio->GetBuffer();
00122         const TSize      dataSize = mpAudio->GetSize();
00123 
00124         DataArray energyEnv;
00125         energyEnv.Resize(dataSize);
00126         energyEnv.SetSize(dataSize);
00127 
00128         // Compute 20Hz lowpass filter coefficients
00129         const TData omega_c = 2*PI*20/mpAudio->GetSampleRate();
00130         const TData alpha   = (1-sin(omega_c)) / cos(omega_c);
00131 
00132         const TData b0 = (1-alpha)/2;
00133         const TData a1 = -alpha;
00134 
00135         // Find maximum value
00136         energyEnv[0] = b0*CLAM::Abs(data[0]);
00137         TData maxVal = energyEnv[0];
00138 
00139         for (TIndex i=1; i<dataSize; i++) {
00140                 energyEnv[i] = b0*(CLAM::Abs(data[i]) + CLAM::Abs(data[i-1])) - a1*energyEnv[i-1];
00141                 if (energyEnv[i] > maxVal) maxVal = energyEnv[i];
00142         }
00143 
00144         // Locate start and stop of attack
00145         const TData startThreshold = 0.02*maxVal;
00146         const TData stopThreshold  = 0.80*maxVal;
00147 
00148         TIndex startIdx;
00149         for (startIdx=0; startIdx<dataSize; startIdx++) {
00150                 if (energyEnv[startIdx] > startThreshold) break;
00151         }
00152 
00153         TIndex stopIdx;
00154         for (stopIdx=startIdx; stopIdx<dataSize; stopIdx++) {
00155                 if (energyEnv[stopIdx] > stopThreshold) break;
00156         }
00157 
00158         mComputedAttackTime=(stopIdx - startIdx) / mpAudio->GetSampleRate();
00159         mIsAttackTimeComputed=true;
00160         return mComputedAttackTime;
00161 }
00162 
00163 
00164 TData AudioDescriptors::ComputeLogAttackTime()
00165 {
00166         ComputeAttackTime();
00167         if (mComputedAttackTime==0)
00168                 return log10(mEpsilon);
00169         return log10(mComputedAttackTime);
00170 }
00171 
00172 
00173 TData AudioDescriptors::ComputeDecrease()
00174 {
00175         const DataArray& data = mpAudio->GetBuffer();
00176         const TSize dataSize  = mpAudio->GetSize();
00177 
00178         // Compute 20Hz lowpass filter coefficients
00179         const double omega_c = 2*PI*20/mpAudio->GetSampleRate();
00180         const double alpha   = (1-sin(omega_c)) / cos(omega_c);
00181 
00182         const double b0 = (1-alpha)/2;
00183         const double a1 = -alpha;
00184 
00185         // Find maximum value
00186         double y = b0*CLAM::Abs(data[0]);
00187         TData correctedY = y<mEpsilon ? mEpsilon : y;
00188         double logEnv = log10(correctedY);
00189 
00190         TData maxVal = logEnv;
00191         TSize maxIdx = 0;
00192         double sumXX = 0;
00193         double sumY = 0;
00194         double sumXY = 0;
00195 
00196         for (TIndex i=1; i<dataSize; i++)
00197         {
00198                 y = b0*(CLAM::Abs(data[i-1]) + CLAM::Abs(data[i])) - a1*y;
00199                 correctedY = y<mEpsilon ? mEpsilon : y;
00200                 const double logEnv = log10(correctedY);
00201 
00202                 if (logEnv > maxVal)
00203                 {
00204                         maxVal = logEnv;
00205                         maxIdx = i;
00206                         sumXX = 0;
00207                         sumY = 0;
00208                         sumXY = 0;
00209                 }
00210                 sumY += logEnv;
00211                 sumXY += i*logEnv;
00212                 sumXX += i*i;
00213         }
00214 
00215         // Compute means and gradient of decay part
00216         const long N = dataSize - maxIdx;
00217         TData sumX = N*(N + 2*maxIdx - 1)/2;
00218 
00219         TData num = N * sumXY - sumX * sumY;
00220         TData den = N * sumXX - sumX * sumX;
00221 
00222         return (num / den) * mpAudio->GetSampleRate();
00223 }
00224 
00225 
00226 AudioDescriptors operator * (const AudioDescriptors& a,TData mult)
00227 {
00228         
00229         AudioDescriptors tmpD(a);
00230 
00231         if (a.HasMean())
00232         {
00233                 tmpD.SetMean(a.GetMean()*mult);
00234         }
00235         if (a.HasTemporalCentroid())
00236         {
00237                 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*mult);
00238         }
00239         if (a.HasEnergy())
00240         {
00241                 tmpD.SetEnergy(a.GetEnergy()*mult);
00242         }
00243         if(a.HasVariance())
00244         {
00245                 tmpD.SetVariance(a.GetVariance()*mult);
00246         }
00247         if(a.HasZeroCrossingRate())
00248         {
00249                 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*mult);
00250         }
00251         if(a.HasRiseTime())
00252         {
00253                 tmpD.SetRiseTime(a.GetRiseTime()*mult);
00254         }
00255         if(a.HasLogAttackTime())
00256         {
00257                 tmpD.SetLogAttackTime(a.GetLogAttackTime()*mult);
00258         }
00259         if(a.HasDecrease())
00260         {
00261                 tmpD.SetDecrease(a.GetDecrease()*mult);
00262         }
00263         return tmpD;
00264 }
00265 
00266 AudioDescriptors operator * (TData mult, const AudioDescriptors& a)
00267 {
00268         return a*mult;
00269 }
00270 
00271 AudioDescriptors operator * (const AudioDescriptors& a,const AudioDescriptors& b)
00272 {
00273         AudioDescriptors tmpD;
00274 
00275         if (a.HasMean() && b.HasMean() )
00276         {
00277                 tmpD.AddMean();
00278                 tmpD.UpdateData();
00279                 tmpD.SetMean(a.GetMean()*b.GetMean() );
00280         }
00281         if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
00282         {
00283                 tmpD.AddTemporalCentroid();
00284                 tmpD.UpdateData();
00285                 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*b.GetTemporalCentroid() );
00286         }
00287         if (a.HasEnergy() && b.HasEnergy() )
00288         {
00289                 tmpD.AddEnergy();
00290                 tmpD.UpdateData();
00291                 tmpD.SetEnergy(a.GetEnergy()*b.GetEnergy() );
00292         }
00293         if(a.HasVariance() && b.HasVariance() )
00294         {
00295                 tmpD.AddVariance();
00296                 tmpD.UpdateData();
00297                 tmpD.SetVariance(a.GetVariance()*b.GetVariance() );
00298         }
00299         if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
00300         {
00301                 tmpD.AddZeroCrossingRate();
00302                 tmpD.UpdateData();
00303                 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*b.GetZeroCrossingRate() );
00304         }
00305         if(a.HasRiseTime() && b.HasRiseTime() )
00306         {
00307                 tmpD.AddRiseTime();
00308                 tmpD.UpdateData();
00309                 tmpD.SetRiseTime(a.GetRiseTime()*b.GetRiseTime() );
00310         }
00311         if(a.HasLogAttackTime() && b.HasLogAttackTime() )
00312         {
00313                 tmpD.AddLogAttackTime();
00314                 tmpD.UpdateData();
00315                 tmpD.SetLogAttackTime(a.GetLogAttackTime()*b.GetLogAttackTime() );
00316         }
00317         if(a.HasDecrease() && b.HasDecrease() )
00318         {
00319                 tmpD.AddDecrease();
00320                 tmpD.UpdateData();
00321                 tmpD.SetDecrease(a.GetDecrease()*b.GetDecrease() );
00322         }
00323         return tmpD;
00324 }
00325 
00326 AudioDescriptors operator + (const AudioDescriptors& a,const AudioDescriptors& b)
00327 {
00328         AudioDescriptors tmpD;
00329 
00330         if (a.HasMean() && b.HasMean() )
00331         {
00332                 tmpD.AddMean();
00333                 tmpD.UpdateData();
00334                 tmpD.SetMean(a.GetMean()+b.GetMean() );
00335         }
00336         if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
00337         {
00338                 tmpD.AddTemporalCentroid();
00339                 tmpD.UpdateData();
00340                 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()+b.GetTemporalCentroid() );
00341         }
00342         if (a.HasEnergy() && b.HasEnergy() )
00343         {
00344                 tmpD.AddEnergy();
00345                 tmpD.UpdateData();
00346                 tmpD.SetEnergy(a.GetEnergy()+b.GetEnergy() );
00347         }
00348         if(a.HasVariance() && b.HasVariance() )
00349         {
00350                 tmpD.AddVariance();
00351                 tmpD.UpdateData();
00352                 tmpD.SetVariance(a.GetVariance()+b.GetVariance() );
00353         }
00354         if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
00355         {
00356                 tmpD.AddZeroCrossingRate();
00357                 tmpD.UpdateData();
00358                 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()+b.GetZeroCrossingRate() );
00359         }
00360         if(a.HasRiseTime() && b.HasRiseTime() )
00361         {
00362                 tmpD.AddRiseTime();
00363                 tmpD.UpdateData();
00364                 tmpD.SetRiseTime(a.GetRiseTime()+b.GetRiseTime() );
00365         }
00366         if(a.HasLogAttackTime() && b.HasLogAttackTime() )
00367         {
00368                 tmpD.AddLogAttackTime();
00369                 tmpD.UpdateData();
00370                 tmpD.SetLogAttackTime(a.GetLogAttackTime()+b.GetLogAttackTime() );
00371         }
00372         if(a.HasDecrease() && b.HasDecrease() )
00373         {
00374                 tmpD.AddDecrease();
00375                 tmpD.UpdateData();
00376                 tmpD.SetDecrease(a.GetDecrease()+b.GetDecrease() );
00377         }
00378         return tmpD;
00379 
00380 }
00381 
00382 AudioDescriptors operator - (const AudioDescriptors& a,const AudioDescriptors& b)
00383 {
00384         return a+((-1)*b);
00385 }
00386 
00387 AudioDescriptors operator / (const AudioDescriptors& a,TData div) 
00388 {
00389         return a*(1/div);
00390 }
00391 
00392 }
00393