CepstralTransform.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2004 MUSIC TECHNOLOGY GROUP (MTG)
00003  *                         UNIVERSITAT POMPEU FABRA
00004  *
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #include "CepstralTransform.hxx"
00023 #include "OSDefines.hxx"
00024 #include "Assert.hxx"
00025 #include "ProcessingFactory.hxx"
00026 
00027 namespace CLAM
00028 {
00029 namespace Hidden 
00030 {
00031         static const char* metadata[] = {
00032                 "key", "CepstralTransform",
00033                 "category", "Analysis",
00034                 "description", "CepstralTransform",
00035                 0
00036         };
00037         static FactoryRegistrator<ProcessingFactory, CepstralTransform> reg = metadata;
00038 }
00039 
00040 void CepstralTransformConfig::DefaultInit()
00041 {
00042         AddAll();
00043         UpdateData();
00044         SetNumMelCoefficients(20);
00045         SetNumCepstrumCoefficients(20);
00046         SetUseBase10(false);
00047 }
00048 
00049 CepstralTransform::CepstralTransform(  )
00050         : mIn("Mel Spectrum", this)
00051         , mOut("Mel Cepstrum", this)
00052 {
00053         Configure(CepstralTransformConfig());
00054 }
00055 
00056 CepstralTransform::CepstralTransform( const CepstralTransformConfig& cfg )
00057         : mIn("Mel Spectrum", this)
00058         , mOut("Mel Cepstrum", this)
00059 {
00060         Configure( cfg );
00061 }
00062 
00063 bool CepstralTransform::ConcreteConfigure( const ProcessingConfig& cfg )
00064 {
00065         CopyAsConcreteConfig( mConfig, cfg );
00066 
00067         mLogBuffer.Resize(mConfig.GetNumMelCoefficients());
00068         mLogBuffer.SetSize(mConfig.GetNumMelCoefficients());
00069 
00070         return true;
00071 }
00072 
00073 bool CepstralTransform::Do()
00074 {
00075         const MelSpectrum & spectrum = mIn.GetData();
00076         MelCepstrum & cepstrum = mOut.GetData();
00077         bool ok = Do(spectrum, cepstrum);
00078         mIn.Consume();
00079         mOut.Produce();
00080         return ok;
00081 }
00082 
00083 bool CepstralTransform::ConcreteStart()
00084 {
00085         return true;
00086 }
00087 
00088 bool CepstralTransform::Do( const MelSpectrum& melSpec, MelCepstrum& melCeps )
00089 {
00090         if ( !AbleToExecute() ) return false; // object was disabled
00091 
00092         const DataArray& melCoeffs = melSpec.GetCoefficients();
00093         DataArray&  cepstrumCoeffs = melCeps.GetCoefficients();
00094         const TSize numMelCoefficients = melCoeffs.Size();
00095         const TSize numCepstrumCoeffs  = mConfig.GetNumCepstrumCoefficients();
00096 
00097         if ( cepstrumCoeffs.Size() < numCepstrumCoeffs )
00098         {
00099                 cepstrumCoeffs.Resize(numCepstrumCoeffs);
00100                 cepstrumCoeffs.SetSize(numCepstrumCoeffs);
00101         }
00102 
00103         melCeps.SetCenterTime(melSpec.GetCenterTime());
00104         melCeps.SetLowCutoff(melSpec.GetLowCutoff());
00105         melCeps.SetHighCutoff(melSpec.GetHighCutoff());
00106 
00107         /* Take the logarithm of the Mel spectrum.  NOTE: What is the
00108                  significance of log vs. log10? */
00109         const TData melFloor  = 1.0;
00110         const bool  useBase10 = mConfig.GetUseBase10();
00111 
00112         if ( mLogBuffer.Size()!=numMelCoefficients)
00113         {
00114                 // TODO: This has realtime penalty
00115                 mLogBuffer.Resize(numMelCoefficients);
00116                 mLogBuffer.SetSize(numMelCoefficients);
00117         }
00118         for (TIndex i=0; i<numMelCoefficients; i++) {
00119                 if (melCoeffs[i] < melFloor)
00120                         mLogBuffer[i] = useBase10 ? log10(melFloor) : log(melFloor);
00121                 else
00122                         mLogBuffer[i] = useBase10 ? log10(melCoeffs[i]) : log(melCoeffs[i]);
00123         }
00124 
00125         /* Take the DCT of the logarithm. */
00126         const TData piord = PI / (TData)numMelCoefficients;
00127         const TData scale = sqrt(2.0/(TData)numMelCoefficients);
00128 
00129         // Compute N-1 last coefficients.
00130         TData freq = 0;
00131         for (TIndex i=1; i<numCepstrumCoeffs; i++)
00132         {
00133                 freq = i*piord;
00134                 cepstrumCoeffs[i] = 0.0;
00135                 for (TIndex j=0; j<numMelCoefficients; j++)
00136                         cepstrumCoeffs[i] += mLogBuffer[j] * cos(freq*(j+1 - 0.5));
00137                 cepstrumCoeffs[i] *= scale;
00138         }
00139 
00140         // Compute first coefficient.
00141         for (TIndex j=0; j<numMelCoefficients; j++)
00142                 cepstrumCoeffs[0] += mLogBuffer[j];
00143         cepstrumCoeffs[0] *= scale;
00144 
00145 
00146         /* NOTE: How about implementing cepstral liftering from HTK? Probably not. */
00147 
00148         return true;
00149 }
00150 
00151 }
00152 
Generated by  doxygen 1.6.3