Normalization.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG)
00003  *                         UNIVERSITAT POMPEU FABRA
00004  *
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
#include <algorithm>

#include "DataTypes.hxx"
#include "CLAM_Math.hxx"
#include "Normalization.hxx"
#include "Stats.hxx"
00025 
00026 
00027 
00028 namespace CLAM
00029 {
00030 
00031 
00032         void NormalizationConfig::DefaultInit()
00033         {
00034                 AddType();
00035                 AddFrameSize();
00036                 UpdateData();
00037                 SetType(1);
00038                 SetFrameSize(2205);//0.05s at 44.1k
00039 
00040         }
00041 
00042 
00043         Normalization::Normalization()
00044                 : mIsSilenceCtrl( "Silence", this )
00045         {
00046                 Configure(NormalizationConfig());
00047         }
00048 
00049         Normalization::Normalization(NormalizationConfig& c)
00050                 : mIsSilenceCtrl( "Silence", this )
00051         {
00052                 Configure(c);
00053         }
00054 
00055         Normalization::~Normalization() {}
00056 
00057 
00058         bool Normalization::ConcreteConfigure(const ProcessingConfig& c)
00059         {
00060                 CopyAsConcreteConfig(mConfig,c);
00061 
00062                 mType=mConfig.GetType();
00063                 mFrameSize=mConfig.GetFrameSize();
00064         
00065                 return true;
00066         }
00067 
00068         bool Normalization::Do(void) 
00069         {
00070                 return false;
00071         }
00072         
00073         bool Normalization::Do(Audio &in)
00074         {
00075                 TData scaleFactor = 0;
00076 
00077                 //Type #1: normalizes according to the max energy 
00078                 //Type #2: normalizes according to the average energy
00079                 //Type #3: normalizes according to the threshold under which lies percent% of
00080                 //the energy values that are not silence
00081 
00082                 if ( mType == 1 ) 
00083                         scaleFactor = ComputeScaleFactorFromMaxEnergy( in.GetBuffer() );
00084                 else if ( mType == 2 )
00085                         scaleFactor = ComputeScaleFactorFromAvgEnergy( in.GetBuffer() );
00086                 else if ( mType == 3 )
00087                         scaleFactor = ComputeScaleFactorFromDominantEnergy( in.GetBuffer() );
00088 
00089                 const TData invScaleFactor = 1.0 / scaleFactor;
00090                 DataArray& inBufferSamples = in.GetBuffer();
00091 
00092                 for (int n=0; n<in.GetSize(); n++)
00093                         inBufferSamples[n]*=invScaleFactor;
00094                 
00095                 return true;
00096         }
00097 
00098 
00099         bool Normalization::Do(const Audio& unnorm, Audio& norm)
00100         {
00101                 CLAM_ASSERT( unnorm.GetSize() == norm.GetSize(),
00102                              "Normalization::Do() : input and output audio sizes must match" );
00103 
00104                 TData scaleFactor = 0;
00105 
00106                 //Type #1: normalizes according to the max energy 
00107                 //Type #2: normalizes according to the average energy
00108                 //Type #3: normalizes according to the threshold under which lies percent% of
00109                 //the energy values that are not silence
00110 
00111                 if ( mType == 1 ) 
00112                         scaleFactor = ComputeScaleFactorFromMaxEnergy( unnorm.GetBuffer() );
00113                 else if ( mType == 2 )
00114                         scaleFactor = ComputeScaleFactorFromAvgEnergy( unnorm.GetBuffer() );
00115                 else if ( mType == 3 )
00116                         scaleFactor = ComputeScaleFactorFromDominantEnergy( unnorm.GetBuffer() );
00117 
00118                 const TData invScaleFactor = 1.0 / scaleFactor;
00119                 DataArray& outBufferSamples = norm.GetBuffer();
00120                 const DataArray& inBufferSamples = unnorm.GetBuffer();
00121 
00122                 for (int n=0; n<norm.GetSize(); n++)
00123                         outBufferSamples[n]=inBufferSamples[n]*invScaleFactor;
00124                 
00125                 return true;
00126         }
00127 
00128 
00129         TData Normalization::ComputeScaleFactorFromMaxEnergy( DataArray& inAudio )
00130         {
00131                 TIndex    p = 0;
00132                 const     TIndex end = inAudio.Size() - mFrameSize;
00133                 DataArray chunk;
00134                 TData     maxEnergy = 0.0;
00135                 const     TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 );
00136 
00137                 do
00138                 {
00139                         chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize );
00140                         /* unused: TSize size = chunk.GetSize(); */
00141                         DataArray moments(4);
00142                         moments.SetSize(4);
00143                         Stats myStats(&chunk);
00144                         myStats.GetMoments(moments, FifthOrder);
00145 
00146                         TData currentChunkEnergy = myStats.GetEnergy();
00147 
00148                         //remove silence
00149                         if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization
00150                         {
00151                                 if(maxEnergy<currentChunkEnergy) maxEnergy=currentChunkEnergy;
00152                         }
00153 
00154                         p += mFrameSize;
00155                         
00156                 } while (p <= end );
00157 
00158                 // Enjoy the Silence...
00159                 if ( maxEnergy <= 1e-7 )
00160                 {
00161                         mIsSilenceCtrl.SendControl( true );
00162                         return 1.0;
00163                 }
00164 
00165                 mIsSilenceCtrl.SendControl(false);
00166                         
00167                 return CLAM_sqrt( maxEnergy / TData(mFrameSize ) );
00168                 
00169 
00170         }
00171         
00172         TData Normalization::ComputeScaleFactorFromAvgEnergy( DataArray& inAudio )
00173         {
00174 
00175                 TIndex    p = 0;
00176                 const     TIndex end = inAudio.Size() - mFrameSize;
00177                 DataArray chunk;
00178                 TData     avgEnergy = 0.0;
00179                 const     TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 );
00180 
00181                 do
00182                 {
00183                         chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize );
00184                         DataArray moments(4);
00185                         moments.SetSize(4);
00186                         Stats myStats(&chunk);
00187                         myStats.GetMoments(moments, FifthOrder);
00188 
00189                         TData currentChunkEnergy = myStats.GetEnergy();
00190 
00191                         //remove silence
00192                         if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization
00193                         {
00194                                 avgEnergy += currentChunkEnergy;
00195                         }
00196 
00197                         p += mFrameSize;
00198                         
00199                 } while (p <= end );
00200 
00201                 avgEnergy /= TData( inAudio.Size() );
00202 
00203                 // Enjoy the Silence...
00204                 if ( avgEnergy <= 1e-7 )
00205                 {
00206                         mIsSilenceCtrl.SendControl( true );
00207                         return 1.0;
00208                 }
00209 
00210                 mIsSilenceCtrl.SendControl(false);
00211 
00212                 return CLAM_sqrt( avgEnergy );
00213 
00214         }
00215 
00216         TData Normalization::ComputeScaleFactorFromDominantEnergy( DataArray& inAudio )
00217         {
00218                 TIndex    p = 0;
00219                 const     TIndex end = inAudio.Size() - mFrameSize;
00220                 DataArray chunk;
00221                 DataArray chunksEnergies;
00222                 const     TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 );
00223 
00224                 do
00225                 {
00226                         chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize );
00227                         DataArray moments(4);
00228                         moments.SetSize(4);
00229                         Stats myStats(&chunk);
00230                         myStats.GetMoments(moments, FifthOrder);
00231 
00232                         TData currentChunkEnergy = myStats.GetEnergy();
00233 
00234                         //remove silence
00235                         if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization
00236                         {
00237                                 chunksEnergies.AddElem( currentChunkEnergy );
00238                         }
00239 
00240                         p += mFrameSize;                        
00241                 } while (p <= end );
00242 
00243                 // Enjoy the silence...
00244                 if ( chunksEnergies.Size() == 0 )
00245                 {
00246                         mIsSilenceCtrl.SendControl( true );
00247                         return 1.0;
00248                 }
00249 
00250                 std::sort( chunksEnergies.GetPtr(), chunksEnergies.GetPtr()+chunksEnergies.Size() );
00251 
00252                 //find the threshold under which lies percent% of the energy values
00253                 //that are not silence
00254                 
00255                 int percentage = 90;
00256 
00257                 int i = ( chunksEnergies.Size()*percentage ) / 100;
00258 
00259                 i = ( i == 0 ) ? i : i - 1;
00260 
00261                 mIsSilenceCtrl.SendControl(false);
00262 
00263                 return CLAM_sqrt( chunksEnergies[i]/TData(mFrameSize) );
00264 
00265         }
00266         
00267         void Normalization::CheckSilence( int size )
00268         {
00269                 if (size==0) 
00270                         mIsSilenceCtrl.SendControl(true);
00271                 else
00272                         mIsSilenceCtrl.SendControl(false);
00273         }
00274 
00275 
00276 }
00277 
Generated by  doxygen 1.6.3