Segmentator.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG)
00003  *                         UNIVERSITAT POMPEU FABRA
00004  *
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #include "Segmentator.hxx"
00023 #include "Segment.hxx"
00024 #include <iostream>
00025 #include "SegmentDescriptors.hxx"
00026 
00027 
00028 namespace CLAM
00029 {
00030         class SegmentBoundaries
00031         {
00032         public:
00033                 Array < Array < std::pair < int , TData > > > mArray;
00034                 SegmentBoundaries(int size)
00035                         : mArray(size)
00036                 {
00037                 }
00038         };
00039 }
00040 
00041 using namespace CLAM;
00042 
00046 
00047 std::ostream& operator << (std::ostream& myStream, const TDescriptorsParams& a)
00048 {
00049         switch (a.id)
00050         {
00051                 case SpectralMeanId:
00052                 {
00053                         myStream << "SpectralMean";
00054                         break;
00055                 }
00056                 case SpectralGeometricMeanId :
00057                 {
00058                         myStream << "SpectralGeometricMean";
00059                         break;
00060                 }
00061                 case SpectralEnergyId:
00062                 {
00063                         myStream << "SpectralEnergy";
00064                         break;
00065                 }
00066                 case SpectralCentroidId :
00067                 {
00068                         myStream << "SpectralCentroid";
00069                         break;
00070                 }
00071                 case SpectralMoment2Id:
00072                 {
00073                         myStream << "SpectralMoment2";
00074                         break;
00075                 }
00076                 case SpectralMoment3Id:
00077                 {
00078                         myStream << "SpectralMoment3";
00079                         break;
00080                 }
00081                 case SpectralMoment4Id:
00082                 {
00083                         myStream << "SpectralMoment4";
00084                         break;
00085                 }
00086                 case SpectralMoment5Id:
00087                 {
00088                         myStream << "SpectralMoment5";
00089                         break;
00090                 }
00091                 case SpectralMoment6Id:
00092                 {
00093                         myStream << "SpectralMoment6";
00094                         break;
00095                 }
00096                 case SpectralFlatnessId :
00097                 {
00098                         myStream << "SpectralFlatness";
00099                         break;
00100                 }
00101                 case SpectralKurtosisId  :
00102                 {
00103                         myStream << "SpectralKurtosis";
00104                         break;
00105                 }
00106                 case FundamentalId  :
00107                 {
00108                         myStream << "Fundamental";
00109                         break;
00110                 }
00111                 default:
00112                 {
00113                         myStream << "UnknownDescriptor";
00114                         break;
00115                 }
00116         }
00117         myStream << "threshold=";
00118         myStream << a.threshold;
00119         myStream << " ";
00120         myStream << "percentil=";
00121         myStream << a.percentil;
00122         return myStream;
00123 }
00124 
00125 std::istream& operator >> (std::istream& myStream, const TDescriptorsParams& a)
00126 {
00127         CLAM_ASSERT(false, "TDescriptorParams extractor operator is not implemented");
00128         return myStream;
00129 }
00130 
00131 
00132 void SegmentatorConfig::DefaultInit()
00133 {
00134         //AddAll();
00135         AddDescriptorsParams();
00136         AddMinSegmentLength();
00137         UpdateData();
00138         Array<TDescriptorsParams> tmpArray(0);
00139         SetDescriptorsParams(tmpArray);
00140         SearchArray<TDescriptorsParams> tmpSearch(GetDescriptorsParams());
00141         SetDescriptorsSearch(tmpSearch);
00142         
00143         // Default values
00144         SetMinSegmentLength(0);
00145 };
00146 
00147 void SegmentatorConfig::AddDescParams(const TDescriptorsParams& descParams)
00148 {
00149         TIndex position;
00150         if(GetDescriptorsParams().Size()==0) 
00151                 GetDescriptorsParams().AddElem(descParams);
00152         else if ((position=GetDescriptorsSearch().Find(descParams))==-1)
00153         {
00154                 if(descParams<GetDescriptorsParams()[0])
00155                         GetDescriptorsParams().InsertElem(0,descParams);
00156                 else
00157                         GetDescriptorsParams().AddElem(descParams);
00158         }
00159         else
00160                 GetDescriptorsParams().InsertElem(position,descParams);
00161 }
00162 
00163 /*false if descriptor is not found, true if it is*/
00164 bool SegmentatorConfig::FindDescParams(TDescriptorsParams& descParams)
00165 {
00166         int pos;
00167         if((pos=GetDescriptorsSearch().Find(descParams))!=-1)
00168         {
00169                 descParams.percentil=GetDescriptorsParams()[pos].percentil;
00170                 descParams.threshold=GetDescriptorsParams()[pos].threshold;
00171                 return true;
00172         }
00173         return false;
00174 }
00175 
00176 void SegmentatorConfig::ClearDescParams() {
00177         GetDescriptorsParams().Init();
00178 }
00179 
00183 
00184 Segmentator::Segmentator()
00185 {
00186         Configure(SegmentatorConfig());
00187 }
00188 
00189 Segmentator::Segmentator(const SegmentatorConfig& c)
00190 {
00191         Configure(c);
00192 }
00193 
00194 Segmentator::~Segmentator()
00195 {
00196 }
00197 
00198 bool Segmentator::ConcreteConfigure(const ProcessingConfig& c)
00199 {
00200         CopyAsConcreteConfig(mConfig, c);
00201         return true;
00202 }
00203 
00204 bool Segmentator::Do()
00205 {
00206         CLAM_DEBUG_ASSERT(IsRunning(), "Segmentator: Do(): Not in execution mode");
00207 
00208         CLAM_ASSERT(false, "Segmentator: Do(): Supervised mode not implemented");
00209 
00210         return false;
00211 }
00212 
00213 
00214 bool Segmentator::Do(Segment& originalSegment,SegmentDescriptors& descriptors)
00215 {
00216         int nFrames=originalSegment.GetnFrames();
00217         Matrix descriptorsValues(mConfig.GetDescriptorsParams().Size(),nFrames);
00218         UnwrapDescriptors(originalSegment, descriptors,descriptorsValues);
00219         Algorithm(originalSegment,descriptorsValues);
00220         return true;
00221 }
00222 
00223 
00224 void Segmentator::UnwrapDescriptors(const Segment& originalSegment, SegmentDescriptors& descriptors,Matrix& descriptorsValues)
00225 {
00226         int nFrames=originalSegment.GetnFrames();
00227         int nDescriptors=mConfig.GetDescriptorsParams().Size();
00228         for(int i=0;i<nFrames;i++)
00229         {
00230 /*This looks ugly but right now is the only way to deal with it*/
00231                 int z=0;
00232                 TData value;
00233                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMeanId)
00234                 {
00235                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMean();
00236                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00237                                 descriptorsValues.SetAt(z,i,value);
00238                         else descriptorsValues.SetAt(z,i,0);
00239                         z++;
00240                 }
00241                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralGeometricMeanId )
00242                 {
00243                         value=descriptors.GetFrameD(i).GetSpectrumD().GetGeometricMean();
00244                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00245                                 descriptorsValues.SetAt(z,i,value);
00246                         else descriptorsValues.SetAt(z,i,0);
00247                         z++;
00248                 }
00249                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralEnergyId)
00250                 {
00251                         value=descriptors.GetFrameD(i).GetSpectrumD().GetEnergy();
00252                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00253                                 descriptorsValues.SetAt(z,i,value);
00254                         else descriptorsValues.SetAt(z,i,0);
00255                         z++;
00256                 }
00257                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralCentroidId )
00258                 {
00259                         value=descriptors.GetFrameD(i).GetSpectrumD().GetCentroid();
00260                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00261                                 descriptorsValues.SetAt(z,i,value);
00262                         else descriptorsValues.SetAt(z,i,0);
00263                         z++;
00264                 }
00265                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment2Id)
00266                 {
00267                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment2();
00268                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00269                                 descriptorsValues.SetAt(z,i,value);
00270                         else descriptorsValues.SetAt(z,i,0);
00271                         z++;
00272                 }
00273                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment3Id)
00274                 {
00275                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment3();
00276                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00277                                 descriptorsValues.SetAt(z,i,value);
00278                         else descriptorsValues.SetAt(z,i,0);
00279                         z++;
00280                 }
00281                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment4Id)
00282                 {
00283                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment4();
00284                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00285                                 descriptorsValues.SetAt(z,i,value);
00286                         else descriptorsValues.SetAt(z,i,0);
00287                         z++;
00288                 }
00289                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment5Id)
00290                 {
00291                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment5();
00292                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00293                                 descriptorsValues.SetAt(z,i,value);
00294                         else descriptorsValues.SetAt(z,i,0);
00295                         z++;
00296                 }
00297                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment6Id)
00298                 {
00299                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment6();
00300                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00301                                 descriptorsValues.SetAt(z,i,value);
00302                         else descriptorsValues.SetAt(z,i,0);
00303                         z++;
00304                 }
00305                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralFlatnessId )
00306                 {
00307                         value=descriptors.GetFrameD(i).GetSpectrumD().GetFlatness();
00308                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00309                                 descriptorsValues.SetAt(z,i,value);
00310                         else descriptorsValues.SetAt(z,i,0);
00311                         z++;
00312                 }
00313                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralKurtosisId  )
00314                 {
00315                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMagnitudeKurtosis();
00316                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00317                                 descriptorsValues.SetAt(z,i,value);
00318                         else descriptorsValues.SetAt(z,i,0);
00319                         z++;
00320                 }
00321                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==FundamentalId  )
00322                 {
00323                         value=originalSegment.GetFrame(i).GetFundamental().GetFreq();
00324                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00325                                 descriptorsValues.SetAt(z,i,value);
00326                         else descriptorsValues.SetAt(z,i,0);
00327                         z++;
00328                 }
00329                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioEnergyId)
00330                 {
00331                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetEnergy();
00332                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00333                                 descriptorsValues.SetAt(z,i,value);
00334                         else descriptorsValues.SetAt(z,i,0);
00335                         z++;
00336                 }
00337                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioVarianceId)
00338                 {
00339                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetVariance();
00340                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00341                                 descriptorsValues.SetAt(z,i,value);
00342                         else descriptorsValues.SetAt(z,i,0);
00343                         z++;
00344                 }
00345                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioCentroidId)
00346                 {
00347                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetTemporalCentroid();
00348                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00349                                 descriptorsValues.SetAt(z,i,value);
00350                         else descriptorsValues.SetAt(z,i,0);
00351                         z++;
00352                 }
00353                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioZeroCrossingRateId)
00354                 {
00355                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetZeroCrossingRate();
00356                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00357                                 descriptorsValues.SetAt(z,i,value);
00358                         else descriptorsValues.SetAt(z,i,0);
00359                         z++;
00360                 }
00361         }
00362 
00363 
00364 }
00365 
00366 
00367 void Segmentator::Algorithm(Segment& s,const Matrix& values)
00368 {
00369 
00370         // Segmentation objects
00371         // segment boundaries for each parameter
00372         int nFrames=s.GetnFrames();
00373         int nDescriptors=mConfig.GetDescriptorsParams().Size();
00374         SegmentBoundaries segmentBoundaries(nDescriptors);
00375         // segment boundaries for each parameter
00376         segmentBoundaries.mArray.SetSize(nDescriptors);
00377 
00378         for (int z=0;z<nDescriptors;z++)
00379         {
00380                 segmentBoundaries.mArray[z].AddElem(std::pair<int,TData>(0,100));//very high value
00381         }
00382         for (int i=0; i<nFrames-4; i++)
00383         {
00384                 for (int z=0;z<nDescriptors;z++)
00385                 {
00386                         const TData & x3 = values.GetAt(z,i+3);
00387                         const TData & x2 = values.GetAt(z,i+2);
00388                         const TData & x1 = values.GetAt(z,i+1);
00389                         const TData & x0 = values.GetAt(z,i);
00390                         // Avoid div by 0
00391                         if (x2==0) continue;
00392                         
00393                         const TData relevance = fabs((x3-x2)/x2);
00394                         const TData & ratio = mConfig.GetDescriptorsParams()[z].percentil/100;
00395 
00396 
00397                         if ((x3/x2)>(1+ratio) ||
00398                             (x3/x2)<(1-ratio))
00399                         {
00400                         /*
00401                         if (i>2)
00402                         {
00403                                 if ((x3/x1)>(1+ratio) || (x3/x1)<(1-ratio))
00404                                 {
00405                                         //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength())
00406                         */
00407                                 if (( x3>x2 && x2>x1 && x1>x0 )||
00408                                    (  x3<x2 && x2<x1 && x1<x0 ))
00409                                 {
00410                                         std::pair<int,TData>  tmpValue(i+3,relevance/ratio);
00411                                         segmentBoundaries.mArray[z].AddElem(tmpValue);
00412                                 }
00413                                 else if((x3/x2)>(1+2*ratio)||
00414                                         (x3/x2)<(1-2*ratio))
00415                                 {
00416                                         std::pair<int,TData>  tmpValue(i+3,relevance/ratio);
00417                                         segmentBoundaries.mArray[z].AddElem(tmpValue);
00418                                 }
00419 
00420                         }
00421                         /*
00422                                 }
00423                         }
00424                         else if (i>2)
00425                         {
00426                                 if ((x3/x1)>(1+ratio) || (x3/x1)<(1-ratio))
00427                                 {
00428                                         //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength())
00429                                         segmentBoundaries.mArray[z].AddElem(i);
00430                                 }
00431                         }
00432                         else if (i>3)
00433                         {
00434                                 if ((x3/x0)>(1+ratio) || (x3/x0)<(1-ratio))
00435                                 {
00436                                         //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength())
00437                                         segmentBoundaries.mArray[z].AddElem(i);
00438                                 }
00439                         }
00440                         */
00441                         /*
00442                         if ( x3==0 && x2!=0 )
00443                         {
00444                                 std::pair<int,TData>  tmpValue(i,100);
00445                                 segmentBoundaries.mArray[z].AddElem(tmpValue);
00446                         }
00447                         */
00448 
00449                 }
00450         }
00451         DataFusion(s,segmentBoundaries);
00452 }
00453 
00454 void Segmentator::DataFusion(Segment& s,const SegmentBoundaries& segmentBoundaries)
00455 {
00456 
00457         // DATA FUSION (of the segmentation parameters), taken from Rossignol's Thesis
00458         // DoNothing,1) Generate probability functions for both parameters
00459         const int nFrames=s.GetnFrames();
00460         const int nDescriptors=mConfig.GetDescriptorsParams().Size();
00461         TData duration=s.GetFrame(0).GetDuration();/*BEWARE!Assuming equal lengthed frames*/
00462         TData sampleRate=s.GetSamplingRate();
00463 
00464         /*Initializing Probability Matrix*/
00465         Matrix probabilityMatrix(nFrames,nDescriptors);
00466         memset(probabilityMatrix.GetBuffer().GetPtr(),0,nFrames*nDescriptors*sizeof(TData));
00467 
00468         /*Setting probability to one wherever a segment boundary was found*/
00469         for (int z=0;z<nDescriptors;z++)
00470         {
00471                 for (int n=0;n<segmentBoundaries.mArray[z].Size();n++)
00472                         probabilityMatrix.SetAt(
00473                                 segmentBoundaries.mArray[z][n].first,
00474                                 z,
00475                                 segmentBoundaries.mArray[z][n].second);
00476         }
00477 
00478         // Adding probability values of different descriptors
00479         Array<TData> globalProb;
00480         for (int n=0; n<nFrames; n++)
00481         {
00482                 TData tmpProb=0;
00483                 for(int z=0;z<nDescriptors;z++)
00484                 {
00485                         tmpProb+=probabilityMatrix.GetAt(n,z);
00486                 }
00487                 globalProb.AddElem(tmpProb);
00488         }
00489 
00490         // MERGE: Two comments, choose one
00491         // 3) Fusion of too near marks (separated 1 or 2 frames)
00492         // 3) Fusion of too near marks (separated less than the minSegmentLength)
00493         // Also compute maximun (to re-use the loop)
00494         Array<TData> prob_fusion(globalProb);
00495         {
00496                 int n=0;
00497                 while(globalProb[n]<=0) // Find first frame with prob>0
00498                         n++;
00499                 TData mag=globalProb[n];
00500                 TData gcenter=n*globalProb[n];
00501                 prob_fusion[n]=0;
00502                 for (int m=n+1; m<globalProb.Size(); m++)
00503                 {
00504                         if (globalProb[m]<=0) continue;
00505                         if ((m-n)>mConfig.GetMinSegmentLength())
00506                         {
00507                                 // Store information and begin another search
00508                                 prob_fusion[(int)(gcenter/mag)]=mag;
00509                                 mag=0;
00510                                 gcenter=0;
00511                         }
00512                         mag+=globalProb[m];
00513                         gcenter+=m*globalProb[m];
00514                         prob_fusion[m]=0;
00515                         n=m;
00516                 }
00517         }
00518         // 4) DELETE SMALL MARKS (1/7 of the max value, parameter that should be optimized...) 
00519         TData max=0;
00520         for (int n=0; n<prob_fusion.Size(); n++)
00521                 if (prob_fusion[n]>max)
00522                         max=prob_fusion[n];
00523         for (int n=0; n<prob_fusion.Size(); n++)
00524                 // MERGE: cuidado max/100 vs. CLAM04 max/7
00525                 if (prob_fusion[n]<=(max/100))
00526                         prob_fusion[n]=0;
00527 
00528         Array<TData> finalSegments; // final segment boundaries in samples
00529         for (int n=0; n<prob_fusion.Size(); n++)
00530         {
00531                 if (prob_fusion[n]>0)
00532                         finalSegments.AddElem(n*duration*sampleRate);
00533         }
00534 
00535         // Store segment boundaries information
00536 
00537         if (finalSegments.Size()<=0) return;
00538 
00539         for (int n=0; n<(finalSegments.Size()-1); n++)
00540         {
00541                 Segment tmpSegment;
00542                 tmpSegment.SetBeginTime(finalSegments[n]  /sampleRate);
00543                 tmpSegment.SetEndTime  (finalSegments[n+1]/sampleRate);
00544                 tmpSegment.SetpParent(&s);
00545                 tmpSegment.SetHoldsData( false );
00546                 s.GetChildren().AddElem(tmpSegment);
00547         }
00548 
00549         Segment tmpSegment;
00550         tmpSegment.SetBeginTime(finalSegments[finalSegments.Size()-1] /sampleRate);
00551         tmpSegment.SetEndTime(s.GetAudio().GetEndTime());
00552         tmpSegment.SetpParent(&s);
00553         s.GetChildren().AddElem(tmpSegment);
00554 
00555 }
00556 
Generated by  doxygen 1.6.3