Segmentator.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG)
00003  *                         UNIVERSITAT POMPEU FABRA
00004  *
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #include "Segmentator.hxx"
00023 #include "Point.hxx"
00024 #include "Segment.hxx"
00025 #include <iostream>
00026 #include "SegmentDescriptors.hxx"
00027 
00028 
00029 namespace CLAM
00030 {
00031         class SegmentBoundaries
00032         {
00033         public:
00034                 Array < Array < PointTmpl < int , TData > > > mArray;
00035 
00036                 SegmentBoundaries(int size):mArray(size)
00037                 {
00038                 }
00039         };
00040 }
00041 
00042 using namespace CLAM;
00043 
00047 
00048 std::ostream& operator << (std::ostream& myStream, const TDescriptorsParams& a)
00049 {
00050         switch (a.id)
00051         {
00052                 case SpectralMeanId:
00053                 {
00054                         myStream << "SpectralMean";
00055                         break;
00056                 }
00057                 case SpectralGeometricMeanId :
00058                 {
00059                         myStream << "SpectralGeometricMean";
00060                         break;
00061                 }
00062                 case SpectralEnergyId:
00063                 {
00064                         myStream << "SpectralEnergy";
00065                         break;
00066                 }
00067                 case SpectralCentroidId :
00068                 {
00069                         myStream << "SpectralCentroid";
00070                         break;
00071                 }
00072                 case SpectralMoment2Id:
00073                 {
00074                         myStream << "SpectralMoment2";
00075                         break;
00076                 }
00077                 case SpectralMoment3Id:
00078                 {
00079                         myStream << "SpectralMoment3";
00080                         break;
00081                 }
00082                 case SpectralMoment4Id:
00083                 {
00084                         myStream << "SpectralMoment4";
00085                         break;
00086                 }
00087                 case SpectralMoment5Id:
00088                 {
00089                         myStream << "SpectralMoment5";
00090                         break;
00091                 }
00092                 case SpectralMoment6Id:
00093                 {
00094                         myStream << "SpectralMoment6";
00095                         break;
00096                 }
00097                 case SpectralFlatnessId :
00098                 {
00099                         myStream << "SpectralFlatness";
00100                         break;
00101                 }
00102                 case SpectralKurtosisId  :
00103                 {
00104                         myStream << "SpectralKurtosis";
00105                         break;
00106                 }
00107                 case FundamentalId  :
00108                 {
00109                         myStream << "Fundamental";
00110                         break;
00111                 }
00112                 default:
00113                 {
00114                         myStream << "UnknownDescriptor";
00115                         break;
00116                 }
00117         }
00118         myStream << "threshold=";
00119         myStream << a.threshold;
00120         myStream << " ";
00121         myStream << "percentil=";
00122         myStream << a.percentil;
00123         return myStream;
00124 }
00125 
00126 std::istream& operator >> (std::istream& myStream, const TDescriptorsParams& a)
00127 {
00128         CLAM_ASSERT(false, "TDescriptorParams extractor operator is not implemented");
00129         return myStream;
00130 }
00131 
00132 
00133 void SegmentatorConfig::DefaultInit()
00134 {
00135         //AddAll();
00136         AddDescriptorsParams();
00137         AddMinSegmentLength();
00138         UpdateData();
00139         Array<TDescriptorsParams> tmpArray(0);
00140         SetDescriptorsParams(tmpArray);
00141         SearchArray<TDescriptorsParams> tmpSearch(GetDescriptorsParams());
00142         SetDescriptorsSearch(tmpSearch);
00143         
00144         // Default values
00145         SetMinSegmentLength(0);
00146 };
00147 
00148 void SegmentatorConfig::AddDescParams(const TDescriptorsParams& descParams)
00149 {
00150         TIndex position;
00151         if(GetDescriptorsParams().Size()==0) 
00152                 GetDescriptorsParams().AddElem(descParams);
00153         else if ((position=GetDescriptorsSearch().Find(descParams))==-1)
00154         {
00155                 if(descParams<GetDescriptorsParams()[0])
00156                         GetDescriptorsParams().InsertElem(0,descParams);
00157                 else
00158                         GetDescriptorsParams().AddElem(descParams);
00159         }
00160         else
00161                 GetDescriptorsParams().InsertElem(position,descParams);
00162 }
00163 
00164 /*false if descriptor is not found, true if it is*/
00165 bool SegmentatorConfig::FindDescParams(TDescriptorsParams& descParams)
00166 {
00167         int pos;
00168         if((pos=GetDescriptorsSearch().Find(descParams))!=-1)
00169         {
00170                 descParams.percentil=GetDescriptorsParams()[pos].percentil;
00171                 descParams.threshold=GetDescriptorsParams()[pos].threshold;
00172                 return true;
00173         }
00174         return false;
00175 }
00176 
00177 void SegmentatorConfig::ClearDescParams() {
00178         GetDescriptorsParams().Init();
00179 }
00180 
00184 
00185 Segmentator::Segmentator()
00186 {
00187         Configure(SegmentatorConfig());
00188 }
00189 
00190 Segmentator::Segmentator(const SegmentatorConfig& c)
00191 {
00192         Configure(c);
00193 }
00194 
00195 Segmentator::~Segmentator()
00196 {
00197 }
00198 
00199 bool Segmentator::ConcreteConfigure(const ProcessingConfig& c)
00200 {
00201         CopyAsConcreteConfig(mConfig, c);
00202         return true;
00203 }
00204 
00205 bool Segmentator::Do()
00206 {
00207         CLAM_DEBUG_ASSERT(IsRunning(), "Segmentator: Do(): Not in execution mode");
00208 
00209         CLAM_ASSERT(false, "Segmentator: Do(): Supervised mode not implemented");
00210 
00211         return false;
00212 }
00213 
00214 
00215 bool Segmentator::Do(Segment& originalSegment,SegmentDescriptors& descriptors)
00216 {
00217         int nFrames=originalSegment.GetnFrames();
00218         Matrix descriptorsValues(mConfig.GetDescriptorsParams().Size(),nFrames);
00219         UnwrapDescriptors(originalSegment, descriptors,descriptorsValues);
00220         Algorithm(originalSegment,descriptorsValues);
00221         return true;
00222 }
00223 
00224 
00225 void Segmentator::UnwrapDescriptors(const Segment& originalSegment, SegmentDescriptors& descriptors,Matrix& descriptorsValues)
00226 {
00227         int nFrames=originalSegment.GetnFrames();
00228         int nDescriptors=mConfig.GetDescriptorsParams().Size();
00229         for(int i=0;i<nFrames;i++)
00230         {
00231 /*This looks ugly but right now is the only way to deal with it*/
00232                 int z=0;
00233                 TData value;
00234                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMeanId)
00235                 {
00236                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMean();
00237                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00238                                 descriptorsValues.SetAt(z,i,value);
00239                         else descriptorsValues.SetAt(z,i,0);
00240                         z++;
00241                 }
00242                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralGeometricMeanId )
00243                 {
00244                         value=descriptors.GetFrameD(i).GetSpectrumD().GetGeometricMean();
00245                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00246                                 descriptorsValues.SetAt(z,i,value);
00247                         else descriptorsValues.SetAt(z,i,0);
00248                         z++;
00249                 }
00250                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralEnergyId)
00251                 {
00252                         value=descriptors.GetFrameD(i).GetSpectrumD().GetEnergy();
00253                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00254                                 descriptorsValues.SetAt(z,i,value);
00255                         else descriptorsValues.SetAt(z,i,0);
00256                         z++;
00257                 }
00258                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralCentroidId )
00259                 {
00260                         value=descriptors.GetFrameD(i).GetSpectrumD().GetCentroid();
00261                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00262                                 descriptorsValues.SetAt(z,i,value);
00263                         else descriptorsValues.SetAt(z,i,0);
00264                         z++;
00265                 }
00266                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment2Id)
00267                 {
00268                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment2();
00269                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00270                                 descriptorsValues.SetAt(z,i,value);
00271                         else descriptorsValues.SetAt(z,i,0);
00272                         z++;
00273                 }
00274                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment3Id)
00275                 {
00276                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment3();
00277                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00278                                 descriptorsValues.SetAt(z,i,value);
00279                         else descriptorsValues.SetAt(z,i,0);
00280                         z++;
00281                 }
00282                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment4Id)
00283                 {
00284                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment4();
00285                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00286                                 descriptorsValues.SetAt(z,i,value);
00287                         else descriptorsValues.SetAt(z,i,0);
00288                         z++;
00289                 }
00290                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment5Id)
00291                 {
00292                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment5();
00293                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00294                                 descriptorsValues.SetAt(z,i,value);
00295                         else descriptorsValues.SetAt(z,i,0);
00296                         z++;
00297                 }
00298                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment6Id)
00299                 {
00300                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment6();
00301                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00302                                 descriptorsValues.SetAt(z,i,value);
00303                         else descriptorsValues.SetAt(z,i,0);
00304                         z++;
00305                 }
00306                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralFlatnessId )
00307                 {
00308                         value=descriptors.GetFrameD(i).GetSpectrumD().GetFlatness();
00309                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00310                                 descriptorsValues.SetAt(z,i,value);
00311                         else descriptorsValues.SetAt(z,i,0);
00312                         z++;
00313                 }
00314                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralKurtosisId  )
00315                 {
00316                         value=descriptors.GetFrameD(i).GetSpectrumD().GetMagnitudeKurtosis();
00317                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00318                                 descriptorsValues.SetAt(z,i,value);
00319                         else descriptorsValues.SetAt(z,i,0);
00320                         z++;
00321                 }
00322                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==FundamentalId  )
00323                 {
00324                         value=originalSegment.GetFrame(i).GetFundamental().GetFreq();
00325                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00326                                 descriptorsValues.SetAt(z,i,value);
00327                         else descriptorsValues.SetAt(z,i,0);
00328                         z++;
00329                 }
00330                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioEnergyId)
00331                 {
00332                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetEnergy();
00333                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00334                                 descriptorsValues.SetAt(z,i,value);
00335                         else descriptorsValues.SetAt(z,i,0);
00336                         z++;
00337                 }
00338                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioVarianceId)
00339                 {
00340                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetVariance();
00341                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00342                                 descriptorsValues.SetAt(z,i,value);
00343                         else descriptorsValues.SetAt(z,i,0);
00344                         z++;
00345                 }
00346                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioCentroidId)
00347                 {
00348                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetTemporalCentroid();
00349                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00350                                 descriptorsValues.SetAt(z,i,value);
00351                         else descriptorsValues.SetAt(z,i,0);
00352                         z++;
00353                 }
00354                 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioZeroCrossingRateId)
00355                 {
00356                         value=descriptors.GetFrameD(i).GetAudioFrameD().GetZeroCrossingRate();
00357                         if(value>mConfig.GetDescriptorsParams()[z].threshold)
00358                                 descriptorsValues.SetAt(z,i,value);
00359                         else descriptorsValues.SetAt(z,i,0);
00360                         z++;
00361                 }
00362         }
00363 
00364 
00365 }
00366 
00367 
00368 void Segmentator::Algorithm(Segment& s,const Matrix& values)
00369 {
00370 
00371         // Segmentation objects
00372         // segment boundaries for each parameter
00373         int nFrames=s.GetnFrames();
00374         int nDescriptors=mConfig.GetDescriptorsParams().Size();
00375         SegmentBoundaries segmentBoundaries(nDescriptors);
00376         // segment boundaries for each parameter
00377         segmentBoundaries.mArray.SetSize(nDescriptors);
00378 
00379         for (int z=0;z<nDescriptors;z++)
00380         {
00381                 segmentBoundaries.mArray[z].AddElem(PointTmpl<int,TData>(0,100));//very high value
00382         }
00383         for (int i=0; i<nFrames-4; i++)
00384         {
00385                 for (int z=0;z<nDescriptors;z++)
00386                 {
00387                         const TData & x3 = values.GetAt(z,i+3);
00388                         const TData & x2 = values.GetAt(z,i+2);
00389                         const TData & x1 = values.GetAt(z,i+1);
00390                         const TData & x0 = values.GetAt(z,i);
00391                         // Avoid div by 0
00392                         if (x2==0) continue;
00393                         
00394                         const TData relevance = fabs((x3-x2)/x2);
00395                         const TData & ratio = mConfig.GetDescriptorsParams()[z].percentil/100;
00396 
00397 
00398                         if ((x3/x2)>(1+ratio) ||
00399                             (x3/x2)<(1-ratio))
00400                         {
00401                         /*
00402                         if (i>2)
00403                         {
00404                                 if ((x3/x1)>(1+ratio) || (x3/x1)<(1-ratio))
00405                                 {
00406                                         //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength())
00407                         */
00408                                 if (( x3>x2 && x2>x1 && x1>x0 )||
00409                                    (  x3<x2 && x2<x1 && x1<x0 ))
00410                                 {
00411                                         PointTmpl<int,TData>  tmpValue(i+3,relevance/ratio);
00412                                         segmentBoundaries.mArray[z].AddElem(tmpValue);
00413                                 }
00414                                 else if((x3/x2)>(1+2*ratio)||
00415                                         (x3/x2)<(1-2*ratio))
00416                                 {
00417                                         PointTmpl<int,TData>  tmpValue(i+3,relevance/ratio);
00418                                         segmentBoundaries.mArray[z].AddElem(tmpValue);
00419                                 }
00420 
00421                         }
00422                         /*
00423                                 }
00424                         }
00425                         else if (i>2)
00426                         {
00427                                 if ((x3/x1)>(1+ratio) || (x3/x1)<(1-ratio))
00428                                 {
00429                                         //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength())
00430                                         segmentBoundaries.mArray[z].AddElem(i);
00431                                 }
00432                         }
00433                         else if (i>3)
00434                         {
00435                                 if ((x3/x0)>(1+ratio) || (x3/x0)<(1-ratio))
00436                                 {
00437                                         //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength())
00438                                         segmentBoundaries.mArray[z].AddElem(i);
00439                                 }
00440                         }
00441                         */
00442                         /*
00443                         if ( x3==0 && x2!=0 )
00444                         {
00445                                 Point<int,TData>  tmpValue(i,100);
00446                                 segmentBoundaries.mArray[z].AddElem(tmpValue);
00447                         }
00448                         */
00449 
00450                 }
00451         }
00452         DataFusion(s,segmentBoundaries);
00453 }
00454 
00455 void Segmentator::DataFusion(Segment& s,const SegmentBoundaries& segmentBoundaries)
00456 {
00457 
00458         // DATA FUSION (of the segmentation parameters), taken from Rossignol's Thesis
00459         // DoNothing,1) Generate probability functions for both parameters
00460         const int nFrames=s.GetnFrames();
00461         const int nDescriptors=mConfig.GetDescriptorsParams().Size();
00462         TData duration=s.GetFrame(0).GetDuration();/*BEWARE!Assuming equal lengthed frames*/
00463         TData sampleRate=s.GetSamplingRate();
00464 
00465         /*Initializing Probability Matrix*/
00466         Matrix probabilityMatrix(nFrames,nDescriptors);
00467         memset(probabilityMatrix.GetBuffer().GetPtr(),0,nFrames*nDescriptors*sizeof(TData));
00468 
00469         /*Setting probability to one wherever a segment boundary was found*/
00470         for (int z=0;z<nDescriptors;z++)
00471         {
00472                 for (int n=0;n<segmentBoundaries.mArray[z].Size();n++)
00473                         probabilityMatrix.SetAt(segmentBoundaries.mArray[z][n].GetX(),z,segmentBoundaries.mArray[z][n].GetY());
00474         }
00475 
00476         // Adding probability values of different descriptors
00477         Array<TData> globalProb;
00478         for (int n=0; n<nFrames; n++)
00479         {
00480                 TData tmpProb=0;
00481                 for(int z=0;z<nDescriptors;z++)
00482                 {
00483                         tmpProb+=probabilityMatrix.GetAt(n,z);
00484                 }
00485                 globalProb.AddElem(tmpProb);
00486         }
00487 
00488         // MERGE: Two comments, choose one
00489         // 3) Fusion of too near marks (separated 1 or 2 frames)
00490         // 3) Fusion of too near marks (separated less than the minSegmentLength)
00491         // Also compute maximun (to re-use the loop)
00492         Array<TData> prob_fusion(globalProb);
00493         {
00494                 int n=0;
00495                 while(globalProb[n]<=0) // Find first frame with prob>0
00496                         n++;
00497                 TData mag=globalProb[n];
00498                 TData gcenter=n*globalProb[n];
00499                 prob_fusion[n]=0;
00500                 for (int m=n+1; m<globalProb.Size(); m++)
00501                 {
00502                         if (globalProb[m]<=0) continue;
00503                         if ((m-n)>mConfig.GetMinSegmentLength())
00504                         {
00505                                 // Store information and begin another search
00506                                 prob_fusion[(int)(gcenter/mag)]=mag;
00507                                 mag=0;
00508                                 gcenter=0;
00509                         }
00510                         mag+=globalProb[m];
00511                         gcenter+=m*globalProb[m];
00512                         prob_fusion[m]=0;
00513                         n=m;
00514                 }
00515         }
00516         // 4) DELETE SMALL MARKS (1/7 of the max value, parameter that should be optimized...) 
00517         TData max=0;
00518         for (int n=0; n<prob_fusion.Size(); n++)
00519                 if (prob_fusion[n]>max)
00520                         max=prob_fusion[n];
00521         for (int n=0; n<prob_fusion.Size(); n++)
00522                 // MERGE: cuidado max/100 vs. CLAM04 max/7
00523                 if (prob_fusion[n]<=(max/100))
00524                         prob_fusion[n]=0;
00525 
00526         Array<TData> finalSegments; // final segment boundaries in samples
00527         for (int n=0; n<prob_fusion.Size(); n++)
00528         {
00529                 if (prob_fusion[n]>0)
00530                         finalSegments.AddElem(n*duration*sampleRate);
00531         }
00532 
00533         // Store segment boundaries information
00534 
00535         if (finalSegments.Size()<=0) return;
00536 
00537         for (int n=0; n<(finalSegments.Size()-1); n++)
00538         {
00539                 Segment tmpSegment;
00540                 tmpSegment.SetBeginTime(finalSegments[n]  /sampleRate);
00541                 tmpSegment.SetEndTime  (finalSegments[n+1]/sampleRate);
00542                 tmpSegment.SetpParent(&s);
00543                 tmpSegment.SetHoldsData( false );
00544                 s.GetChildren().AddElem(tmpSegment);
00545         }
00546 
00547         Segment tmpSegment;
00548         tmpSegment.SetBeginTime(finalSegments[finalSegments.Size()-1] /sampleRate);
00549         tmpSegment.SetEndTime(s.GetAudio().GetEndTime());
00550         tmpSegment.SetpParent(&s);
00551         s.GetChildren().AddElem(tmpSegment);
00552 
00553 }
00554 
Generated by  doxygen 1.6.3