00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "Segmentator.hxx"
00023 #include "Point.hxx"
00024 #include "Segment.hxx"
00025 #include <iostream>
00026 #include "SegmentDescriptors.hxx"
00027
00028
00029 namespace CLAM
00030 {
00031 class SegmentBoundaries
00032 {
00033 public:
00034 Array < Array < PointTmpl < int , TData > > > mArray;
00035
00036 SegmentBoundaries(int size):mArray(size)
00037 {
00038 }
00039 };
00040 }
00041
00042 using namespace CLAM;
00043
00047
00048 std::ostream& operator << (std::ostream& myStream, const TDescriptorsParams& a)
00049 {
00050 switch (a.id)
00051 {
00052 case SpectralMeanId:
00053 {
00054 myStream << "SpectralMean";
00055 break;
00056 }
00057 case SpectralGeometricMeanId :
00058 {
00059 myStream << "SpectralGeometricMean";
00060 break;
00061 }
00062 case SpectralEnergyId:
00063 {
00064 myStream << "SpectralEnergy";
00065 break;
00066 }
00067 case SpectralCentroidId :
00068 {
00069 myStream << "SpectralCentroid";
00070 break;
00071 }
00072 case SpectralMoment2Id:
00073 {
00074 myStream << "SpectralMoment2";
00075 break;
00076 }
00077 case SpectralMoment3Id:
00078 {
00079 myStream << "SpectralMoment3";
00080 break;
00081 }
00082 case SpectralMoment4Id:
00083 {
00084 myStream << "SpectralMoment4";
00085 break;
00086 }
00087 case SpectralMoment5Id:
00088 {
00089 myStream << "SpectralMoment5";
00090 break;
00091 }
00092 case SpectralMoment6Id:
00093 {
00094 myStream << "SpectralMoment6";
00095 break;
00096 }
00097 case SpectralFlatnessId :
00098 {
00099 myStream << "SpectralFlatness";
00100 break;
00101 }
00102 case SpectralKurtosisId :
00103 {
00104 myStream << "SpectralKurtosis";
00105 break;
00106 }
00107 case FundamentalId :
00108 {
00109 myStream << "Fundamental";
00110 break;
00111 }
00112 default:
00113 {
00114 myStream << "UnknownDescriptor";
00115 break;
00116 }
00117 }
00118 myStream << "threshold=";
00119 myStream << a.threshold;
00120 myStream << " ";
00121 myStream << "percentil=";
00122 myStream << a.percentil;
00123 return myStream;
00124 }
00125
00126 std::istream& operator >> (std::istream& myStream, const TDescriptorsParams& a)
00127 {
00128 CLAM_ASSERT(false, "TDescriptorParams extractor operator is not implemented");
00129 return myStream;
00130 }
00131
00132
00133 void SegmentatorConfig::DefaultInit()
00134 {
00135
00136 AddDescriptorsParams();
00137 AddMinSegmentLength();
00138 UpdateData();
00139 Array<TDescriptorsParams> tmpArray(0);
00140 SetDescriptorsParams(tmpArray);
00141 SearchArray<TDescriptorsParams> tmpSearch(GetDescriptorsParams());
00142 SetDescriptorsSearch(tmpSearch);
00143
00144
00145 SetMinSegmentLength(0);
00146 };
00147
00148 void SegmentatorConfig::AddDescParams(const TDescriptorsParams& descParams)
00149 {
00150 TIndex position;
00151 if(GetDescriptorsParams().Size()==0)
00152 GetDescriptorsParams().AddElem(descParams);
00153 else if ((position=GetDescriptorsSearch().Find(descParams))==-1)
00154 {
00155 if(descParams<GetDescriptorsParams()[0])
00156 GetDescriptorsParams().InsertElem(0,descParams);
00157 else
00158 GetDescriptorsParams().AddElem(descParams);
00159 }
00160 else
00161 GetDescriptorsParams().InsertElem(position,descParams);
00162 }
00163
00164
00165 bool SegmentatorConfig::FindDescParams(TDescriptorsParams& descParams)
00166 {
00167 int pos;
00168 if((pos=GetDescriptorsSearch().Find(descParams))!=-1)
00169 {
00170 descParams.percentil=GetDescriptorsParams()[pos].percentil;
00171 descParams.threshold=GetDescriptorsParams()[pos].threshold;
00172 return true;
00173 }
00174 return false;
00175 }
00176
00177 void SegmentatorConfig::ClearDescParams() {
00178 GetDescriptorsParams().Init();
00179 }
00180
00184
00185 Segmentator::Segmentator()
00186 {
00187 Configure(SegmentatorConfig());
00188 }
00189
00190 Segmentator::Segmentator(const SegmentatorConfig& c)
00191 {
00192 Configure(c);
00193 }
00194
00195 Segmentator::~Segmentator()
00196 {
00197 }
00198
00199 bool Segmentator::ConcreteConfigure(const ProcessingConfig& c)
00200 {
00201 CopyAsConcreteConfig(mConfig, c);
00202 return true;
00203 }
00204
00205 bool Segmentator::Do()
00206 {
00207 CLAM_DEBUG_ASSERT(IsRunning(), "Segmentator: Do(): Not in execution mode");
00208
00209 CLAM_ASSERT(false, "Segmentator: Do(): Supervised mode not implemented");
00210
00211 return false;
00212 }
00213
00214
00215 bool Segmentator::Do(Segment& originalSegment,SegmentDescriptors& descriptors)
00216 {
00217 int nFrames=originalSegment.GetnFrames();
00218 Matrix descriptorsValues(mConfig.GetDescriptorsParams().Size(),nFrames);
00219 UnwrapDescriptors(originalSegment, descriptors,descriptorsValues);
00220 Algorithm(originalSegment,descriptorsValues);
00221 return true;
00222 }
00223
00224
00225 void Segmentator::UnwrapDescriptors(const Segment& originalSegment, SegmentDescriptors& descriptors,Matrix& descriptorsValues)
00226 {
00227 int nFrames=originalSegment.GetnFrames();
00228 int nDescriptors=mConfig.GetDescriptorsParams().Size();
00229 for(int i=0;i<nFrames;i++)
00230 {
00231
00232 int z=0;
00233 TData value;
00234 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMeanId)
00235 {
00236 value=descriptors.GetFrameD(i).GetSpectrumD().GetMean();
00237 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00238 descriptorsValues.SetAt(z,i,value);
00239 else descriptorsValues.SetAt(z,i,0);
00240 z++;
00241 }
00242 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralGeometricMeanId )
00243 {
00244 value=descriptors.GetFrameD(i).GetSpectrumD().GetGeometricMean();
00245 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00246 descriptorsValues.SetAt(z,i,value);
00247 else descriptorsValues.SetAt(z,i,0);
00248 z++;
00249 }
00250 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralEnergyId)
00251 {
00252 value=descriptors.GetFrameD(i).GetSpectrumD().GetEnergy();
00253 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00254 descriptorsValues.SetAt(z,i,value);
00255 else descriptorsValues.SetAt(z,i,0);
00256 z++;
00257 }
00258 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralCentroidId )
00259 {
00260 value=descriptors.GetFrameD(i).GetSpectrumD().GetCentroid();
00261 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00262 descriptorsValues.SetAt(z,i,value);
00263 else descriptorsValues.SetAt(z,i,0);
00264 z++;
00265 }
00266 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment2Id)
00267 {
00268 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment2();
00269 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00270 descriptorsValues.SetAt(z,i,value);
00271 else descriptorsValues.SetAt(z,i,0);
00272 z++;
00273 }
00274 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment3Id)
00275 {
00276 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment3();
00277 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00278 descriptorsValues.SetAt(z,i,value);
00279 else descriptorsValues.SetAt(z,i,0);
00280 z++;
00281 }
00282 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment4Id)
00283 {
00284 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment4();
00285 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00286 descriptorsValues.SetAt(z,i,value);
00287 else descriptorsValues.SetAt(z,i,0);
00288 z++;
00289 }
00290 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment5Id)
00291 {
00292 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment5();
00293 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00294 descriptorsValues.SetAt(z,i,value);
00295 else descriptorsValues.SetAt(z,i,0);
00296 z++;
00297 }
00298 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment6Id)
00299 {
00300 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment6();
00301 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00302 descriptorsValues.SetAt(z,i,value);
00303 else descriptorsValues.SetAt(z,i,0);
00304 z++;
00305 }
00306 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralFlatnessId )
00307 {
00308 value=descriptors.GetFrameD(i).GetSpectrumD().GetFlatness();
00309 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00310 descriptorsValues.SetAt(z,i,value);
00311 else descriptorsValues.SetAt(z,i,0);
00312 z++;
00313 }
00314 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralKurtosisId )
00315 {
00316 value=descriptors.GetFrameD(i).GetSpectrumD().GetMagnitudeKurtosis();
00317 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00318 descriptorsValues.SetAt(z,i,value);
00319 else descriptorsValues.SetAt(z,i,0);
00320 z++;
00321 }
00322 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==FundamentalId )
00323 {
00324 value=originalSegment.GetFrame(i).GetFundamental().GetFreq();
00325 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00326 descriptorsValues.SetAt(z,i,value);
00327 else descriptorsValues.SetAt(z,i,0);
00328 z++;
00329 }
00330 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioEnergyId)
00331 {
00332 value=descriptors.GetFrameD(i).GetAudioFrameD().GetEnergy();
00333 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00334 descriptorsValues.SetAt(z,i,value);
00335 else descriptorsValues.SetAt(z,i,0);
00336 z++;
00337 }
00338 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioVarianceId)
00339 {
00340 value=descriptors.GetFrameD(i).GetAudioFrameD().GetVariance();
00341 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00342 descriptorsValues.SetAt(z,i,value);
00343 else descriptorsValues.SetAt(z,i,0);
00344 z++;
00345 }
00346 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioCentroidId)
00347 {
00348 value=descriptors.GetFrameD(i).GetAudioFrameD().GetTemporalCentroid();
00349 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00350 descriptorsValues.SetAt(z,i,value);
00351 else descriptorsValues.SetAt(z,i,0);
00352 z++;
00353 }
00354 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioZeroCrossingRateId)
00355 {
00356 value=descriptors.GetFrameD(i).GetAudioFrameD().GetZeroCrossingRate();
00357 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00358 descriptorsValues.SetAt(z,i,value);
00359 else descriptorsValues.SetAt(z,i,0);
00360 z++;
00361 }
00362 }
00363
00364
00365 }
00366
00367
00368 void Segmentator::Algorithm(Segment& s,const Matrix& values)
00369 {
00370
00371
00372
00373 int nFrames=s.GetnFrames();
00374 int nDescriptors=mConfig.GetDescriptorsParams().Size();
00375 SegmentBoundaries segmentBoundaries(nDescriptors);
00376
00377 segmentBoundaries.mArray.SetSize(nDescriptors);
00378
00379 for (int z=0;z<nDescriptors;z++)
00380 {
00381 segmentBoundaries.mArray[z].AddElem(PointTmpl<int,TData>(0,100));
00382 }
00383 for (int i=0; i<nFrames-4; i++)
00384 {
00385 for (int z=0;z<nDescriptors;z++)
00386 {
00387 const TData & x3 = values.GetAt(z,i+3);
00388 const TData & x2 = values.GetAt(z,i+2);
00389 const TData & x1 = values.GetAt(z,i+1);
00390 const TData & x0 = values.GetAt(z,i);
00391
00392 if (x2==0) continue;
00393
00394 const TData relevance = fabs((x3-x2)/x2);
00395 const TData & ratio = mConfig.GetDescriptorsParams()[z].percentil/100;
00396
00397
00398 if ((x3/x2)>(1+ratio) ||
00399 (x3/x2)<(1-ratio))
00400 {
00401
00402
00403
00404
00405
00406
00407
00408 if (( x3>x2 && x2>x1 && x1>x0 )||
00409 ( x3<x2 && x2<x1 && x1<x0 ))
00410 {
00411 PointTmpl<int,TData> tmpValue(i+3,relevance/ratio);
00412 segmentBoundaries.mArray[z].AddElem(tmpValue);
00413 }
00414 else if((x3/x2)>(1+2*ratio)||
00415 (x3/x2)<(1-2*ratio))
00416 {
00417 PointTmpl<int,TData> tmpValue(i+3,relevance/ratio);
00418 segmentBoundaries.mArray[z].AddElem(tmpValue);
00419 }
00420
00421 }
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450 }
00451 }
00452 DataFusion(s,segmentBoundaries);
00453 }
00454
00455 void Segmentator::DataFusion(Segment& s,const SegmentBoundaries& segmentBoundaries)
00456 {
00457
00458
00459
00460 const int nFrames=s.GetnFrames();
00461 const int nDescriptors=mConfig.GetDescriptorsParams().Size();
00462 TData duration=s.GetFrame(0).GetDuration();
00463 TData sampleRate=s.GetSamplingRate();
00464
00465
00466 Matrix probabilityMatrix(nFrames,nDescriptors);
00467 memset(probabilityMatrix.GetBuffer().GetPtr(),0,nFrames*nDescriptors*sizeof(TData));
00468
00469
00470 for (int z=0;z<nDescriptors;z++)
00471 {
00472 for (int n=0;n<segmentBoundaries.mArray[z].Size();n++)
00473 probabilityMatrix.SetAt(segmentBoundaries.mArray[z][n].GetX(),z,segmentBoundaries.mArray[z][n].GetY());
00474 }
00475
00476
00477 Array<TData> globalProb;
00478 for (int n=0; n<nFrames; n++)
00479 {
00480 TData tmpProb=0;
00481 for(int z=0;z<nDescriptors;z++)
00482 {
00483 tmpProb+=probabilityMatrix.GetAt(n,z);
00484 }
00485 globalProb.AddElem(tmpProb);
00486 }
00487
00488
00489
00490
00491
00492 Array<TData> prob_fusion(globalProb);
00493 {
00494 int n=0;
00495 while(globalProb[n]<=0)
00496 n++;
00497 TData mag=globalProb[n];
00498 TData gcenter=n*globalProb[n];
00499 prob_fusion[n]=0;
00500 for (int m=n+1; m<globalProb.Size(); m++)
00501 {
00502 if (globalProb[m]<=0) continue;
00503 if ((m-n)>mConfig.GetMinSegmentLength())
00504 {
00505
00506 prob_fusion[(int)(gcenter/mag)]=mag;
00507 mag=0;
00508 gcenter=0;
00509 }
00510 mag+=globalProb[m];
00511 gcenter+=m*globalProb[m];
00512 prob_fusion[m]=0;
00513 n=m;
00514 }
00515 }
00516
00517 TData max=0;
00518 for (int n=0; n<prob_fusion.Size(); n++)
00519 if (prob_fusion[n]>max)
00520 max=prob_fusion[n];
00521 for (int n=0; n<prob_fusion.Size(); n++)
00522
00523 if (prob_fusion[n]<=(max/100))
00524 prob_fusion[n]=0;
00525
00526 Array<TData> finalSegments;
00527 for (int n=0; n<prob_fusion.Size(); n++)
00528 {
00529 if (prob_fusion[n]>0)
00530 finalSegments.AddElem(n*duration*sampleRate);
00531 }
00532
00533
00534
00535 if (finalSegments.Size()<=0) return;
00536
00537 for (int n=0; n<(finalSegments.Size()-1); n++)
00538 {
00539 Segment tmpSegment;
00540 tmpSegment.SetBeginTime(finalSegments[n] /sampleRate);
00541 tmpSegment.SetEndTime (finalSegments[n+1]/sampleRate);
00542 tmpSegment.SetpParent(&s);
00543 tmpSegment.SetHoldsData( false );
00544 s.GetChildren().AddElem(tmpSegment);
00545 }
00546
00547 Segment tmpSegment;
00548 tmpSegment.SetBeginTime(finalSegments[finalSegments.Size()-1] /sampleRate);
00549 tmpSegment.SetEndTime(s.GetAudio().GetEndTime());
00550 tmpSegment.SetpParent(&s);
00551 s.GetChildren().AddElem(tmpSegment);
00552
00553 }
00554