00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "Segmentator.hxx"
00023 #include "Segment.hxx"
00024 #include <iostream>
00025 #include "SegmentDescriptors.hxx"
00026
00027
00028 namespace CLAM
00029 {
00030 class SegmentBoundaries
00031 {
00032 public:
00033 Array < Array < std::pair < int , TData > > > mArray;
00034 SegmentBoundaries(int size)
00035 : mArray(size)
00036 {
00037 }
00038 };
00039 }
00040
00041 using namespace CLAM;
00042
00046
00047 std::ostream& operator << (std::ostream& myStream, const TDescriptorsParams& a)
00048 {
00049 switch (a.id)
00050 {
00051 case SpectralMeanId:
00052 {
00053 myStream << "SpectralMean";
00054 break;
00055 }
00056 case SpectralGeometricMeanId :
00057 {
00058 myStream << "SpectralGeometricMean";
00059 break;
00060 }
00061 case SpectralEnergyId:
00062 {
00063 myStream << "SpectralEnergy";
00064 break;
00065 }
00066 case SpectralCentroidId :
00067 {
00068 myStream << "SpectralCentroid";
00069 break;
00070 }
00071 case SpectralMoment2Id:
00072 {
00073 myStream << "SpectralMoment2";
00074 break;
00075 }
00076 case SpectralMoment3Id:
00077 {
00078 myStream << "SpectralMoment3";
00079 break;
00080 }
00081 case SpectralMoment4Id:
00082 {
00083 myStream << "SpectralMoment4";
00084 break;
00085 }
00086 case SpectralMoment5Id:
00087 {
00088 myStream << "SpectralMoment5";
00089 break;
00090 }
00091 case SpectralMoment6Id:
00092 {
00093 myStream << "SpectralMoment6";
00094 break;
00095 }
00096 case SpectralFlatnessId :
00097 {
00098 myStream << "SpectralFlatness";
00099 break;
00100 }
00101 case SpectralKurtosisId :
00102 {
00103 myStream << "SpectralKurtosis";
00104 break;
00105 }
00106 case FundamentalId :
00107 {
00108 myStream << "Fundamental";
00109 break;
00110 }
00111 default:
00112 {
00113 myStream << "UnknownDescriptor";
00114 break;
00115 }
00116 }
00117 myStream << "threshold=";
00118 myStream << a.threshold;
00119 myStream << " ";
00120 myStream << "percentil=";
00121 myStream << a.percentil;
00122 return myStream;
00123 }
00124
00125 std::istream& operator >> (std::istream& myStream, const TDescriptorsParams& a)
00126 {
00127 CLAM_ASSERT(false, "TDescriptorParams extractor operator is not implemented");
00128 return myStream;
00129 }
00130
00131
00132 void SegmentatorConfig::DefaultInit()
00133 {
00134
00135 AddDescriptorsParams();
00136 AddMinSegmentLength();
00137 UpdateData();
00138 Array<TDescriptorsParams> tmpArray(0);
00139 SetDescriptorsParams(tmpArray);
00140 SearchArray<TDescriptorsParams> tmpSearch(GetDescriptorsParams());
00141 SetDescriptorsSearch(tmpSearch);
00142
00143
00144 SetMinSegmentLength(0);
00145 };
00146
00147 void SegmentatorConfig::AddDescParams(const TDescriptorsParams& descParams)
00148 {
00149 TIndex position;
00150 if(GetDescriptorsParams().Size()==0)
00151 GetDescriptorsParams().AddElem(descParams);
00152 else if ((position=GetDescriptorsSearch().Find(descParams))==-1)
00153 {
00154 if(descParams<GetDescriptorsParams()[0])
00155 GetDescriptorsParams().InsertElem(0,descParams);
00156 else
00157 GetDescriptorsParams().AddElem(descParams);
00158 }
00159 else
00160 GetDescriptorsParams().InsertElem(position,descParams);
00161 }
00162
00163
00164 bool SegmentatorConfig::FindDescParams(TDescriptorsParams& descParams)
00165 {
00166 int pos;
00167 if((pos=GetDescriptorsSearch().Find(descParams))!=-1)
00168 {
00169 descParams.percentil=GetDescriptorsParams()[pos].percentil;
00170 descParams.threshold=GetDescriptorsParams()[pos].threshold;
00171 return true;
00172 }
00173 return false;
00174 }
00175
00176 void SegmentatorConfig::ClearDescParams() {
00177 GetDescriptorsParams().Init();
00178 }
00179
00183
00184 Segmentator::Segmentator()
00185 {
00186 Configure(SegmentatorConfig());
00187 }
00188
00189 Segmentator::Segmentator(const SegmentatorConfig& c)
00190 {
00191 Configure(c);
00192 }
00193
00194 Segmentator::~Segmentator()
00195 {
00196 }
00197
00198 bool Segmentator::ConcreteConfigure(const ProcessingConfig& c)
00199 {
00200 CopyAsConcreteConfig(mConfig, c);
00201 return true;
00202 }
00203
00204 bool Segmentator::Do()
00205 {
00206 CLAM_DEBUG_ASSERT(IsRunning(), "Segmentator: Do(): Not in execution mode");
00207
00208 CLAM_ASSERT(false, "Segmentator: Do(): Supervised mode not implemented");
00209
00210 return false;
00211 }
00212
00213
00214 bool Segmentator::Do(Segment& originalSegment,SegmentDescriptors& descriptors)
00215 {
00216 int nFrames=originalSegment.GetnFrames();
00217 Matrix descriptorsValues(mConfig.GetDescriptorsParams().Size(),nFrames);
00218 UnwrapDescriptors(originalSegment, descriptors,descriptorsValues);
00219 Algorithm(originalSegment,descriptorsValues);
00220 return true;
00221 }
00222
00223
00224 void Segmentator::UnwrapDescriptors(const Segment& originalSegment, SegmentDescriptors& descriptors,Matrix& descriptorsValues)
00225 {
00226 int nFrames=originalSegment.GetnFrames();
00227 int nDescriptors=mConfig.GetDescriptorsParams().Size();
00228 for(int i=0;i<nFrames;i++)
00229 {
00230
00231 int z=0;
00232 TData value;
00233 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMeanId)
00234 {
00235 value=descriptors.GetFrameD(i).GetSpectrumD().GetMean();
00236 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00237 descriptorsValues.SetAt(z,i,value);
00238 else descriptorsValues.SetAt(z,i,0);
00239 z++;
00240 }
00241 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralGeometricMeanId )
00242 {
00243 value=descriptors.GetFrameD(i).GetSpectrumD().GetGeometricMean();
00244 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00245 descriptorsValues.SetAt(z,i,value);
00246 else descriptorsValues.SetAt(z,i,0);
00247 z++;
00248 }
00249 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralEnergyId)
00250 {
00251 value=descriptors.GetFrameD(i).GetSpectrumD().GetEnergy();
00252 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00253 descriptorsValues.SetAt(z,i,value);
00254 else descriptorsValues.SetAt(z,i,0);
00255 z++;
00256 }
00257 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralCentroidId )
00258 {
00259 value=descriptors.GetFrameD(i).GetSpectrumD().GetCentroid();
00260 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00261 descriptorsValues.SetAt(z,i,value);
00262 else descriptorsValues.SetAt(z,i,0);
00263 z++;
00264 }
00265 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment2Id)
00266 {
00267 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment2();
00268 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00269 descriptorsValues.SetAt(z,i,value);
00270 else descriptorsValues.SetAt(z,i,0);
00271 z++;
00272 }
00273 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment3Id)
00274 {
00275 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment3();
00276 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00277 descriptorsValues.SetAt(z,i,value);
00278 else descriptorsValues.SetAt(z,i,0);
00279 z++;
00280 }
00281 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment4Id)
00282 {
00283 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment4();
00284 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00285 descriptorsValues.SetAt(z,i,value);
00286 else descriptorsValues.SetAt(z,i,0);
00287 z++;
00288 }
00289 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment5Id)
00290 {
00291 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment5();
00292 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00293 descriptorsValues.SetAt(z,i,value);
00294 else descriptorsValues.SetAt(z,i,0);
00295 z++;
00296 }
00297 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment6Id)
00298 {
00299 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment6();
00300 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00301 descriptorsValues.SetAt(z,i,value);
00302 else descriptorsValues.SetAt(z,i,0);
00303 z++;
00304 }
00305 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralFlatnessId )
00306 {
00307 value=descriptors.GetFrameD(i).GetSpectrumD().GetFlatness();
00308 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00309 descriptorsValues.SetAt(z,i,value);
00310 else descriptorsValues.SetAt(z,i,0);
00311 z++;
00312 }
00313 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralKurtosisId )
00314 {
00315 value=descriptors.GetFrameD(i).GetSpectrumD().GetMagnitudeKurtosis();
00316 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00317 descriptorsValues.SetAt(z,i,value);
00318 else descriptorsValues.SetAt(z,i,0);
00319 z++;
00320 }
00321 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==FundamentalId )
00322 {
00323 value=originalSegment.GetFrame(i).GetFundamental().GetFreq();
00324 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00325 descriptorsValues.SetAt(z,i,value);
00326 else descriptorsValues.SetAt(z,i,0);
00327 z++;
00328 }
00329 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioEnergyId)
00330 {
00331 value=descriptors.GetFrameD(i).GetAudioFrameD().GetEnergy();
00332 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00333 descriptorsValues.SetAt(z,i,value);
00334 else descriptorsValues.SetAt(z,i,0);
00335 z++;
00336 }
00337 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioVarianceId)
00338 {
00339 value=descriptors.GetFrameD(i).GetAudioFrameD().GetVariance();
00340 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00341 descriptorsValues.SetAt(z,i,value);
00342 else descriptorsValues.SetAt(z,i,0);
00343 z++;
00344 }
00345 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioCentroidId)
00346 {
00347 value=descriptors.GetFrameD(i).GetAudioFrameD().GetTemporalCentroid();
00348 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00349 descriptorsValues.SetAt(z,i,value);
00350 else descriptorsValues.SetAt(z,i,0);
00351 z++;
00352 }
00353 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioZeroCrossingRateId)
00354 {
00355 value=descriptors.GetFrameD(i).GetAudioFrameD().GetZeroCrossingRate();
00356 if(value>mConfig.GetDescriptorsParams()[z].threshold)
00357 descriptorsValues.SetAt(z,i,value);
00358 else descriptorsValues.SetAt(z,i,0);
00359 z++;
00360 }
00361 }
00362
00363
00364 }
00365
00366
00367 void Segmentator::Algorithm(Segment& s,const Matrix& values)
00368 {
00369
00370
00371
00372 int nFrames=s.GetnFrames();
00373 int nDescriptors=mConfig.GetDescriptorsParams().Size();
00374 SegmentBoundaries segmentBoundaries(nDescriptors);
00375
00376 segmentBoundaries.mArray.SetSize(nDescriptors);
00377
00378 for (int z=0;z<nDescriptors;z++)
00379 {
00380 segmentBoundaries.mArray[z].AddElem(std::pair<int,TData>(0,100));
00381 }
00382 for (int i=0; i<nFrames-4; i++)
00383 {
00384 for (int z=0;z<nDescriptors;z++)
00385 {
00386 const TData & x3 = values.GetAt(z,i+3);
00387 const TData & x2 = values.GetAt(z,i+2);
00388 const TData & x1 = values.GetAt(z,i+1);
00389 const TData & x0 = values.GetAt(z,i);
00390
00391 if (x2==0) continue;
00392
00393 const TData relevance = fabs((x3-x2)/x2);
00394 const TData & ratio = mConfig.GetDescriptorsParams()[z].percentil/100;
00395
00396
00397 if ((x3/x2)>(1+ratio) ||
00398 (x3/x2)<(1-ratio))
00399 {
00400
00401
00402
00403
00404
00405
00406
00407 if (( x3>x2 && x2>x1 && x1>x0 )||
00408 ( x3<x2 && x2<x1 && x1<x0 ))
00409 {
00410 std::pair<int,TData> tmpValue(i+3,relevance/ratio);
00411 segmentBoundaries.mArray[z].AddElem(tmpValue);
00412 }
00413 else if((x3/x2)>(1+2*ratio)||
00414 (x3/x2)<(1-2*ratio))
00415 {
00416 std::pair<int,TData> tmpValue(i+3,relevance/ratio);
00417 segmentBoundaries.mArray[z].AddElem(tmpValue);
00418 }
00419
00420 }
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449 }
00450 }
00451 DataFusion(s,segmentBoundaries);
00452 }
00453
00454 void Segmentator::DataFusion(Segment& s,const SegmentBoundaries& segmentBoundaries)
00455 {
00456
00457
00458
00459 const int nFrames=s.GetnFrames();
00460 const int nDescriptors=mConfig.GetDescriptorsParams().Size();
00461 TData duration=s.GetFrame(0).GetDuration();
00462 TData sampleRate=s.GetSamplingRate();
00463
00464
00465 Matrix probabilityMatrix(nFrames,nDescriptors);
00466 memset(probabilityMatrix.GetBuffer().GetPtr(),0,nFrames*nDescriptors*sizeof(TData));
00467
00468
00469 for (int z=0;z<nDescriptors;z++)
00470 {
00471 for (int n=0;n<segmentBoundaries.mArray[z].Size();n++)
00472 probabilityMatrix.SetAt(
00473 segmentBoundaries.mArray[z][n].first,
00474 z,
00475 segmentBoundaries.mArray[z][n].second);
00476 }
00477
00478
00479 Array<TData> globalProb;
00480 for (int n=0; n<nFrames; n++)
00481 {
00482 TData tmpProb=0;
00483 for(int z=0;z<nDescriptors;z++)
00484 {
00485 tmpProb+=probabilityMatrix.GetAt(n,z);
00486 }
00487 globalProb.AddElem(tmpProb);
00488 }
00489
00490
00491
00492
00493
00494 Array<TData> prob_fusion(globalProb);
00495 {
00496 int n=0;
00497 while(globalProb[n]<=0)
00498 n++;
00499 TData mag=globalProb[n];
00500 TData gcenter=n*globalProb[n];
00501 prob_fusion[n]=0;
00502 for (int m=n+1; m<globalProb.Size(); m++)
00503 {
00504 if (globalProb[m]<=0) continue;
00505 if ((m-n)>mConfig.GetMinSegmentLength())
00506 {
00507
00508 prob_fusion[(int)(gcenter/mag)]=mag;
00509 mag=0;
00510 gcenter=0;
00511 }
00512 mag+=globalProb[m];
00513 gcenter+=m*globalProb[m];
00514 prob_fusion[m]=0;
00515 n=m;
00516 }
00517 }
00518
00519 TData max=0;
00520 for (int n=0; n<prob_fusion.Size(); n++)
00521 if (prob_fusion[n]>max)
00522 max=prob_fusion[n];
00523 for (int n=0; n<prob_fusion.Size(); n++)
00524
00525 if (prob_fusion[n]<=(max/100))
00526 prob_fusion[n]=0;
00527
00528 Array<TData> finalSegments;
00529 for (int n=0; n<prob_fusion.Size(); n++)
00530 {
00531 if (prob_fusion[n]>0)
00532 finalSegments.AddElem(n*duration*sampleRate);
00533 }
00534
00535
00536
00537 if (finalSegments.Size()<=0) return;
00538
00539 for (int n=0; n<(finalSegments.Size()-1); n++)
00540 {
00541 Segment tmpSegment;
00542 tmpSegment.SetBeginTime(finalSegments[n] /sampleRate);
00543 tmpSegment.SetEndTime (finalSegments[n+1]/sampleRate);
00544 tmpSegment.SetpParent(&s);
00545 tmpSegment.SetHoldsData( false );
00546 s.GetChildren().AddElem(tmpSegment);
00547 }
00548
00549 Segment tmpSegment;
00550 tmpSegment.SetBeginTime(finalSegments[finalSegments.Size()-1] /sampleRate);
00551 tmpSegment.SetEndTime(s.GetAudio().GetEndTime());
00552 tmpSegment.SetpParent(&s);
00553 s.GetChildren().AddElem(tmpSegment);
00554
00555 }
00556