ChordExtractor.hxx
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef ChordExtractor_hxx
00023 #define ChordExtractor_hxx
00024
00025 #include "DiscontinuousSegmentation.hxx"
00026 #include "ChordSegmentator.hxx"
00027 #include "ChordCorrelator.hxx"
00028 #include "CircularPeakPicking.hxx"
00029 #include "CircularPeaksToPCP.hxx"
00030 #include "CircularPeakTunner.hxx"
00031 #include "ConstantQFolder.hxx"
00032 #include "ConstantQTransform.hxx"
00033 #include "FourierTransform.hxx"
00034 #include "InstantTunningEstimator.hxx"
00035 #include "SemitoneCenterFinder.hxx"
00036 #include "PCPSmother.hxx"
00037
00038 namespace Simac
00039 {
00040
00041 class ChordExtractor
00042 {
00043 double _sparseConstantQKernelThreshold;
00044 ConstantQTransform _constantQTransform;
00045 ConstantQFolder _constantQFolder;
00046 FourierTransform _fourierTransform;
00047 CircularPeakPicking _circularPeakPicking;
00048 InstantTunningEstimator _instantTunningEstimator;
00049 CircularPeakTunner _circularPeakTunner;
00050 CircularPeaksToPCP _circularPeaksToPCP;
00051 PCPSmother _filter;
00052 ChordCorrelator _chordCorrelator;
00053 ChordSegmentator _chordSegmentator;
00054 bool _tunningEnabled;
00055 bool _peakWindowingEnabled;
00056 double _hopRatio;
00057 unsigned _estimatedChord;
00058 unsigned _secondCandidate;
00059 double _squaredRootEnergy;
00060 public:
00061 static double maximumFrequency(double sampleRate) { return sampleRate/2.1; }
00062 typedef float * AudioFrame;
00063
00064 ChordExtractor(unsigned sampleRate=44100, double minimumFrequency=98, unsigned binsPerOctave=36)
00065 : _sparseConstantQKernelThreshold(0.0054)
00066 , _constantQTransform(sampleRate, minimumFrequency, maximumFrequency(sampleRate), binsPerOctave)
00067 , _constantQFolder(_constantQTransform.getK(), binsPerOctave)
00068 , _fourierTransform(_constantQTransform.getfftlength(),1,0)
00069 , _circularPeakPicking(binsPerOctave, 12.0/binsPerOctave)
00070 , _instantTunningEstimator( 1.0)
00071 , _circularPeakTunner( 0.0)
00072 , _filter(0.7)
00073 , _tunningEnabled(true)
00074 , _peakWindowingEnabled(true)
00075 , _hopRatio(8.0)
00076 , _estimatedChord(0)
00077 , _secondCandidate(0)
00078 {
00079 _constantQTransform.sparsekernel(_sparseConstantQKernelThreshold);
00080 if (_peakWindowingEnabled)
00081 _circularPeaksToPCP.activateWindowing();
00082 }
00083 ~ChordExtractor()
00084 {
00085 }
00086
00087
00088 void filterInertia(double inertia)
00089 {
00090 _filter.inertia(inertia);
00091 }
00092 void enableTunning(bool tunningEnabled=true) { _tunningEnabled=tunningEnabled; }
00093 void enablePeakWindowing(bool peakWindowingEnabled=true) { _peakWindowingEnabled=peakWindowingEnabled; }
00094 void hopRatio(double hopRatio) { _hopRatio=hopRatio; }
00095 void segmentationMethod(double segmentationMethod) { _chordSegmentator.method(segmentationMethod); }
00096
00097 unsigned hop() const {return _constantQTransform.getfftlength()/_hopRatio;}
00098 unsigned frameSize() const {return _constantQTransform.getfftlength();}
00099
00100 void doIt(const AudioFrame & input, CLAM::TData & currentTime)
00101 {
00102 _squaredRootEnergy = 0.0;
00103 for (unsigned i=0; i<frameSize(); i++)
00104 _squaredRootEnergy += input[i]*input[i];
00105
00106 _fourierTransform.doIt(input);
00107 _constantQTransform.doIt(_fourierTransform.spectrum());
00108 _constantQFolder.doIt(_constantQTransform.constantQSpectrum());
00109 _circularPeakPicking.doIt(_constantQFolder.chromagram());
00110 _instantTunningEstimator.doIt(_circularPeakPicking.output());
00111 _circularPeakTunner.doIt(_instantTunningEstimator.output().first, _circularPeakPicking.output());
00112 if (_tunningEnabled)
00113 _circularPeaksToPCP.doIt(_circularPeakTunner.output());
00114 else
00115 _circularPeaksToPCP.doIt(_circularPeakPicking.output());
00116 _filter.doIt(_circularPeaksToPCP.output());
00117 _chordCorrelator.doIt(_filter.output());
00118 estimateChord(_chordCorrelator.output());
00119 _chordSegmentator.doIt(currentTime, _chordCorrelator.output(), _estimatedChord, _secondCandidate);
00120 }
00121 void estimateChord(const ChordCorrelator::ChordCorrelation & correlation)
00122 {
00123 double maxCorrelation = 0;
00124 double underMaxCorrelation = 0;
00125 unsigned maxIndex = 0;
00126 unsigned underMaxIndex = 0;
00127 for (unsigned i=0; i<correlation.size(); i++)
00128 {
00129 if (correlation[i]<underMaxCorrelation) continue;
00130 if (correlation[i]<maxCorrelation)
00131 {
00132 underMaxIndex=i;
00133 underMaxCorrelation=correlation[i];
00134 continue;
00135 }
00136 underMaxIndex=maxIndex;
00137 underMaxCorrelation=maxCorrelation;
00138 maxIndex=i;
00139 maxCorrelation=correlation[i];
00140 }
00141 _estimatedChord = maxIndex;
00142 _secondCandidate = underMaxIndex;
00143 }
00144 std::string chordRepresentation(unsigned chordIndex) const
00145 {
00146 return _chordCorrelator.chordRepresentation(chordIndex);
00147 }
00148 std::string root(unsigned chordIndex) const
00149 {
00150 return _chordCorrelator.root(chordIndex);
00151 }
00152 std::string mode(unsigned chordIndex) const
00153 {
00154 return _chordCorrelator.mode(chordIndex);
00155 }
00156 const std::string chordEstimation() const
00157 {
00158 const ChordCorrelator::ChordCorrelation & correlation = _chordCorrelator.output();
00159 double maxCorrelation=correlation[_estimatedChord];
00160 double underMaxCorrelation=correlation[_secondCandidate];
00161 if (maxCorrelation*0.7<=correlation[0]) return "None";
00162 bool estimationIsClear = maxCorrelation*0.9>underMaxCorrelation;
00163 std::ostringstream os;
00164 os << _chordCorrelator.chordRepresentation(_estimatedChord);
00165 if (!estimationIsClear)
00166 os << " [or "<< _chordCorrelator.chordRepresentation(_secondCandidate)<< "]";
00167 os << " (" << (correlation[0]/maxCorrelation) << ")";
00168 if (!estimationIsClear)
00169 os << " (" << (underMaxCorrelation/(underMaxCorrelation+maxCorrelation)) << ")";
00170 return os.str();
00171 }
00172 const std::vector<double> & chromagram() const
00173 {
00174 return _constantQFolder.chromagram();
00175 }
00176 const std::vector<double> & pcp() const
00177 {
00178 return _circularPeaksToPCP.output();
00179 }
00180 const std::vector<std::pair<double, double> > & peaks() const
00181 {
00182 return _circularPeakPicking.output();
00183 }
00184 const std::vector<double> & chordCorrelation() const
00185 {
00186 return _chordCorrelator.output();
00187 }
00188 const CLAM::DiscontinuousSegmentation & segmentation() const
00189 {
00190 return _chordSegmentator.segmentation();
00191 }
00192 const std::vector<unsigned> & chordIndexes() const
00193 {
00194 return _chordSegmentator.chordIndexes();
00195 }
00196 void clear()
00197 {
00198 _chordSegmentator.eraseAllSegments();
00199 }
00200 void closeLastSegment(CLAM::TData currentTime)
00201 {
00202 _chordSegmentator.closeLastSegment(currentTime);
00203 }
00204 double tunning() const {return _instantTunningEstimator.output().first; }
00205 double tunningStrength() const {return _instantTunningEstimator.output().second; }
00206 std::pair<double,double> instantTunning() const {return _instantTunningEstimator.instantTunning(); }
00207 double energy() const {return _squaredRootEnergy; }
00208 unsigned firstCandidate() const {return _estimatedChord;}
00209 unsigned secondCandidate() const {return _secondCandidate;}
00210 std::vector<double> spectrum() const {return _fourierTransform.spectrum(); }
00211
00212 };
00213 }
00214
00215 #endif//ChordExtractor
00216