forked from ibsh/libKeyFinder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkeyfinder.cpp
145 lines (123 loc) · 4.75 KB
/
keyfinder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#include "keyfinder.h"
namespace KeyFinder {
// Estimates the key of a complete piece of audio in a single call.
//
// A fresh Workspace is created for the call, the audio is pushed through
// progressiveChromagram() and finalChromagram(), and the outcome of
// keyOfChromagram() on that workspace is returned.
//
// originalAudio: audio to analyse. progressiveChromagram() takes its
//                argument by value, so the caller's object is not modified.
// params:        analysis parameters, forwarded unchanged to every stage.
KeyDetectionResult KeyFinder::keyOfAudio(const AudioData& originalAudio, const Parameters& params) {
  Workspace scratch;

  // one-shot analysis: a single progressive pass followed by the final flush
  progressiveChromagram(originalAudio, scratch, params);
  finalChromagram(scratch, params);

  KeyDetectionResult detected = keyOfChromagram(scratch, params);
  return detected;
}
// Feeds one chunk of audio into an ongoing analysis.
//
// The chunk is preprocessed, appended to workspace.buffer, and then
// chromagramOfBufferedAudio() is run on the workspace.
//
// audio:     chunk to analyse. It is received by value, so the in-place
//            preprocessing below works on a private copy.
// workspace: analysis state carried between calls; updated in place.
// params:    analysis parameters.
void KeyFinder::progressiveChromagram(AudioData audio, Workspace& workspace, const Parameters& params) {
  // step 1: prepare the private copy of the chunk
  preprocess(audio, workspace, params);

  // step 2: queue the prepared samples behind whatever is already buffered
  workspace.buffer.append(audio);

  // step 3: analyse the buffered audio
  chromagramOfBufferedAudio(workspace, params);
}
// Finishes a progressive analysis.
//
// Grows workspace.buffer (zero padding) so that every remaining hop has a
// full FFT frame available, then runs chromagramOfBufferedAudio() one last
// time.
//
// workspace: analysis state; its buffer is extended and then analysed.
// params:    supplies the hop size and FFT frame size. The hop size must be
//            non-zero.
//
// NOTE(review): assumes getSampleCount(), getHopSize() and getFftFrameSize()
// return unsigned int-compatible counts, as the original unsigned int locals
// suggest - confirm against their declarations.
void KeyFinder::finalChromagram(
  Workspace& workspace,
  const Parameters& params
) {
  const unsigned int hopSize = params.getHopSize();
  const unsigned int bufferedSamples = workspace.buffer.getSampleCount();

  // Number of hops needed to cover the buffered samples, rounded up.
  // Done in integer arithmetic: going through float (24-bit mantissa) can
  // round the quotient and give the wrong hop count for large sample counts.
  const unsigned int paddedHopCount =
    bufferedSamples / hopSize + (bufferedSamples % hopSize != 0 ? 1 : 0);

  // zero padding: the last hop starts at (paddedHopCount - 1) * hopSize and
  // needs a whole FFT frame after it. For an empty buffer paddedHopCount is 0
  // and the unsigned arithmetic wraps to fftFrameSize - hopSize, which is the
  // same value the previous float-based computation produced.
  const unsigned int finalSampleLength =
    params.getFftFrameSize() + ((paddedHopCount - 1) * hopSize);

  workspace.buffer.addToSampleCount(finalSampleLength - bufferedSamples);
  chromagramOfBufferedAudio(workspace, params);
}
// Derives key estimates from the chromagram held in the workspace.
//
// A working copy of workspace.chroma is reduced to one octave and split into
// segments. Each segment's chroma is summed over its hops and classified;
// the energy of every non-silent segment is added to the weight of its key.
// The global estimate is the key with the greatest accumulated weight.
// workspace.chroma itself is only read.
//
// workspace: analysis state; only workspace.chroma is used.
// params:    segmentation and classifier settings.
//
// Returns a result with one entry per analysed segment. globalKeyEstimate is
// SILENCE when workspace.chroma is NULL or when no key gathered any weight.
KeyDetectionResult KeyFinder::keyOfChromagram(
  Workspace& workspace,
  const Parameters& params
) const {
  KeyDetectionResult result;
  result.globalKeyEstimate = SILENCE;

  // no chromagram has been built yet: report silence instead of
  // dereferencing a null pointer
  if (workspace.chroma == NULL)
    return result;

  // working copy of chromagram; held by value so it is released on every
  // exit path, including when one of the calls below throws
  Chromagram ch(*workspace.chroma);
  ch.reduceToOneOctave();

  // get harmonic change signal and segment
  Segmentation segmenter;
  std::vector<unsigned int> segmentBoundaries = segmenter.getSegmentationBoundaries(&ch, params);
  segmentBoundaries.push_back(ch.getHops()); // sentinel

  // get key estimates for each segment
  KeyClassifier classifier(
    params.getSimilarityMeasure(),
    params.getToneProfile(),
    params.getOffsetToC(),
    params.getCustomToneProfile()
  );
  std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

  typedef std::vector<unsigned int>::size_type boundary_index_t;
  for (boundary_index_t s = 0; s + 1 < segmentBoundaries.size(); ++s) {
    const unsigned int nextBoundary = segmentBoundaries[s + 1];
    // A segment that ends at hop 0 contains no hops, and nextBoundary - 1
    // would wrap around in unsigned arithmetic, sending the hop loop far
    // beyond the chromagram. This happens when the chromagram has no hops.
    if (nextBoundary == 0)
      continue;

    KeyDetectionResultSegment segment;
    segment.firstHop = segmentBoundaries[s];
    segment.lastHop = nextBoundary - 1;

    // collapse segment's time dimension
    std::vector<float> segmentChroma(ch.getBands(), 0.0);
    for (unsigned int hop = segment.firstHop; hop <= segment.lastHop; hop++) {
      for (unsigned int band = 0; band < ch.getBands(); band++) {
        float value = ch.getMagnitude(hop, band);
        segmentChroma[band] += value;
        segment.energy += value;
      }
    }
    segment.chromaVector = segmentChroma;
    segment.key = classifier.classify(segmentChroma);
    // silent segments carry no vote towards the global key
    if (segment.key != SILENCE)
      keyWeights[segment.key] += segment.energy;
    result.segments.push_back(segment);
  }

  // get global key: the key with the largest accumulated energy; ties keep
  // the earliest key because the comparison is strict
  float mostCommonKeyWeight = 0.0;
  for (std::vector<float>::size_type k = 0; k < keyWeights.size(); ++k) {
    if (keyWeights[k] > mostCommonKeyWeight) {
      mostCommonKeyWeight = keyWeights[k];
      result.globalKeyEstimate = static_cast<key_t>(k);
    }
  }
  return result;
}
// Prepares a chunk of audio for spectral analysis, in place.
//
// The audio is reduced to mono, low-pass filtered and then downsampled by an
// integer factor derived from its frame rate and params.getLastFrequency().
//
// workingAudio: audio to prepare; modified in place.
// workspace:    passed through to the low-pass filter.
// params:       supplies the last analysis frequency.
void KeyFinder::preprocess(
  AudioData& workingAudio,
  Workspace& workspace,
  const Parameters& params
) {
  workingAudio.reduceToMono();

  // TODO: there is presumably some good maths to determine filter frequencies.
  // For now, this approximates original experiment values for default params.
  float lpfCutoff = params.getLastFrequency() * 1.012;
  float dsCutoff = params.getLastFrequency() * 1.10;

  // Largest whole factor that keeps dsCutoff below the new Nyquist frequency.
  // When the frame rate is below twice dsCutoff the quotient floors to 0;
  // a factor of 0 is meaningless for filtering or downsampling, so clamp to
  // 1, which means "keep every sample".
  const int rawFactor = static_cast<int>(floor(workingAudio.getFrameRate() / 2 / dsCutoff));
  const unsigned int downsampleFactor = rawFactor < 1 ? 1u : static_cast<unsigned int>(rawFactor);

  // get filter
  // NOTE(review): 160 and 2048 are presumably the filter order and the
  // filter's FFT frame size - confirm against the factory's declaration.
  const LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio.getFrameRate(), lpfCutoff, 2048);
  lpf->filter(workingAudio, workspace, downsampleFactor); // downsampleFactor shortcut
  // note we don't delete the LPF; it's stored in the factory for reuse

  workingAudio.downsample(downsampleFactor);
}
// Analyses the whole frames currently held in workspace.buffer and adds the
// resulting chromagram to workspace.chroma.
//
// workspace: analysis state. Its FFT adapter is created on first use, frames
//            are discarded from the front of its buffer, and its chroma is
//            either set or appended to.
// params:    FFT frame size, hop size and tuning settings.
void KeyFinder::chromagramOfBufferedAudio(Workspace& workspace, const Parameters& params) {
  // create the workspace's FFT adapter the first time it is needed
  if (workspace.getFftAdapter() == NULL) {
    workspace.setFftAdapter(new FftAdapter(params.getFftFrameSize()));
  }

  SpectrumAnalyser analyser(workspace.buffer.getFrameRate(), params, ctFactory);
  Chromagram* fresh = analyser.chromagramOfWholeFrames(workspace.buffer, workspace.getFftAdapter());

  // deal with tuning if necessary: only attempted when the chromagram has
  // more than one band per semitone
  const bool multipleBandsPerSemitone = fresh->getBandsPerSemitone() > 1;
  if (multipleBandsPerSemitone && params.getTuningMethod() == TUNING_BAND_ADAPTIVE) {
    fresh->tuningBandAdaptive(params.getDetunedBandWeight());
  } else if (multipleBandsPerSemitone && params.getTuningMethod() == TUNING_HARTE) {
    fresh->tuningHarte();
  }

  // advance the buffer by one hop size for every hop in the new chromagram
  workspace.buffer.discardFramesFromFront(params.getHopSize() * fresh->getHops());

  if (workspace.chroma != NULL) {
    // append to the chromagram the workspace already holds, then free ours
    workspace.chroma->append(*fresh);
    delete fresh;
    return;
  }

  // first chromagram for this workspace: workspace.chroma now points at it
  workspace.chroma = fresh;
}
}