# this is a minor variation (and I think improvement) on the template matching idea. rather
# that score a note as the dot product between spectrum and template, we use the
# the dot product of the log of the template and spectrum. this has a probabilistic interpretation
# presented in class. one can also think of this as heavily penalizing any note hypothesis
# having spectral energy where it "doesn't belong" since in such a case we are multiplying by
# log(near 0) = very negative.
lop = 58 # the lowest possible midi pitch
hip = 89 # the highest
N = 1024 # num samples per analysis frame
sr = 8000 # the sampling rate
wid = 2 # the "width" (really standard deviation) of peak
decay = 4 # describes how amplitude decays for progressively higher harmonics
tplt = matrix(.01,hip,N/2) # each row corresp to template
for (p in lop:hip) { # for each possible pitch
f = 440*2^((p-69)/12) # freq of pitch in hz
b = f*N/sr # FFT bin number of pitch (could be real-valued)
for (h in 1:15) { # assume 15 possible harmonics
tplt[p,] = tplt[p,] + 2^(-h/decay)*dnorm(1:(N/2),mean=b*h,sd = wid) # peak is normal shaped
}
tplt[p,] = tplt[p,]/sum(tplt[p,]) # normalize to sum to one
# plot(tplt[p,],type='l') # sanity check
# scan() # wait for human to type key
}
library(tuneR)
t = seq(from=0,by=2*pi/N,length=N) # N evenly spaced pts 0 -- 2*pi
win = (1 + cos(t-pi))/2 # the "raised cosine" or "Hanning" window
#w = readWave("sound/chrom.wav") # take actual sound data from file
#w = readWave("sound/octaves.wav") # take actual sound data from file
#w = readWave("sound/bass_oboe.wav") # take actual sound data from file
#w = readWave("sound/hummelflug.wav") # take actual sound data from file
w = readWave("sound/debussy_nocturnes_fete.wav") # take actual sound data from file
sr = w@samp.rate # grab sampling rate from Wave struct
# (try str(w) to see parts of Wave struct)
m = rep(0,hip) # save data likelihoods here
y = w@left # grab the left channel (or only channel if mono)
y = y[1:230000]
frames = length(y)/N -1
hat = rep(0,frames) # save pitch estimates here
for (f in 1:frames) {
s = f*N # start of frame
grain = win*y[s:(s+N-1)] # just analyze short segment of audio
Y = fft(grain) # take the fft
for (p in lop:hip) m[p] = sum(log(tplt[p,])*Mod(Y[1:(N/2)])) # m[p] is "score" of pth note template
hat[f] = lop -1 + which.max(m[lop:hip]) # take best scoring template as pitch estimate
print(hat[f])
}
plot(hat)
t = 0
for (i in 1:frames) {
v = 2*pi*440.*2^((hat[i]-69)/12)/sr
t = c(t,rep(v,N))
}
z = sin(cumsum(t))
u = Wave(round((2^11)*z), samp.rate = sr, bit=16) # make wave struct
play(u,"play") # play it