# a program to use dynamic programming to perform a score match between a short audio
# fragment and a list of pitches. we create an array lat[i,j] of dimensions #notes x #frames
# we consider all paths through this array that start at lat[1,1] = in first note at first frame
# to lat[#notes,#frames] = in last note at last frame. each path can only move either horizontally
# or diagonally down, meaning (i,j) --> (i,j+1) or (i,j) --> (i+1,j+1). In other words, we
# can only move forward through the collection of notes and no note can be skipped, though each
# note has arbitrary length. each note hypothese at each frame is scored with the appropriate
# data likelihood. The optimal path is found with dynamic programming
lop = 61 # the lowest possible pitch
hip = 86 # the highest possible pitch
# lop denotes rest and we use the midi meaning of lop+1 ... hip
list = c(lop, 86, 85, 84, 83, 84, 83,
82, 81, 82, 81, 80, 79, 78,
77, 76, 75, 74, 73, 72, 71,
72, 71, 70, 69, 70, 69, 68,
67, 66, 65, 64, 63, 62, lop) # pitches to 1st 4 measures of hummelflug
library(tuneR)
w = readWave("sound/hummelflug.wav")
bits = w@bit
sr = w@samp.rate
y = w@left
y = y[35000:70000] # this is audio corresp to 1st 4 measures
u = Wave(y, samp.rate = sr, bit=bits) # make wave struct
play(u,"play")
N = 512
skiplen = N/2
frames = floor(length(y)/skiplen)
wid = 1 # create templates for each note. This is exactly as before
decay = 4
tplt = matrix(.01,hip,N/2)
for (j in lop:hip) {
f = 440*2^((j-69)/12)
b = f*N/sr
for (h in 1:15) {
if (j == lop) next # lop is the rest = flat template
tplt[j,] = tplt[j,] + 2^(-h/decay)*dnorm(1:(N/2),mean=b*h,sd = wid)
}
tplt[j,] = tplt[j,]/sum(tplt[j,])
plot(tplt[j,],type='l')
# scan() # wait for human to type key
}
lat = matrix(0,length(list),frames) # the array that holds the dynamic programming scores
best = matrix(0,length(list),frames) # the array of best predecessors. best[i][j] will be either
# i (if the note persists at j) or i-1 (if the note changes at j)
lat[2:length(list),1] = -Inf # impossible to start anywhere but first note
t = seq(from=0,by=2*pi/N,length=N)
win = (1 + cos(t-pi))/2
for (j in 2:frames) { # main loop of program (for all frames)
s = (j-1)*skiplen +1 # start of frame
grain = win*y[s:(s+N-1)] # the frame
Y = fft(grain) # take the fft
for (i in 1:length(list)) { # for all notes (states)
if (i > 1) pred = c(i-1,i) # the possible predecessor notes
else pred = c(i)
lat[i,j] = -Inf
for (ii in pred) if (lat[ii,j-1] > lat[i,j]) { # if predecessor better than best so far ..
lat[i,j] = lat[ii,j-1] # dynamic programming score
best[i,j] = ii # remember optimal predecessor
}
lat[i,j] = lat[i,j] + sum(log(tplt[list[i],])*Mod(Y[1:(N/2)])) # score all notes by data model
}
}
hat = rep(0,frames) # the optimal parse
hat[frames] = length(list) # know the optimal last note
for (j in frames:2) hat[j-1] = best[hat[j],j] # trace back optimal parse
#plot(hat)
plot(list[hat])
for (j in 1:(frames-1)) if (hat[j] != hat[j+1] && hat[j]%%4 == 1) y[j*skiplen] = 32000
#for (j in 1:(frames-1)) if (hat[j] != hat[j+1] ) y[j*skiplen] = 32000
# put in clicks every 4th note in audio
u = Wave(y, samp.rate = sr, bit=bits)
play(u,"play")