# simulate data from a simple 2-state hmm with two possible outputs for every state.
# one can think of alternating between two coins (according to a MC) that have different
# probabilities of giving heads. Use DP to find the most likely sequence of hidden states
# (ie most reasonable labeling of which coin was used) given the data (and the model)
# compute the error rate and plot some sample output.
# This is a *generic* implementation, in the sense that one can use this code to specify
# any HMM and observation sequence by setting
# 1) transition probability matrix Q,
# 2) observation or "output" probabilities R,
# 3) initial distribution p,
# 4) data sequence y
# and then find the most likely sequence of states using DP.
L = 2   # number of hidden states; states are labeled 1 .. L
M = 2   # number of distinct observables; outputs are labeled 1 .. M
N = 100 # length of the observation sequence
# Transition probabilities: Q[i,j] = P(next state is j | current state is i).
# Large diagonal entries mean the chain tends to linger in each state.
Q = matrix(c(0.9, 0.1,
             0.1, 0.9), L, L, byrow=TRUE)
# Emission probabilities: R[i,j] = P(observe j | current state is i).
# State 1 mostly emits output 1, state 2 mostly emits output 2.
#R = matrix(c(.5,.5,.1,.9),L,M,byrow=TRUE)  # alternative: state 1 is a "fair" coin
R = matrix(c(0.9, 0.1,
             0.1, 0.9), L, M, byrow=TRUE)
p = rep(1/L, L)   # initial state distribution (uniform = "we have no idea")
x = rep(0, N)     # true (unobserved) state sequence, filled in below
xhat = rep(0, N)  # state sequence estimated from y (and the model)
y = rep(0, N)     # observed output sequence
# Draw the hidden state sequence x and the observations y from the model.
# At n = 1 the state comes from the initial distribution p; afterwards it
# comes from the transition row of the previous state. Each y[n] is then
# emitted from the emission row of the current state.
for (n in 1:N) {
  if (n == 1) {
    x[n] = sample(1:L, 1, prob=p)
  } else {
    x[n] = sample(1:L, 1, prob=Q[x[n-1],])
  }
  y[n] = sample(1:M, 1, prob=R[x[n],])
}
# Viterbi decoding by dynamic programming over the trellis.
# This is almost exactly the DP used for HMM recognition; the only
# difference is that the score of a path is the *product* of the
# probabilities along its arcs rather than a sum.
optscore = matrix(0, L, N)  # optscore[s,n]: best score of any path through
                            # the trellis ending in state s at time n
optpred = matrix(0, L, N)   # optpred[s,n]: state at time n-1 on that best
                            # path (back-pointer for the traceback)
optscore[,1] = p * R[, y[1]]   # p(x_1 = s) * p(y_1 | x_1 = s), for all s at once
for (n in 2:N) { # for each observation
  for (s in 1:L) { # for each possible state
    # score of reaching s at time n via each possible predecessor r
    cand = optscore[, n-1] * Q[, s] * R[s, y[n]]
    best = which.max(cand)   # first index of the maximum, like the strict-> loop
    if (cand[best] > 0) {    # only record strictly positive scores (as the original did)
      optscore[s, n] = cand[best]
      optpred[s, n] = best
    }
  }
  optscore[,n] = optscore[,n] / sum(optscore[,n]) # rescale to avoid underflow
}
# Backtrack through the trellis: start from the best final state and
# follow the stored back-pointers down to time 1.
xhat[N] = which.max(optscore[,N]) # best final state
# rev(seq_len(N-1)) is empty when N == 1, unlike the classic R pitfall
# (N-1):1, which would wrongly iterate over 0:1 in that edge case; for
# N >= 2 it visits exactly the same indices N-1, N-2, ..., 1.
for (n in rev(seq_len(N-1))) xhat[n] = optpred[xhat[n+1],n+1];
print("error rate is:")
print(sum(x!=xhat)/N)
K = 50 # number of time steps to plot
#color = 1 + (x!=xhat);
#plot(y[1:K],col=color);
# Set up an empty frame with explicit limits. (The original plotted an
# invisible *random* sample just to establish the y-range, which both
# consumed RNG state and could occasionally miss a level, giving the
# wrong limits.) Rows: y at height 1, true x at 2, decoded xhat at 3.
plot(1:K, rep(1, K), type="n", ylim=c(1, 3), xlab="", ylab="");
points(1:K, rep(3, K), col=1+xhat[1:K]);  # decoded states, colored by state
text(-1, 3, labels="xhat");
points(1:K, rep(2, K), col=1+x[1:K]);     # true states, colored by state
text(-1, 2, labels="x");
points(1:K, rep(1, K), col=3+y[1:K]);     # observations, colored by symbol
text(-1, 1, labels="y");