using CSV, PyPlot
# Load MNIST from CSV. Each row has 785 byte-valued columns: column 1 is the
# digit label and the remaining 784 columns are the pixel values.

# Training set: read, keep only the first 300 rows, then split off the labels.
traindata = CSV.read("mnist_train.csv"; datarow=1, delim=',', types=fill(UInt8, 785));
traindata = traindata[1:300, :]
n_train, d = size(traindata)
trainY = Int8.(traindata[:, 1])
trainX = ones(n_train, d)   # column 1 stays at 1.0; the pixel columns are filled below

# Test set: used in full (it is not subsampled like the training set).
testdata = CSV.read("mnist_test.csv"; datarow=1, delim=',', types=fill(UInt8, 785));
n_test, d = size(testdata)
testY = Int8.(testdata[:, 1])
testX = ones(n_test, d)

# Copy the pixel columns into both feature matrices, leaving column 1 as the
# constant (all-ones) feature.
for col in 2:d
    trainX[:, col] = Float32.(traindata[:, col])
    testX[:, col] = Float32.(testdata[:, col])
end
# Show the first num*num training digits in a num-by-num grid of 28x28 images.
# `figure()` must be called: a bare `figure` only names the function and does
# not open a new figure window.
figure()
num = 10
for i in 1:(num*num)
    subplot(num, num, i)
    # Column 1 of trainX is the constant feature; columns 2:end are the 784 pixels.
    # reshape fills column by column, so the transpose presumably restores the
    # row-major pixel layout of the CSV (confirm against the data source).
    imshow(reshape(trainX[i, 2:end], 28, 28)', cmap="gray")
    axis("Off")
end
# Print the labels of the digits just plotted, in the same order.
println(Int8.(traindata[1:(num*num), 1]))
using Convex, ECOS
include("to_one_hot.jl")
# Prepare the optimisation inputs: X is the training feature matrix and y is a
# one-hot encoding of the training labels (one column per digit class).
X = trainX
m = 10
y = zeros(Int8, n_train, m)
# Labels are 0-based digits, so digit k sets column k+1 of its row.
for (row, label) in enumerate(trainY)
    y[row, label + 1] = 1
end
# Problem dimensions: d features (including the constant column), K classes.
d = size(X, 2)
K = size(y, 2)
m = K
# Fit a linear multiclass classifier by minimising a hinge-style loss plus a
# norm penalty, expressed as a Convex.jl problem and solved with ECOS.

# Decision variable: d rows (features) by m columns (classes).
theta = Variable(d,m)
# Symbolic class scores for every training row.
yhat = X*theta
# Accumulate the loss one training example at a time.
cost = 0
for i=1:n_train
# Scores of example i and its one-hot label row.
yhati = yhat[i,:]
yi = y[i,:]
# Every class score minus the score of the labelled class; since yi is one-hot,
# the product yhati*yi picks out the labelled class's score.
Mi = yhati - yhati*yi
# Add a margin of 1 for every class whose one-hot entry is 0 (none for the
# labelled class), then clamp negative values to zero with pos.
one_plus_Mi = pos( ones(1,m)-reshape(yi,1,m) + Mi )
# Only the largest margin violation of this example contributes to the loss.
cost += maximum(one_plus_Mi)
end
# Norm penalty (weight 1.0) on every row of theta except the first; the first
# column of X is all ones, so the first row of theta acts as the bias term.
cost += 1.0*vecnorm(theta[2:end,:])
problem = minimize(cost);
solve!(problem, ECOSSolver(verbose=1))
# Numeric value of theta after the solve (presumably the optimum, if the solver
# reported success -- the status is not checked here).
theta_opt = evaluate(theta)
# Tally a confusion matrix from a score matrix and 0-based integer labels.
# Returns (counts, predicted): counts[p, t] is the number of rows predicted as
# class index p whose true class index is t (label + 1); predicted holds the
# 1-based index of the highest score in each row.
function tally_confusion(scores, labels, nclasses)
    nrows = length(labels)
    counts = zeros(nclasses, nclasses)
    predicted = zeros(Int8, nrows)
    for row in 1:nrows
        predicted[row] = findmax(scores[row, :])[2]
        counts[predicted[row], labels[row] + 1] += 1
    end
    return counts, predicted
end

# Training-set predictions and confusion matrix.
yhat = trainX*theta_opt
C_train, vhat = tally_confusion(yhat, trainY, K)

# Test-set predictions and confusion matrix (yhat and vhat are reused).
yhat = testX*theta_opt
C_test, vhat = tally_confusion(yhat, testY, K)
# Show the train and test confusion matrices side by side in their own figure.
# `figure()` must be called: a bare `figure` is a no-op, which would leave the
# subplots below drawing onto whatever figure is already current.
figure()
subplot(121)
imshow(C_train, cmap="gray")
axis("Off")
title("Train confusion matrix")
subplot(122)
imshow(C_test, cmap="gray")
axis("Off")
title("Test confusion matrix")
# Error rate = fraction of samples off the diagonal, i.e. predicted class
# differs from the true class.
Error_rate_train = (sum(C_train) - sum(diag(C_train)))/sum(C_train)
Error_rate_test = (sum(C_test) - sum(diag(C_test)))/sum(C_test)
println(Error_rate_test)
# Trailing expression: displays the test confusion matrix when run interactively.
C_test