using CSV, DataFrames, Convex, ECOS, PyPlot
include("to_one_hot.jl")
# Column types passed to the CSV reader: columns 1-4 are read as Float64
# features, column 5 as an Int8 class label.
datatypes = Dict(1=>Float64, 2=>Float64, 3=>Float64, 4=>Float64, 5=>Int8)
irisdata = CSV.read("iris.csv", delim=',', types=datatypes)

# Feature columns: everything except the last column.
U = irisdata[:, 1:end-1]

# Class labels, shifted by one so they can be compared against 1, 2, 3 and used
# as indices into the confusion matrix further down (presumably the file stores
# labels 0, 1, 2 -- verify against iris.csv).
# The broadcast `.+` is required: `vector + scalar` without the dot is a
# MethodError on Julia >= 1.0, while `.+` works on old and new versions alike.
v = irisdata[:, end] .+ 1
# n = number of samples, d = number of raw feature columns.
n, d = size(U)

# Design matrix: a leading column of ones (intercept) followed by every feature
# column of U. Built with a single hcat instead of growing X inside a top-level
# `for` loop: reassigning a global inside such a loop only works with pre-1.0
# scoping rules (in a script on Julia >= 1.0 the loop variable X is a new local
# and reading it throws UndefVarError), and it re-allocates X on every pass.
X = hcat(ones(n), [U[:, i] for i in 1:d]...)

## Optional pairwise interaction features (disabled):
##X = [X U[:,1].*U[:,3]]
##X = [X U[:,1].*U[:,4]]
##X = [X U[:,3].*U[:,4]]

# One-hot encoding of the labels; `to_one_hot` comes from to_one_hot.jl.
y = to_one_hot(v)

# From here on d is the number of columns of the design matrix (intercept
# included), K the number of columns of the one-hot matrix, and m the number of
# score columns of the model, one per one-hot column.
d = size(X, 2)
K = size(y, 2)
m = K
# Optimization variable: one weight column per class, so that row i of
# X*theta holds the class scores of sample i.
theta = Variable(d, m)

# Scores restricted to the samples carrying label 1, 2 and 3 respectively.
yhat1 = X[v .== 1, :] * theta
yhat2 = X[v .== 2, :] * theta
yhat3 = X[v .== 3, :] * theta

# Margins Mjk = (score of class j) - (score of class k), evaluated on the
# samples whose label is k. A positive value means the wrong class j outscores
# the labelled class k.
M21 = yhat1[:, 2] - yhat1[:, 1]
M31 = yhat1[:, 3] - yhat1[:, 1]
M32 = yhat2[:, 3] - yhat2[:, 2]
M12 = yhat2[:, 1] - yhat2[:, 2]
M13 = yhat3[:, 1] - yhat3[:, 3]
M23 = yhat3[:, 2] - yhat3[:, 3]

# hinge loss
# For each sample, take the larger of its two hinge terms pos(1 + margin),
# then sum over the samples of each class and accumulate the three class totals.
hinge1 = sum(max(pos(1 + M21), pos(1 + M31)))
hinge2 = sum(max(pos(1 + M32), pos(1 + M12)))
hinge3 = sum(max(pos(1 + M13), pos(1 + M23)))
cost = hinge1
cost += hinge2
cost += hinge3
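# logistic loss (disabled alternative: uncommenting the block below overwrites
# `cost`; note that `zeros(50)` hard-codes 50 samples per class)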
#=
cost = logsumexp([zeros(50) M21 M31]);
cost += logsumexp([zeros(50) M32 M12]);
cost += logsumexp([zeros(50) M13 M23]);
=#
# Minimize the training cost with the ECOS solver (verbose output enabled)
# and read back the optimized weight matrix.
problem = minimize(cost)
solve!(problem, ECOSSolver(verbose=1))
theta_opt = evaluate(theta)

# Class scores of every sample under the fitted weights.
yhat = X * theta_opt

# Predicted label per sample and the K-by-K confusion matrix:
# C[p, t] counts the samples with label t that were predicted as class p.
vhat = zeros(Int8, n)
C = zeros(K, K)
for i in 1:n
    # findmax returns (maximum value, index); the index is the predicted class.
    best = findmax(yhat[i, :])
    vhat[i] = best[2]
    C[vhat[i], v[i]] += 1
end

# Leave the confusion matrix as the value of the script.
C