In [1]:
# load diabetes.data

using CSV

# Tab-separated file; data rows start on line 2, and all 11 columns are
# parsed as Float64.
Diabete_Data = CSV.read(
    "diabetes.data";
    delim = '\t',
    datarow = 2,
    types = fill(Float64, 11),
);

# n = number of rows (samples); d = number of columns minus one, i.e. every
# column except the last, which is later used as the target.
n = size(Diabete_Data, 1);
d = size(Diabete_Data, 2) - 1;

Diabete_Data
Out[1]:
     AGE   SEX  BMI   BP      S1     S2     S3    S4    S5      S6     Y
 1   59.0  2.0  32.1  101.0   157.0  93.2   38.0  4.0   4.8598  87.0   151.0
 2   48.0  1.0  21.6  87.0    183.0  103.2  70.0  3.0   3.8918  69.0   75.0
 3   72.0  2.0  30.5  93.0    156.0  93.6   41.0  4.0   4.6728  85.0   141.0
 4   24.0  1.0  25.3  84.0    198.0  131.4  40.0  5.0   4.8903  89.0   206.0
 5   50.0  1.0  23.0  101.0   192.0  125.4  52.0  4.0   4.2905  80.0   135.0
 6   23.0  1.0  22.6  89.0    139.0  64.8   61.0  2.0   4.1897  68.0   97.0
 7   36.0  2.0  22.0  90.0    160.0  99.6   50.0  3.0   3.9512  82.0   138.0
 8   66.0  2.0  26.2  114.0   255.0  185.0  56.0  4.55  4.2485  92.0   63.0
 9   60.0  2.0  32.1  83.0    179.0  119.4  42.0  4.0   4.4773  94.0   110.0
10   29.0  1.0  30.0  85.0    180.0  93.4   43.0  4.0   5.3845  88.0   310.0
11   22.0  1.0  18.6  97.0    114.0  57.6   46.0  2.0   3.9512  83.0   101.0
12   56.0  2.0  28.0  85.0    184.0  144.8  32.0  6.0   3.5835  77.0   69.0
13   53.0  1.0  23.7  92.0    186.0  109.2  62.0  3.0   4.3041  81.0   179.0
14   50.0  2.0  26.2  97.0    186.0  105.4  49.0  4.0   5.0626  88.0   185.0
15   61.0  1.0  24.0  91.0    202.0  115.4  72.0  3.0   4.2905  73.0   118.0
16   34.0  2.0  24.7  118.0   254.0  184.2  39.0  7.0   5.037   81.0   171.0
17   47.0  1.0  30.3  109.0   207.0  100.2  70.0  3.0   5.2149  98.0   166.0
18   68.0  2.0  27.5  111.0   214.0  147.0  39.0  5.0   4.9416  91.0   144.0
19   38.0  1.0  25.4  84.0    162.0  103.0  42.0  4.0   4.4427  87.0   97.0
20   41.0  1.0  24.7  83.0    187.0  108.2  60.0  3.0   4.5433  78.0   168.0
21   35.0  1.0  21.1  82.0    156.0  87.8   50.0  3.0   4.5109  95.0   68.0
22   25.0  2.0  24.3  95.0    162.0  98.6   54.0  3.0   3.8501  87.0   49.0
23   25.0  1.0  26.0  92.0    187.0  120.4  56.0  3.0   3.9703  88.0   68.0
24   61.0  2.0  32.0  103.67  210.0  85.2   35.0  6.0   6.107   124.0  245.0
25   31.0  1.0  29.7  88.0    167.0  103.4  48.0  4.0   4.3567  78.0   184.0
26   30.0  2.0  25.2  83.0    178.0  118.4  34.0  5.0   4.852   83.0   202.0
27   19.0  1.0  19.2  87.0    124.0  54.0   57.0  2.0   4.1744  90.0   137.0
28   42.0  1.0  31.9  83.0    158.0  87.6   53.0  3.0   4.4659  101.0  85.0
29   63.0  1.0  24.4  73.0    160.0  91.4   48.0  3.0   4.6347  78.0   131.0
30   67.0  2.0  25.8  113.0   158.0  54.2   64.0  2.0   5.2933  104.0  283.0
In [2]:
# compute the optimal theta

# Design matrix: column 1 is all ones (intercept term); columns 2..d+1 hold
# the first d columns of Diabete_Data. Allocating it once at its final size
# avoids building an n-by-d matrix and then concatenating a ones column.
X = ones(n, d + 1);
for i in 1:d
    X[:, i + 1] = Diabete_Data[:, i];
end

# Targets: the last column of the table. (The former `y = zeros(n,1)`
# preallocation was dead code: it was overwritten here before any use.)
y = Diabete_Data[:, end];

# Solve X * theta = y in the least-squares sense with the backslash operator.
theta_opt = X \ y
Out[2]:
11-element Array{Float64,1}:
 -334.567    
   -0.0363612
  -22.8596   
    5.60296  
    1.11681  
   -1.09     
    0.74645  
    0.372005 
    6.53383  
   68.4831   
    0.280117 
In [3]:
# achieved loss

# Mean squared error of the fit: squared Euclidean norm of the residual
# X*theta_opt - y, divided by the number of samples n.
MSE = norm(X * theta_opt - y)^2 / n
Out[3]:
2859.6963475867506
In [4]:
# plot

using PyPlot;

# Predicted vs. actual targets; a point on the black diagonal is an exact fit.
figure();
plot(y, y, "k");                          # reference line: predicted == actual
plot(y, X * theta_opt, "o", alpha=0.4);   # model predictions for the fitted data
xlabel("y");
ylabel("predicted y");
axis("square");
# Pass a Boolean: matplotlib deprecated the string form "on" and emits a
# MatplotlibDeprecationWarning for it.
grid(true);
/usr/local/lib/python2.7/dist-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Passing one of 'on', 'true', 'off', 'false' as a boolean is deprecated; use an actual boolean (True/False) instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)
In [5]:
# put JHK data into your predictor

# Feature legend for the row below:
#   age
#   sex
#   bmi
#   map  mean arterial pressure
#   s1   tc   total cholesterol
#   s2   ldl  low density lipoprotein
#   s3   hdl  high density lipoprotein
#   s4   tch
#   s5   ltg
#   s6   glu
#
# The leading 1 multiplies the first entry of theta_opt (the coefficient of
# the ones column in the design matrix).
#        1  age  sex  bmi   map  tc   ldl   hdl   tch  ltg   glu
X_JHK = [1  41   1    18.3  90   171  80.0  74.9  2    4.75  90.0];

# 1x11 row times the 11-element coefficient vector: a 1-element prediction.
y_JHK = X_JHK * theta_opt
Out[5]:
1-element Array{Float64,1}:
 108.892
In [6]:
# plot

using PyPlot;

# Predicted-vs-actual scatter with the JHK prediction overlaid in red.
figure();
plot(y, y, "k");                          # reference line: predicted == actual
plot(y, X * theta_opt, "o", alpha=0.4);   # model predictions for the fitted data
# The true target for JHK is unknown, so the prediction is drawn on the
# diagonal at (y_JHK, y_JHK).
plot(y_JHK, y_JHK, "ro", alpha=0.4);
xlabel("y");
ylabel("predicted y");
axis("square");
# Pass a Boolean: matplotlib deprecated the string form "on" and emits a
# MatplotlibDeprecationWarning for it.
grid(true);
In [ ]: