module dnum.stats;

import dnum.tensor;
import dnum.utils;
import std.math : sqrt;

// =============================================================================
// Basic Statistics
// =============================================================================
/++
  Summation
+/
pure double sum(Tensor t) {
  double s = 0;
  foreach (rows; t.data) {
    foreach (elem; rows) {
      s += elem;
    }
  }
  return s;
}

/++
  Parallel Sum - effective for tensors with long rows
+/
double psum(Tensor t) {
  import std.parallelism : taskPool;

  double s = 0;
  foreach (rows; t.data) {
    s += taskPool.reduce!"a + b"(rows);
  }
  return s;
}
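
// A minimal usage sketch (assumption: Tensor(double[]) builds a 1 x n row
// tensor, as it is used in csum below). sum and psum should agree; psum only
// pays off when each row is long enough to amortize the thread overhead.
unittest {
  auto t = Tensor([1.0, 2.0, 3.0, 4.0]);
  assert(sum(t) == 10);
  assert(psum(t) == 10);
}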

/++
  Mean (over all elements)
+/
pure double mean(Tensor t) {
  double s = 0;
  double l = 0;
  foreach (rows; t.data) {
    foreach (elem; rows) {
      l++;
      s += elem;
    }
  }
  return s / l;
}

/++
  Covariance (single vectors only) - sample covariance of two row vectors or two column vectors
+/
pure double cov(Tensor t1, Tensor t2) {
  // Column vectors
  if (t1.isCol && t2.isCol) {
    assert(t1.nrow == t2.nrow, "Can only compare tensors of the same length");

    auto l = t1.nrow;
    double mx = 0; // sum of t1
    double my = 0; // sum of t2
    double c = 0;  // sum of t1 * t2

    foreach (i; 0 .. t1.nrow) {
      mx += t1[i, 0];
      my += t2[i, 0];
      c += t1[i, 0] * t2[i, 0];
    }
    // E[XY] - E[X]E[Y], rescaled by l / (l - 1) for the sample covariance
    return (c / l - (mx * my) / l^^2) * l / (l - 1);
  } else if (t1.isRow && t2.isRow) {
    assert(t1.ncol == t2.ncol, "Can only compare tensors of the same length");

    auto l = t1.ncol;
    double mx = 0; // sum of t1
    double my = 0; // sum of t2
    double c = 0;  // sum of t1 * t2

    foreach (i; 0 .. t1.ncol) {
      mx += t1[0, i];
      my += t2[0, i];
      c += t1[0, i] * t2[0, i];
    }
    // E[XY] - E[X]E[Y], rescaled by l / (l - 1) for the sample covariance
    return (c / l - (mx * my) / l^^2) * l / (l - 1);
  } else {
    assert(false, "Can't directly compare matrices or vectors of different orientation");
  }
}
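
// A hedged usage sketch (assumption: Tensor(double[]) builds a 1 x n row
// tensor). For x = 1..4 and y = 2x, the sample covariance is 10 / 3.
unittest {
  import std.math : abs;

  auto x = Tensor([1.0, 2.0, 3.0, 4.0]);
  auto y = Tensor([2.0, 4.0, 6.0, 8.0]);
  assert(abs(cov(x, y) - 10.0 / 3.0) < 1e-10);
}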

/++
  Covariance Tensor
+/
Tensor cov(Tensor t, Shape byRow = Shape.Col) {
  // Default : Consider columns as data sets
  switch (byRow) with (Shape) {
    case Col: {
      auto cols = t.ncol;
      auto ten = Tensor(cols, cols);

      foreach (i; 0 .. cols) {
        foreach (j; 0 .. cols) {
          ten[i, j] = cov(t.col(i), t.col(j));
        }
      }
      return ten;
    }
    default: {
      auto rows = t.nrow;
      auto ten = Tensor(rows, rows);

      foreach (i; 0 .. rows) {
        foreach (j; 0 .. rows) {
          ten[i, j] = cov(t.row(i), t.row(j));
        }
      }
      return ten;
    }
  }
}
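
// A hedged usage sketch (assumption: Tensor(nrow, ncol) allocates a 3 x 2
// tensor, as in the cov implementation above, and elements are set by index).
// Column 0 is (1, 2, 3) and column 1 is exactly twice that, so the column-wise
// covariance matrix is [[1, 2], [2, 4]].
unittest {
  import std.math : abs;

  auto m = Tensor(3, 2);
  foreach (i; 0 .. 3) {
    m[i, 0] = i + 1.0;
    m[i, 1] = 2.0 * (i + 1);
  }

  auto c = cov(m); // 2 x 2, columns as data sets
  assert(abs(c[0, 0] - 1.0) < 1e-10);
  assert(abs(c[0, 1] - 2.0) < 1e-10);
  assert(abs(c[1, 1] - 4.0) < 1e-10);
}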

/++
  Variance (single vector only)
+/
double var(Tensor t) {
  return cov(t, t);
}

/++
  Standard Deviation (single vector only)
+/
double std(Tensor t) {
  return t.var.sqrt;
}
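
// A hedged sketch (assumption: Tensor(double[]) builds a row tensor). The
// sample variance of 1, 2, 3, 4 is 5 / 3 and std is its square root.
unittest {
  import std.math : abs;

  auto x = Tensor([1.0, 2.0, 3.0, 4.0]);
  assert(abs(x.var - 5.0 / 3.0) < 1e-10);
  assert(abs(x.std - sqrt(5.0 / 3.0)) < 1e-10);
}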

/++
  Correlation
+/
double cor(Tensor t1, Tensor t2) {
  return cov(t1, t2) / (t1.std * t2.std);
}

/++
  Generic Correlation
+/
Tensor cor(Tensor t, Shape byRow = Shape.Col) {
  switch (byRow) with (Shape) {
    case Col: {
      auto cols = t.ncol;
      auto container = Tensor(cols, cols);

      foreach (i; 0 .. cols) {
        foreach (j; 0 .. cols) {
          container[i, j] = cor(t.col(i), t.col(j));
        }
      }
      return container;
    }
    default: {
      auto rows = t.nrow;
      auto container = Tensor(rows, rows);

      foreach (i; 0 .. rows) {
        foreach (j; 0 .. rows) {
          container[i, j] = cor(t.row(i), t.row(j));
        }
      }
      return container;
    }
  }
}
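
// A hedged sketch (assumption: Tensor(double[]) builds a row tensor).
// y = 2x is perfectly linearly related to x, so the correlation is 1.
unittest {
  import std.math : abs;

  auto x = Tensor([1.0, 2.0, 3.0, 4.0]);
  auto y = Tensor([2.0, 4.0, 6.0, 8.0]);
  assert(abs(cor(x, y) - 1.0) < 1e-10);
}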

// =============================================================================
// Column or Row Statistics
// =============================================================================
/++
  Column Sum
+/
Tensor csum(Tensor t) {
  auto temp = Tensor(0, 1, t.ncol);

  foreach (rows; t.data) {
    temp = temp + Tensor(rows);
  }
  return temp;
}

/++
  Row Sum
+/
Tensor rsum(Tensor t) {
  auto temp = Tensor(t.nrow, 1);

  foreach (i, ref row; temp.data) {
    double s = 0;
    auto memrow = t.data[i][];
    foreach (k; 0 .. t.ncol) {
      s += memrow[k];
    }
    row[0] = s;
  }
  return temp;
}

/++
  Column Mean
+/
Tensor cmean(Tensor t) {
  auto temp = Tensor(0, 1, t.ncol);

  foreach (rows; t.data) {
    temp = temp + Tensor(rows);
  }
  return temp / t.nrow;
}

/++
  Row Mean
+/
Tensor rmean(Tensor t) {
  auto temp = Tensor(t.nrow, 1);

  foreach (i, ref row; temp.data) {
    double s = 0;
    auto memrow = t.data[i][];
    foreach (k; 0 .. t.ncol) {
      s += memrow[k];
    }
    row[0] = s;
  }
  return temp / t.ncol;
}
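
// A hedged sketch: build a 2 x 2 tensor by index (assuming Tensor(nrow, ncol)
// as above) with rows (1, 2) and (3, 4). Then csum is the row vector (4, 6),
// cmean is (2, 3), rsum is the column vector (3, 7), and rmean is (1.5, 3.5).
unittest {
  auto m = Tensor(2, 2);
  m[0, 0] = 1.0; m[0, 1] = 2.0;
  m[1, 0] = 3.0; m[1, 1] = 4.0;

  assert(m.csum[0, 1] == 6);
  assert(m.cmean[0, 0] == 2);
  assert(m.rsum[1, 0] == 7);
  assert(m.rmean[0, 0] == 1.5);
}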

// =============================================================================
// Statistical Learning
// =============================================================================
/++
  Simple Linear Regression - ordinary least squares with X and Y given as column vectors of observations
+/
struct Linear {
  double slope;
  double intercept;

  this(Tensor X, Tensor Y) {
    assert(X.nrow == Y.nrow);

    double n = cast(double) X.nrow;

    // Closed-form least squares estimates
    this.slope = (n * sum(X * Y) - sum(X) * sum(Y)) / (n * sum(X^^2) - sum(X)^^2);
    this.intercept = 1 / n * (sum(Y) - this.slope * sum(X));
  }

  Tensor opCall(Tensor x) {
    assert(x.isCol || x.isRow);
    return slope * x + intercept;
  }
}
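
// A hedged usage sketch (assumptions: Tensor(nrow, ncol) allocates, indexing
// assigns, and Tensor arithmetic is elementwise as used above). Fitting
// y = 2x + 1 on four points should recover slope 2 and intercept 1.
unittest {
  import std.math : abs;

  auto X = Tensor(4, 1);
  auto Y = Tensor(4, 1);
  foreach (i; 0 .. 4) {
    X[i, 0] = i + 1.0;           // 1, 2, 3, 4
    Y[i, 0] = 2.0 * (i + 1) + 1; // 3, 5, 7, 9
  }

  auto line = Linear(X, Y);
  assert(abs(line.slope - 2.0) < 1e-10);
  assert(abs(line.intercept - 1.0) < 1e-10);

  auto yhat = line(X); // predictions at the training points
  assert(abs(yhat[0, 0] - 3.0) < 1e-10);
}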