1 module dnum.stats; 2 3 import dnum.tensor; 4 import dnum.utils; 5 import std.math : sqrt; 6 7 // ============================================================================= 8 // Basic Statistics 9 // ============================================================================= 10 /++ 11 Summation 12 +/ 13 pure double sum(Tensor t) { 14 double s = 0; 15 foreach (rows; t.data) { 16 foreach (elem; rows) { 17 s += elem; 18 } 19 } 20 return s; 21 } 22 23 /++ 24 Parallel Sum - Effective to many row vector 25 +/ 26 double psum(Tensor t) { 27 import std.parallelism : taskPool; 28 29 double s = 0; 30 foreach (rows; t.data) { 31 s += taskPool.reduce!"a + b"(rows); 32 } 33 return s; 34 } 35 36 /++ 37 Mean (Whole) 38 +/ 39 pure double mean(Tensor t) { 40 double s = 0; 41 double l = 0; 42 foreach (rows; t.data) { 43 foreach (elem; rows) { 44 l++; 45 s += elem; 46 } 47 } 48 return s / l; 49 } 50 51 /++ 52 Covariance (Only Single Vector) 53 +/ 54 pure double cov(Tensor t1, Tensor t2) { 55 // Column Vector 56 if (t1.isCol && t2.isCol) { 57 assert(t1.nrow == t2.nrow, "Can compare only same length tensor"); 58 59 auto l = t1.nrow; 60 double mx = 0; 61 double my = 0; 62 double c = 0; 63 64 foreach (i; 0 .. t1.nrow) { 65 mx += t1[i,0]; 66 my += t2[i,0]; 67 c += t1[i, 0] * t2[i, 0]; 68 } 69 return (c / l - (mx * my) / l^^2) * l / (l - 1); 70 } else if (t1.isRow && t2.isRow) { 71 assert(t1.ncol == t2.ncol, "Can compare only same length tensor"); 72 73 auto l = t1.ncol; 74 double mx = 0; 75 double my = 0; 76 double c = 0; 77 78 foreach (i; 0 .. t1.ncol) { 79 mx += t1[0,i]; 80 my += t2[0,i]; 81 c += t1[0,i] * t2[0,i]; 82 } 83 return (c / l - (mx * my) / l^^2) * l / (l - 1); 84 } else { 85 assert(false, "Can't compare matrices or different dimension vectors directly"); 86 } 87 } 88 89 /++ 90 Covariance Tensor 91 +/ 92 Tensor cov(Tensor t, Shape byRow = Shape.Col) { 93 // Default : Consider columns as data sets 94 switch (byRow) with (Shape) { 95 case Col: 96 auto cols = t.ncol; 97 auto ten = Tensor(cols, cols); 98 99 foreach (i; 0 .. cols) { 100 foreach (j; 0 .. cols) { 101 ten[i, j] = cov(t.col(i), t.col(j)); 102 } 103 } 104 return ten; 105 default: 106 auto rows = t.nrow; 107 auto ten = Tensor(rows, rows); 108 109 foreach (i; 0 .. rows) { 110 foreach (j; 0 .. rows) { 111 ten[i,j] = cov(t.row(i), t.row(j)); 112 } 113 } 114 return ten; 115 } 116 } 117 118 /++ 119 Variance 120 +/ 121 double var(Tensor t) { 122 return cov(t, t); 123 } 124 125 /++ 126 Standard Deviation 127 +/ 128 double std(Tensor t) { 129 return t.var.sqrt; 130 } 131 132 /++ 133 Correlation 134 +/ 135 double cor(Tensor t1, Tensor t2) { 136 return cov(t1, t2) / (t1.std * t2.std); 137 } 138 139 /++ 140 Generic Correlation 141 +/ 142 Tensor cor(Tensor t, Shape byRow = Shape.Col) { 143 switch (byRow) with (Shape) { 144 case Col: 145 auto cols = t.ncol; 146 auto container = Tensor(cols,cols); 147 148 foreach (i; 0 .. cols) { 149 foreach (j; 0 .. cols) { 150 container[i,j] = cor(t.col(i), t.col(j)); 151 } 152 } 153 return container; 154 default: 155 auto rows = t.nrow; 156 auto container = Tensor(rows, rows); 157 158 foreach (i; 0 .. rows) { 159 foreach (j; 0 .. rows) { 160 container[i, j] = cor(t.row(i), t.row(j)); 161 } 162 } 163 return container; 164 } 165 } 166 167 // ============================================================================= 168 // Column or Row Statistics 169 // ============================================================================= 170 /++ 171 Column Sum 172 +/ 173 Tensor csum(Tensor t) { 174 auto temp = Tensor(0, 1, t.ncol); 175 176 foreach (rows; t.data) { 177 temp = temp + Tensor(rows); 178 } 179 return temp; 180 } 181 182 /++ 183 Row Sum 184 +/ 185 Tensor rsum(Tensor t) { 186 auto temp = Tensor(t.nrow, 1); 187 188 foreach (i, ref row; temp.data) { 189 double s = 0; 190 auto memrow = t.data[i][]; 191 foreach (k; 0 .. t.ncol) { 192 s += memrow[k]; 193 } 194 row[0] = s; 195 } 196 return temp; 197 } 198 199 /++ 200 Column Mean 201 +/ 202 Tensor cmean(Tensor t) { 203 auto temp = Tensor(0, 1, t.ncol); 204 205 foreach (rows; t.data) { 206 temp = temp + Tensor(rows); 207 } 208 return temp / t.nrow; 209 } 210 211 /++ 212 Row Mean 213 +/ 214 Tensor rmean(Tensor t) { 215 auto temp = Tensor(t.nrow, 1); 216 217 foreach (i, ref row; temp.data) { 218 double s = 0; 219 auto memrow = t.data[i][]; 220 foreach (k; 0 .. t.ncol) { 221 s += memrow[k]; 222 } 223 row[0] = s; 224 } 225 return temp / t.ncol; 226 } 227 228 // ============================================================================= 229 // Statistical Learning 230 // ============================================================================= 231 /++ 232 Simple Linear Regression 233 +/ 234 struct Linear { 235 double slope; 236 double intercept; 237 238 this(Tensor X, Tensor Y) { 239 assert(X.nrow == Y.nrow); 240 241 double n = cast(double)X.nrow; 242 243 this.slope = (n * sum(X * Y) - sum(X) * sum(Y)) / (n*sum(X^^2) - sum(X)^^2); 244 this.intercept = 1/n * (sum(Y) - this.slope * sum(X)); 245 } 246 247 Tensor opCall(Tensor x) { 248 assert(x.isCol || x.isRow); 249 return slope * x + intercept; 250 } 251 }