a .NET library that can read/write Office formats without Microsoft Office installed. No COM+, no interop.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

281 lines
8.7 KiB

  1. /*
  2. * ====================================================================
  3. * Licensed to the Apache Software Foundation (ASF) under one or more
  4. * contributor license agreements. See the NOTICE file distributed with
  5. * this work for Additional information regarding copyright ownership.
  6. * The ASF licenses this file to You under the Apache License, Version 2.0
  7. * (the "License"); you may not use this file except in compliance with
  8. * the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. * ====================================================================
  18. */
  19. namespace NPOI.SS.Formula.Functions
  20. {
  21. using NPOI.SS.Formula;
  22. using NPOI.SS.Formula.Eval;
  23. using System;
  24. /**
  25. * Base class for linear regression functions.
  26. *
  27. * Calculates the linear regression line that is used to predict y values from x values<br/>
  28. * (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html)
  29. * <b>Syntax</b>:<br/>
  30. * <b>INTERCEPT</b>(<b>arrayY</b>, <b>arrayX</b>)<p/>
  31. * or
  32. * <b>SLOPE</b>(<b>arrayY</b>, <b>arrayX</b>)<p/>
  33. *
  34. *
  35. * @author Johan Karlsteen
  36. */
  37. public class LinearRegressionFunction : Fixed2ArgFunction
  38. {
  /**
   * Base class for the flat, single-index views over an argument value that
   * this function iterates. It stores the element count and validates every
   * index before delegating the actual lookup to the sub-class.
   */
  private abstract class ValueArray : ValueVector
  {
      private readonly int _size;

      /**
       * Creates a view that reports the given number of elements.
       */
      protected ValueArray(int size)
      {
          _size = size;
      }

      /**
       * Returns the element at the given zero-based index.
       * Throws ArgumentException when the index is not within 0..Size-1.
       */
      public ValueEval GetItem(int index)
      {
          // Valid indexes are 0.._size-1, so index == _size must be rejected too.
          if (index < 0 || index >= _size)
          {
              throw new ArgumentException("Specified index " + index
                      + " is outside range (0.." + (_size - 1) + ")");
          }
          return GetItemInternal(index);
      }

      /**
       * Looks up the element for an index that GetItem has already validated.
       */
      protected abstract ValueEval GetItemInternal(int index);

      /**
       * Number of elements exposed by this view.
       */
      public int Size
      {
          get
          {
              return _size;
          }
      }
  }
  /**
   * One-element view wrapping a single value.
   */
  private sealed class SingleCellValueArray : ValueArray
  {
      private readonly ValueEval _cell;

      /**
       * Wraps the given value; the reported size is always 1.
       */
      public SingleCellValueArray(ValueEval value)
          : base(1)
      {
          _cell = value;
      }

      // The index is not consulted: the same wrapped value is returned every time.
      protected override ValueEval GetItemInternal(int index) => _cell;
  }
  /**
   * View over a RefEval that exposes one element per sheet of the reference.
   */
  private sealed class RefValueArray : ValueArray
  {
      private readonly RefEval _refEval;
      private readonly int _sheetCount;

      /**
       * Wraps the given reference; the reported size is its NumberOfSheets.
       */
      public RefValueArray(RefEval ref1)
          : base(ref1.NumberOfSheets)
      {
          _sheetCount = ref1.NumberOfSheets;
          _refEval = ref1;
      }

      /**
       * Maps the element index onto a sheet index, starting at the
       * reference's FirstSheetIndex, and returns that sheet's inner value.
       */
      protected override ValueEval GetItemInternal(int index)
      {
          int sheetOffset = index % _sheetCount;
          return _refEval.GetInnerValueEval(sheetOffset + _refEval.FirstSheetIndex);
      }
  }
  /**
   * View over a two-dimensional area that exposes its cells as one flat
   * sequence of Width * Height elements.
   */
  private sealed class AreaValueArray : ValueArray
  {
      private readonly TwoDEval _area;
      private readonly int _columnCount;

      /**
       * Wraps the given area; the reported size is Width * Height.
       */
      public AreaValueArray(TwoDEval ae)
          : base(ae.Width * ae.Height)
      {
          _columnCount = ae.Width;
          _area = ae;
      }

      // Flat index -> (row, column): the quotient by the width is the row,
      // the remainder is the column.
      protected override ValueEval GetItemInternal(int index)
          => _area.GetValue(index / _columnCount, index % _columnCount);
  }
  /**
   * The regression coefficient an instance of this class returns.
   */
  public enum FUNCTION
  {
      INTERCEPT,
      SLOPE
  }

  /**
   * Coefficient selected for this instance; read when the result is produced.
   */
  public FUNCTION function;

  /**
   * Creates a regression function that returns the given coefficient.
   */
  public LinearRegressionFunction(FUNCTION function) => this.function = function;
  /**
   * Evaluates the regression for the two arguments: arg0 supplies the y
   * values and arg1 the x values.
   *
   * Returns ErrorEval.NA when the x vector is empty or the two vectors differ
   * in size, the error carried by an EvaluationException raised during
   * evaluation, ErrorEval.NUM_ERROR when the computed value is NaN or
   * infinite, and otherwise a NumberEval holding the result.
   */
  public override ValueEval Evaluate(int srcRowIndex, int srcColumnIndex,
          ValueEval arg0, ValueEval arg1)
  {
      double coefficient;
      try
      {
          ValueVector yValues = CreateValueVector(arg0);
          ValueVector xValues = CreateValueVector(arg1);

          int pairCount = xValues.Size;
          bool sameNonEmptyShape = pairCount != 0 && yValues.Size == pairCount;
          if (!sameNonEmptyShape)
          {
              return ErrorEval.NA;
          }

          coefficient = EvaluateInternal(xValues, yValues, pairCount);
      }
      catch (EvaluationException ex)
      {
          return ex.GetErrorEval();
      }

      bool isFinite = !Double.IsNaN(coefficient) && !Double.IsInfinity(coefficient);
      if (isFinite)
      {
          return new NumberEval(coefficient);
      }
      return ErrorEval.NUM_ERROR;
  }
  /**
   * Fits the least-squares line y = beta0 + beta1 * x through the paired
   * elements of x and y and returns beta0 when the function field is
   * FUNCTION.INTERCEPT, otherwise beta1 (the slope).
   *
   * Only pairs in which both elements are NumberEval contribute; every other
   * non-error combination is ignored. The means are taken over the
   * contributing pairs only, so ignored pairs do not distort the result.
   *
   * Throws EvaluationException carrying the first error value found in x,
   * else the first error value found in y, else ErrorEval.DIV_ZERO when no
   * numeric pair exists or when the contributing x values have no spread
   * (the slope would require a division by zero).
   *
   * size is the number of element pairs to read from x and y.
   */
  private double EvaluateInternal(ValueVector x, ValueVector y, int size)
  {
      // error handling is as if the x is fully evaluated before y
      ErrorEval firstXerr = null;
      ErrorEval firstYerr = null;

      // first pass: read in data, record errors, accumulate sums for the means
      int pairCount = 0;
      double sumx = 0.0, sumy = 0.0;
      for (int i = 0; i < size; i++)
      {
          ValueEval vx = x.GetItem(i);
          ValueEval vy = y.GetItem(i);
          if (vx is ErrorEval xErr)
          {
              if (firstXerr == null)
              {
                  firstXerr = xErr;
              }
              // An x error always outranks any y error, so the y element
              // of this pair cannot influence the outcome.
              continue;
          }
          if (vy is ErrorEval yErr)
          {
              if (firstYerr == null)
              {
                  firstYerr = yErr;
              }
              continue;
          }
          // only count pairs if both elements are numbers;
          // all other combinations of value types are silently ignored
          if (vx is NumberEval nx && vy is NumberEval ny)
          {
              pairCount++;
              sumx += nx.NumberValue;
              sumy += ny.NumberValue;
          }
      }

      if (firstXerr != null)
      {
          throw new EvaluationException(firstXerr);
      }
      if (firstYerr != null)
      {
          throw new EvaluationException(firstYerr);
      }
      if (pairCount == 0)
      {
          throw new EvaluationException(ErrorEval.DIV_ZERO);
      }

      // Divide by the number of pairs that were actually summed, not by the
      // raw vector length, otherwise ignored pairs would skew the means.
      double xbar = sumx / pairCount;
      double ybar = sumy / pairCount;

      // second pass: compute summary statistics over the same numeric pairs
      double xxbar = 0.0, xybar = 0.0;
      for (int i = 0; i < size; i++)
      {
          if (x.GetItem(i) is NumberEval nx && y.GetItem(i) is NumberEval ny)
          {
              double dx = nx.NumberValue - xbar;
              xxbar += dx * dx;
              xybar += dx * (ny.NumberValue - ybar);
          }
      }

      // No spread in x (single point or constant x): the slope is undefined.
      if (xxbar == 0.0)
      {
          throw new EvaluationException(ErrorEval.DIV_ZERO);
      }

      double beta1 = xybar / xxbar;
      if (function == FUNCTION.INTERCEPT)
      {
          return ybar - beta1 * xbar;
      }
      return beta1;
  }
  /**
   * Wraps an argument in the ValueVector view that matches its kind:
   * a TwoDEval becomes an AreaValueArray, a RefEval becomes a RefValueArray,
   * and anything else becomes a one-element SingleCellValueArray.
   *
   * Throws EvaluationException when the argument itself is an ErrorEval.
   */
  private static ValueVector CreateValueVector(ValueEval arg)
  {
      // Cases are tested top to bottom, so the error check comes first and
      // TwoDEval is tested ahead of RefEval.
      switch (arg)
      {
          case ErrorEval error:
              throw new EvaluationException(error);
          case TwoDEval area:
              return new AreaValueArray(area);
          case RefEval reference:
              return new RefValueArray(reference);
          default:
              return new SingleCellValueArray(arg);
      }
  }
  255. }
  256. }