You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
282 lines
8.7 KiB
282 lines
8.7 KiB
/*
|
|
* ====================================================================
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for Additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
* ====================================================================
|
|
*/
|
|
|
|
namespace NPOI.SS.Formula.Functions
|
|
{
|
|
using NPOI.SS.Formula;
|
|
using NPOI.SS.Formula.Eval;
|
|
using System;
|
|
|
|
|
|
/**
|
|
* Base class for linear regression functions.
|
|
*
|
|
* Calculates the linear regression line that is used to predict y values from x values<br/>
|
|
* (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html)
|
|
* <b>Syntax</b>:<br/>
|
|
* <b>INTERCEPT</b>(<b>arrayX</b>, <b>arrayY</b>)<p/>
|
|
* or
|
|
* <b>SLOPE</b>(<b>arrayX</b>, <b>arrayY</b>)<p/>
|
|
*
|
|
*
|
|
* @author Johan Karlsteen
|
|
*/
|
|
public class LinearRegressionFunction : Fixed2ArgFunction
|
|
{
|
|
|
|
private abstract class ValueArray : ValueVector
|
|
{
|
|
private readonly int _size;
|
|
protected ValueArray(int size)
|
|
{
|
|
_size = size;
|
|
}
|
|
|
|
public ValueEval GetItem(int index)
|
|
{
|
|
if (index < 0 || index > _size)
|
|
{
|
|
throw new ArgumentException("Specified index " + index
|
|
+ " is outside range (0.." + (_size - 1) + ")");
|
|
}
|
|
return GetItemInternal(index);
|
|
}
|
|
protected abstract ValueEval GetItemInternal(int index);
|
|
|
|
public int Size
|
|
{
|
|
get
|
|
{
|
|
return _size;
|
|
}
|
|
}
|
|
}
|
|
|
|
private sealed class SingleCellValueArray : ValueArray
|
|
{
|
|
private readonly ValueEval _value;
|
|
public SingleCellValueArray(ValueEval value)
|
|
: base(1)
|
|
{
|
|
_value = value;
|
|
}
|
|
|
|
protected override ValueEval GetItemInternal(int index)
|
|
{
|
|
return _value;
|
|
}
|
|
}
|
|
|
|
private sealed class RefValueArray : ValueArray
|
|
{
|
|
private readonly RefEval _ref;
|
|
private readonly int _width;
|
|
public RefValueArray(RefEval ref1)
|
|
: base(ref1.NumberOfSheets)
|
|
{
|
|
_ref = ref1;
|
|
_width = ref1.NumberOfSheets;
|
|
}
|
|
|
|
protected override ValueEval GetItemInternal(int index)
|
|
{
|
|
int sIx = (index % _width) + _ref.FirstSheetIndex;
|
|
return _ref.GetInnerValueEval(sIx);
|
|
}
|
|
}
|
|
|
|
private sealed class AreaValueArray : ValueArray
|
|
{
|
|
private readonly TwoDEval _ae;
|
|
private readonly int _width;
|
|
|
|
public AreaValueArray(TwoDEval ae)
|
|
: base(ae.Width * ae.Height)
|
|
{
|
|
_ae = ae;
|
|
_width = ae.Width;
|
|
}
|
|
|
|
protected override ValueEval GetItemInternal(int index)
|
|
{
|
|
int rowIx = index / _width;
|
|
int colIx = index % _width;
|
|
return _ae.GetValue(rowIx, colIx);
|
|
}
|
|
}
|
|
|
|
public enum FUNCTION { INTERCEPT, SLOPE };
|
|
public FUNCTION function;
|
|
|
|
public LinearRegressionFunction(FUNCTION function)
|
|
{
|
|
this.function = function;
|
|
}
|
|
|
|
|
|
public override ValueEval Evaluate(int srcRowIndex, int srcColumnIndex,
|
|
ValueEval arg0, ValueEval arg1)
|
|
{
|
|
double result;
|
|
try
|
|
{
|
|
ValueVector vvY = CreateValueVector(arg0);
|
|
ValueVector vvX = CreateValueVector(arg1);
|
|
int size = vvX.Size;
|
|
if (size == 0 || vvY.Size != size)
|
|
{
|
|
return ErrorEval.NA;
|
|
}
|
|
result = EvaluateInternal(vvX, vvY, size);
|
|
}
|
|
catch (EvaluationException e)
|
|
{
|
|
return e.GetErrorEval();
|
|
}
|
|
if (Double.IsNaN(result) || Double.IsInfinity(result))
|
|
{
|
|
return ErrorEval.NUM_ERROR;
|
|
}
|
|
return new NumberEval(result);
|
|
}
|
|
|
|
private double EvaluateInternal(ValueVector x, ValueVector y, int size)
|
|
{
|
|
|
|
// error handling is as if the x is fully Evaluated before y
|
|
ErrorEval firstXerr = null;
|
|
ErrorEval firstYerr = null;
|
|
bool accumlatedSome = false;
|
|
// first pass: read in data, compute xbar and ybar
|
|
double sumx = 0.0, sumy = 0.0;
|
|
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
ValueEval vx = x.GetItem(i);
|
|
ValueEval vy = y.GetItem(i);
|
|
if (vx is ErrorEval eval)
|
|
{
|
|
if (firstXerr == null)
|
|
{
|
|
firstXerr = eval;
|
|
continue;
|
|
}
|
|
}
|
|
if (vy is ErrorEval errorEval)
|
|
{
|
|
if (firstYerr == null)
|
|
{
|
|
firstYerr = errorEval;
|
|
continue;
|
|
}
|
|
}
|
|
// only count pairs if both elements are numbers
|
|
if (vx is NumberEval nx && vy is NumberEval ny)
|
|
{
|
|
accumlatedSome = true;
|
|
sumx += nx.NumberValue;
|
|
sumy += ny.NumberValue;
|
|
}
|
|
else
|
|
{
|
|
// all other combinations of value types are silently ignored
|
|
}
|
|
}
|
|
double xbar = sumx / size;
|
|
double ybar = sumy / size;
|
|
|
|
// second pass: compute summary statistics
|
|
double xxbar = 0.0, xybar = 0.0;
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
ValueEval vx = x.GetItem(i);
|
|
ValueEval vy = y.GetItem(i);
|
|
|
|
if (vx is ErrorEval eval)
|
|
{
|
|
if (firstXerr == null)
|
|
{
|
|
firstXerr = eval;
|
|
continue;
|
|
}
|
|
}
|
|
if (vy is ErrorEval errorEval)
|
|
{
|
|
if (firstYerr == null)
|
|
{
|
|
firstYerr = errorEval;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// only count pairs if both elements are numbers
|
|
if (vx is NumberEval nx && vy is NumberEval ny)
|
|
{
|
|
xxbar += (nx.NumberValue - xbar) * (nx.NumberValue - xbar);
|
|
xybar += (nx.NumberValue - xbar) * (ny.NumberValue - ybar);
|
|
}
|
|
else
|
|
{
|
|
// all other combinations of value types are silently ignored
|
|
}
|
|
}
|
|
double beta1 = xybar / xxbar;
|
|
double beta0 = ybar - beta1 * xbar;
|
|
|
|
if (firstXerr != null)
|
|
{
|
|
throw new EvaluationException(firstXerr);
|
|
}
|
|
if (firstYerr != null)
|
|
{
|
|
throw new EvaluationException(firstYerr);
|
|
}
|
|
if (!accumlatedSome)
|
|
{
|
|
throw new EvaluationException(ErrorEval.DIV_ZERO);
|
|
}
|
|
|
|
if (function == FUNCTION.INTERCEPT)
|
|
{
|
|
return beta0;
|
|
}
|
|
else
|
|
{
|
|
return beta1;
|
|
}
|
|
}
|
|
|
|
private static ValueVector CreateValueVector(ValueEval arg)
|
|
{
|
|
if (arg is ErrorEval eval)
|
|
{
|
|
throw new EvaluationException(eval);
|
|
}
|
|
if (arg is TwoDEval dEval)
|
|
{
|
|
return new AreaValueArray(dEval);
|
|
}
|
|
if (arg is RefEval refEval)
|
|
{
|
|
return new RefValueArray(refEval);
|
|
}
|
|
return new SingleCellValueArray(arg);
|
|
}
|
|
}
|
|
}
|