#   Copyright 2004 The Apache Software Foundation
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#
#-----------------------------------------------------------------------
# R source file to validate simple linear regression tests in
# org.apache.commons.math.stat.regression.SimpleRegressionTest
#
# To run the test, install R, put this file and testFunctions
# into the same directory, launch R from this directory and then enter
# source("<name-of-this-file>")
#
# Output will be written to a file named "regTestResults"
# in the directory from which R was launched
#
#------------------------------------------------------------------------------
# Tolerance handed to every assertEquals() call in this script.
tol <- 1e-8
#------------------------------------------------------------------------------
# Shared test utilities

# Helper definitions used below (assertEquals, displayPadded, displayDashes,
# SUCCEEDED, FAILED, WIDTH) are not defined in this file; they are expected
# to come from "testFunctions" in the current working directory.
source(file = "testFunctions")
#------------------------------------------------------------------------------
# infData example

# Banner printed once before the regression checks start.
cat("Regression test cases\n")

# Inference data set: nine (x, y) observations fitted with the model y ~ x.
x <- c(15.6, 26.8, 37.8, 36.4, 35.5, 18.6, 15.3, 7.9, 0.0)
y <- c(5.2, 6.1, 8.7, 8.5, 8.8, 4.9, 4.5, 2.5, 1.1)
model <- lm(y ~ x)

# Coefficient table of the fitted model: one row per term ("(Intercept)",
# "x") and the columns "Estimate", "Std. Error", "t value", "Pr(>|t|)".
# Stored as coefTable so that stats::coef() is not masked by a variable.
coefTable <- coefficients(summary(model))
intercept <- coefTable["(Intercept)", "Estimate"]
interceptStd <- coefTable["(Intercept)", "Std. Error"]
slope <- coefTable["x", "Estimate"]
slopeStd <- coefTable["x", "Std. Error"]
significance <- coefTable["x", "Pr(>|t|)"]    # p-value of the slope term

# Standard errors of slope and intercept. `&&` short-circuits, so the
# intercept is only checked when the slope check succeeded.
output <- "InfData std error test"
passed <- assertEquals(0.011448491, slopeStd, tol, "Slope Standard Error") &&
    assertEquals(0.286036932, interceptStd, tol, "Intercept Standard Error")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)

# Significance (p-value) of the slope.
output <- "InfData significance test"
passed <- assertEquals(4.596e-07, significance, tol, "Significance")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)

# Half-width of the slope confidence interval. confint() is called with its
# default level, giving the columns "2.5 %" (lower) and "97.5 %" (upper);
# the half-width is the upper bound minus the point estimate.
output <- "InfData conf interval test"
ci <- confint(model)
halfWidth <- ci["x", "97.5 %"] - slope
passed <- assertEquals(0.0270713794287, halfWidth, tol,
                       "Slope conf. interval half-width")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)
#------------------------------------------------------------------------------
# Norris dataset from NIST examples

# Response values of the Norris data set (36 observations).
y <- c(0.1, 338.8, 118.1, 888.0, 9.2, 228.1, 668.5, 998.5, 449.1, 778.9, 559.2,
       0.3, 0.1, 778.1, 668.8, 339.3, 448.9, 10.8, 557.7, 228.3, 998.0, 888.8,
       119.6, 0.3, 0.6, 557.6, 339.3, 888.0, 998.5, 778.9, 10.2, 117.6, 228.9,
       668.4, 449.2, 0.2)
# Predictor values, in the same observation order as y.
x <- c(0.2, 337.4, 118.2, 884.6, 10.1, 226.5, 666.3, 996.3, 448.6, 777.0, 558.2,
       0.4, 0.6, 775.5, 666.9, 338.0, 447.5, 11.6, 556.0, 228.1, 995.8, 887.6,
       120.2, 0.3, 0.3, 556.8, 339.1, 887.2, 999.0, 779.0, 11.1, 118.3, 229.2,
       669.1, 448.9, 0.5)
model <- lm(y ~ x)

# Coefficient table: rows "(Intercept)" and "x"; the columns used here are
# "Estimate" and "Std. Error".
coefTable <- coefficients(summary(model))
intercept <- coefTable["(Intercept)", "Estimate"]
interceptStd <- coefTable["(Intercept)", "Std. Error"]
slope <- coefTable["x", "Estimate"]
slopeStd <- coefTable["x", "Std. Error"]

# Compare both standard errors with the reference values; the intercept is
# only checked when the slope check succeeded (short-circuit `&&`).
output <- "Norris std error test"
passed <-
    assertEquals(0.429796848199937E-03, slopeStd, tol, "Slope Standard Error") &&
    assertEquals(0.232818234301152, interceptStd, tol, "Intercept Standard Error")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)
#------------------------------------------------------------------------------
# infData2 -- bad fit example
#
# Six observations fitted with the model y ~ x.
x <- c(1, 2, 3, 4, 5, 6)
y <- c(1, 0, 5, 2, -1, 12)
model <- lm(y ~ x)

# Coefficient table: rows "(Intercept)" and "x"; columns "Estimate",
# "Std. Error", "t value" and "Pr(>|t|)".
coefTable <- coefficients(summary(model))
intercept <- coefTable["(Intercept)", "Estimate"]
interceptStd <- coefTable["(Intercept)", "Std. Error"]
slope <- coefTable["x", "Estimate"]
slopeStd <- coefTable["x", "Std. Error"]
significance <- coefTable["x", "Pr(>|t|)"]

# Standard errors; the intercept is only checked if the slope check passed.
output <- "InfData2 std error test"
passed <- assertEquals(1.07260253, slopeStd, tol, "Slope Standard Error") &&
    assertEquals(4.17718672, interceptStd, tol, "Intercept Standard Error")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)

# p-value of the slope term.
output <- "InfData2 significance test"
passed <- assertEquals(0.261829133982, significance, tol, "Significance")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)

# Slope confidence interval half-width: upper bound ("97.5 %" column of the
# default confint() result) minus the slope estimate.
output <- "InfData2 conf interval test"
ci <- confint(model)
halfWidth <- ci["x", "97.5 %"] - slope
passed <- assertEquals(2.97802204827, halfWidth, tol,
                       "Slope conf. interval half-width")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)
#------------------------------------------------------------------------------
# Correlation example

# Two paired samples of 17 observations each.
x <- c(101.0, 100.1, 100.0, 90.6, 86.5, 89.7, 90.6, 82.8, 70.1, 65.4,
       61.3, 62.5, 63.6, 52.6, 59.7, 59.5, 61.3)
y <- c(99.2, 99.0, 100.0, 111.6, 122.2, 117.6, 121.1, 136.0, 154.2, 153.6,
       158.5, 140.6, 136.2, 168.0, 154.3, 149.0, 165.5)

# Pearson correlation of x and y, compared with the reference value.
output <- "Correlation test"
pearsonR <- cor(x, y, method = "pearson")
passed <- assertEquals(-0.94663767742, pearsonR, tol, "Correlation coefficient")
displayPadded(output, if (passed) SUCCEEDED else FAILED, WIDTH)

# Closing separator line after the last test result.
displayDashes(WIDTH)