# Copyright 2004 The Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#-----------------------------------------------------------------------
# R source file to validate the simple linear regression tests in
# org.apache.commons.math.stat.regression.SimpleRegressionTest
#
# To run the test, install R, put this file and testFunctions
# into the same directory, launch R from this directory and then enter
# source("<name-of-this-file>")
#
# Output will be written to a file named "regTestResults"
# in the directory from which R was launched
#
#------------------------------------------------------------------------------
tol <- 1E-8

#------------------------------------------------------------------------------
# Function definitions

source("testFunctions")           # utility test functions

# Fits the simple linear regression y ~ x with lm() and returns a list holding
# the fitted model together with the statistics the test cases below check.
#
# The coefficient matrix of summary(lm) has one row per term (row 1 is the
# intercept, row 2 is x) and the columns Estimate, Std. Error, t value and
# Pr(>|t|), so e.g. [2, 4] is the p-value attached to the slope.
fitRegression <- function(x, y) {
  model <- lm(y ~ x)
  # Named coefTable rather than coef so the base coef() generic is not hidden.
  coefTable <- coefficients(summary(model))
  list(
    model = model,
    slope = coefTable[2, 1],
    slopeStd = coefTable[2, 2],
    interceptStd = coefTable[1, 2],
    significance = coefTable[2, 4]
  )
}

# Returns the distance from the slope estimate to the upper bound of its
# default confint() interval, for a list produced by fitRegression().
slopeHalfWidth <- function(fit) {
  ci <- confint(fit$model)
  # ci[1,1] = lower 2.5% bound for intercept, ci[1,2] = upper 97.5% for intercept
  # ci[2,1] = lower 2.5% bound for slope,     ci[2,2] = upper 97.5% for slope
  ci[2, 2] - fit$slope
}

# Prints the padded result line for one test case: SUCCEEDED when `passed` is
# TRUE, FAILED otherwise. `passed` is evaluated before anything is displayed,
# so any output produced by the assertions still precedes the result line.
reportResult <- function(label, passed) {
  if (passed) {
    displayPadded(label, SUCCEEDED, WIDTH)
  } else {
    displayPadded(label, FAILED, WIDTH)
  }
}

#------------------------------------------------------------------------------
# infData example

cat("Regresssion test cases\n")

x <- c(15.6, 26.8, 37.8, 36.4, 35.5, 18.6, 15.3, 7.9, 0.0)
y <- c(5.2, 6.1, 8.7, 8.5, 8.8, 4.9, 4.5, 2.5, 1.1)
fit <- fitRegression(x, y)

# && short-circuits: the intercept check only runs when the slope check passes.
reportResult(
  "InfData std error test",
  assertEquals(0.011448491, fit$slopeStd, tol, "Slope Standard Error") &&
    assertEquals(0.286036932, fit$interceptStd, tol,
                 "Intercept Standard Error")
)

# NOTE(review): the expected p-value is of order 1e-07 while tol is 1E-8; if
# assertEquals applies tol as an absolute difference, this check is coarse in
# relative terms -- confirm against testFunctions.
reportResult(
  "InfData significance test",
  assertEquals(4.596e-07, fit$significance, tol, "Significance")
)

reportResult(
  "InfData conf interval test",
  assertEquals(0.0270713794287, slopeHalfWidth(fit), tol,
               "Slope conf. interval half-width")
)

#------------------------------------------------------------------------------
# Norris dataset from NIST examples

y <- c(0.1, 338.8, 118.1, 888.0, 9.2, 228.1,
       668.5, 998.5, 449.1, 778.9, 559.2, 0.3,
       0.1, 778.1, 668.8, 339.3, 448.9, 10.8,
       557.7, 228.3, 998.0, 888.8, 119.6, 0.3,
       0.6, 557.6, 339.3, 888.0, 998.5, 778.9,
       10.2, 117.6, 228.9, 668.4, 449.2, 0.2)
x <- c(0.2, 337.4, 118.2, 884.6, 10.1, 226.5,
       666.3, 996.3, 448.6, 777.0, 558.2, 0.4,
       0.6, 775.5, 666.9, 338.0, 447.5, 11.6,
       556.0, 228.1, 995.8, 887.6, 120.2, 0.3,
       0.3, 556.8, 339.1, 887.2, 999.0, 779.0,
       11.1, 118.3, 229.2, 669.1, 448.9, 0.5)
fit <- fitRegression(x, y)

reportResult(
  "Norris std error test",
  assertEquals(0.429796848199937E-03, fit$slopeStd, tol,
               "Slope Standard Error") &&
    assertEquals(0.232818234301152, fit$interceptStd, tol,
                 "Intercept Standard Error")
)

#------------------------------------------------------------------------------
# infData2 -- bad fit example
#
x <- c(1, 2, 3, 4, 5, 6)
y <- c(1, 0, 5, 2, -1, 12)
fit <- fitRegression(x, y)

reportResult(
  "InfData2 std error test",
  assertEquals(1.07260253, fit$slopeStd, tol, "Slope Standard Error") &&
    assertEquals(4.17718672, fit$interceptStd, tol,
                 "Intercept Standard Error")
)

reportResult(
  "InfData2 significance test",
  assertEquals(0.261829133982, fit$significance, tol, "Significance")
)

reportResult(
  "InfData2 conf interval test",
  assertEquals(2.97802204827, slopeHalfWidth(fit), tol,
               "Slope conf. interval half-width")
)

#------------------------------------------------------------------------------
# Correlation example

x <- c(101.0, 100.1, 100.0, 90.6, 86.5, 89.7, 90.6, 82.8,
       70.1, 65.4, 61.3, 62.5, 63.6, 52.6, 59.7, 59.5, 61.3)
y <- c(99.2, 99.0, 100.0, 111.6, 122.2, 117.6, 121.1, 136.0,
       154.2, 153.6, 158.5, 140.6, 136.2, 168.0, 154.3, 149.0, 165.5)

reportResult(
  "Correlation test",
  assertEquals(-0.94663767742, cor(x, y, method = "pearson"), tol,
               "Correlation coefficient")
)

displayDashes(WIDTH)