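This example uses a data set from Draper and Smith (1981, pp. 629-630). The method compute()
is invoked to find the best regression subset from the four candidate variables. The listing
then prints the ANOVA table, the coefficient t-tests, the history, the variance inflation
factors, the swept covariances, and the intercept of the fitted model. The getSwept() method
can be used to label the variables "in" or "out" of the final model; the listing below does
not call it.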
import com.imsl.stat.*; import com.imsl.math.*; public class StepwiseRegressionEx1 { private static void print(String[] labels, ANOVA anova){ double[] values = anova.getArray(); for (int i=0; i<values.length-2; i++){ System.out.printf("%41s%s\n", labels[i], customDecimalFormat(values[i])); } System.out.println(); } private static String customDecimalFormat(double value) { java.text.DecimalFormat df1 = new java.text.DecimalFormat("##,###.###"); String s = df1.format(value); String[] toc = s.split("\\."); if (toc.length == 1) { s = String.format("%6s", s); } else { toc[0] = String.format("%6s", toc[0]); toc[1] = String.format("%-3s", toc[1]); s = toc[0] + "." + toc[1]; } return s; } public static void main(String[] args) throws Exception { double x[][] = { {7., 26., 6., 60.}, {1., 29., 15., 52.}, {11., 56., 8., 20.}, {11., 31., 8., 47.}, {7., 52., 6., 33.}, {11., 55., 9., 22.}, {3., 71., 17., 6.}, {1., 31., 22., 44.}, {2., 54., 18., 22.}, {21., 47., 4., 26}, {1., 40., 23., 34.}, {11., 66., 9., 12.}, {10.0, 68., 8., 12.} }; double y[] = { 78.5, 74.3, 104.3, 87.6, 95.9, 109.2, 102.7, 72.5, 93.1, 115.9, 83.8, 113.3, 109.4 }; String[] rowLabels = { "degrees of freedom for regression: ", "degrees of freedom for error: ", "total degrees of freedom: ", "sum of squares for regression: ", "sum of squares for error: ", "total sum of squares: ", "regression mean square: ", "error mean square: ", "F-statistic: ", "p-value: ", "R-squared (in percent): ", "adjusted R-squared (in percent): ", "est. 
standard deviation of within error: " }; StepwiseRegression sr = new StepwiseRegression(x, y); sr.compute(); System.out.printf("%20s%s\n", " ", "*** ANOVA ***"); System.out.printf("%45s%s\n", " ", "Value"); print(rowLabels, sr.getANOVA()); StepwiseRegression.CoefficientTTests coefT = sr.getCoefficientTTests(); double coef[][] = new double[4][4]; for (int i = 0; i < 4; i++) { coef[i][0] = coefT.getCoefficient(i); coef[i][1] = coefT.getStandardError(i); coef[i][2] = coefT.getTStatistic(i); coef[i][3] = coefT.getPValue(i); } String[] cLabels = {"Coef", "Std. Err", "T-Stat", "p-Value"}; PrintMatrix pm = new PrintMatrix(); PrintMatrixFormat pmf = new PrintMatrixFormat(); pmf.setColumnLabels(cLabels); pm.setTitle("*** Coef *** "); pm.print(pmf, coef); pm.setTitle("*** History *** "); pm.print(sr.getHistory()); pm.setTitle("*** VIF *** "); pm.print(sr.getCoefficientVIF()); pm.setTitle("*** CovS *** "); pm.print(sr.getCovariancesSwept()); System.out.println("*** Intercept *** " + sr.getIntercept()); } }
Output

                    *** ANOVA ***
                                             Value
      degrees of freedom for regression:      2
           degrees of freedom for error:     10
               total degrees of freedom:     12
          sum of squares for regression:  2,641.001
               sum of squares for error:     74.762
                   total sum of squares:  2,715.763
                 regression mean square:  1,320.5
                      error mean square:      7.476
                            F-statistic:    176.627
                                p-value:      0
                 R-squared (in percent):     97.247
        adjusted R-squared (in percent):     96.697
est. standard deviation of within error:      2.734

          *** Coef ***
    Coef    Std. Err  T-Stat   p-Value
0   1.44    0.138     10.403   0
1   0.416   0.186      2.242   0.052
2  -0.41    0.199     -2.058   0.07
3  -0.614   0.049    -12.621   0

*** History ***
   0
0  2
1  0
2  0
3  1
4  0

*** VIF ***
   0
0   1.064
1  18.78
2   3.46
3   1.064

              *** CovS ***
    0        1        2        3       4
0   0.003   -0.029   -0.946    0       1.44
1  -0.029  154.72  -142.8      0.907  64.381
2  -0.946 -142.8    142.302    0.07  -58.35
3   0        0.907    0.07     0      -0.614
4   1.44    64.381  -58.35    -0.614  74.762

*** Intercept *** 103.09738163667471

Link to Java source.