Example: StepwiseRegression

This example uses a data set from Draper and Smith (1981, pp. 629-630). The method compute() is invoked to find the best regression subset from the four candidate variables. The example then prints the ANOVA table, the coefficient t-tests, the history, the variance inflation factors, the swept covariance matrix, and the intercept of the final model.


import com.imsl.stat.*;
import com.imsl.math.*;

/**
 * Example program for {@code StepwiseRegression}.
 *
 * <p>Runs a stepwise regression on a 13-observation data set with four
 * candidate variables and prints the ANOVA table, the coefficient t-tests,
 * the history, the variance inflation factors, the swept covariance matrix
 * and the intercept.
 */
public class StepwiseRegressionEx1 {

    /** Number pattern used for the ANOVA values: grouped, at most three decimals. */
    private static final String NUMBER_PATTERN = "##,###.###";

    /** Width of the right-aligned integer part of a formatted ANOVA value. */
    private static final int INTEGER_WIDTH = 6;

    /** Width of the left-aligned fractional part of a formatted ANOVA value. */
    private static final int FRACTION_WIDTH = 3;

    /**
     * Prints one line per label: the label right-aligned in 41 columns,
     * followed by the matching entry of {@code anova.getArray()}.
     *
     * <p>Only entries that have a label are printed; any additional trailing
     * entries of the ANOVA array are ignored. Bounding the loop by the label
     * count (instead of {@code values.length - 2}) keeps the method safe if
     * the two array lengths ever differ by something other than two.
     *
     * @param labels row labels, one per ANOVA entry to print
     * @param anova  ANOVA result whose array supplies the values
     */
    private static void print(String[] labels, ANOVA anova) {
        double[] values = anova.getArray();
        int rows = Math.min(labels.length, values.length);

        for (int i = 0; i < rows; i++) {
            System.out.printf("%41s%s\n", labels[i], customDecimalFormat(values[i]));
        }
        System.out.println();
    }

    /**
     * Formats a value so that decimal points line up when values are stacked:
     * the integer part is right-aligned in six columns and, if a fractional
     * part is present, it is left-aligned in three columns after the point.
     *
     * <p>The formatter is pinned to {@code Locale.US} symbols. The alignment
     * logic looks for a literal {@code '.'} as the decimal separator, so using
     * the platform default locale (where {@code '.'} may be the grouping
     * separator, or the decimal separator may be {@code ','}) would split the
     * number in the wrong place.
     *
     * @param value the number to format
     * @return the aligned textual form of {@code value}
     */
    private static String customDecimalFormat(double value) {
        java.text.DecimalFormat format = new java.text.DecimalFormat(
                NUMBER_PATTERN,
                java.text.DecimalFormatSymbols.getInstance(java.util.Locale.US));
        String formatted = format.format(value);

        int point = formatted.indexOf('.');
        if (point < 0) {
            // No fractional part: only the integer column is emitted.
            return String.format("%" + INTEGER_WIDTH + "s", formatted);
        }
        String integerPart = formatted.substring(0, point);
        String fractionPart = formatted.substring(point + 1);
        return String.format("%" + INTEGER_WIDTH + "s", integerPart)
                + "."
                + String.format("%-" + FRACTION_WIDTH + "s", fractionPart);
    }

    /**
     * Fits the stepwise regression and prints the results to standard output.
     *
     * @param args unused
     * @throws Exception if the regression computation fails
     */
    public static void main(String[] args) throws Exception {
        // Candidate variables: one row per observation, one column per variable.
        double[][] x = {
            {7., 26., 6., 60.}, {1., 29., 15., 52.}, {11., 56., 8., 20.},
            {11., 31., 8., 47.}, {7., 52., 6., 33.}, {11., 55., 9., 22.},
            {3., 71., 17., 6.}, {1., 31., 22., 44.}, {2., 54., 18., 22.},
            {21., 47., 4., 26.}, {1., 40., 23., 34.}, {11., 66., 9., 12.},
            {10.0, 68., 8., 12.}
        };

        // Response values, one per row of x.
        double[] y = {
            78.5, 74.3, 104.3, 87.6, 95.9, 109.2, 102.7,
            72.5, 93.1, 115.9, 83.8, 113.3, 109.4
        };

        String[] rowLabels = {
            "degrees of freedom for regression: ",
            "degrees of freedom for error: ",
            "total degrees of freedom: ",
            "sum of squares for regression: ",
            "sum of squares for error: ",
            "total sum of squares: ",
            "regression mean square: ",
            "error mean square: ",
            "F-statistic: ",
            "p-value: ",
            "R-squared (in percent): ",
            "adjusted R-squared (in percent): ",
            "est. standard deviation of within error: "
        };

        StepwiseRegression sr = new StepwiseRegression(x, y);
        sr.compute();

        System.out.printf("%20s%s\n", " ", "*** ANOVA ***");
        System.out.printf("%45s%s\n", " ", "Value");

        print(rowLabels, sr.getANOVA());

        // One row per candidate variable, one column per reported statistic.
        String[] cLabels = {"Coef", "Std. Err", "T-Stat", "p-Value"};
        int variableCount = x[0].length;

        StepwiseRegression.CoefficientTTests coefT = sr.getCoefficientTTests();
        double[][] coef = new double[variableCount][cLabels.length];
        for (int i = 0; i < variableCount; i++) {
            coef[i][0] = coefT.getCoefficient(i);
            coef[i][1] = coefT.getStandardError(i);
            coef[i][2] = coefT.getTStatistic(i);
            coef[i][3] = coefT.getPValue(i);
        }

        PrintMatrix pm = new PrintMatrix();
        PrintMatrixFormat pmf = new PrintMatrixFormat();
        pmf.setColumnLabels(cLabels);

        pm.setTitle("*** Coef *** ");
        pm.print(pmf, coef);
        pm.setTitle("*** History *** ");
        pm.print(sr.getHistory());
        pm.setTitle("*** VIF *** ");
        pm.print(sr.getCoefficientVIF());
        pm.setTitle("*** CovS *** ");
        pm.print(sr.getCovariancesSwept());
        System.out.println("*** Intercept ***   " + sr.getIntercept());
    }
}

Output

                    *** ANOVA ***
                                             Value
      degrees of freedom for regression:      2
           degrees of freedom for error:     10
               total degrees of freedom:     12
          sum of squares for regression:  2,641.001
               sum of squares for error:     74.762
                   total sum of squares:  2,715.763
                 regression mean square:  1,320.5  
                      error mean square:      7.476
                            F-statistic:    176.627
                                p-value:      0
                 R-squared (in percent):     97.247
        adjusted R-squared (in percent):     96.697
est. standard deviation of within error:      2.734

             *** Coef *** 
    Coef   Std. Err  T-Stat   p-Value  
0   1.44    0.138     10.403   0       
1   0.416   0.186      2.242   0.052   
2  -0.41    0.199     -2.058   0.07    
3  -0.614   0.049    -12.621   0       

*** History *** 
   0  
0  2  
1  0  
2  0  
3  1  
4  0  

*** VIF *** 
     0     
0   1.064  
1  18.78   
2   3.46   
3   1.064  

                 *** CovS *** 
     0        1         2        3        4     
0   0.003    -0.029    -0.946   0        1.44   
1  -0.029   154.72   -142.8     0.907   64.381  
2  -0.946  -142.8     142.302   0.07   -58.35   
3   0         0.907     0.07    0       -0.614  
4   1.44     64.381   -58.35   -0.614   74.762  

*** Intercept ***   103.09738163667471
Link to Java source.