Example 1: RandomTrees

This example builds a random forest with ALACART decision trees. A single tree and the random forest are fit to the Kyphosis data and predictions for a test-data set consisting of 10 "new" cases are generated.

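The Kyphosis data consist of 81 cases representing 81 children who have undergone surgery to correct a type of spinal deformity known as Kyphosis. The response variable is the presence or absence of Kyphosis after the surgery. The three predictors are: Age (the age of the patient in months), Number (the number of vertebrae involved in the surgery), and Start (the number of the first vertebra operated on). In the data arrays below, each row lists the response first, followed by Age, Number, and Start.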


import com.imsl.math.*;
import com.imsl.stat.*;
import com.imsl.datamining.decisionTree.*;

/**
 * Example program: fits a single ALACART decision tree and a random forest
 * derived from it to the Kyphosis data, then prints the predictions each
 * model makes for a small set of test rows.
 */
public class RandomTreesEx1 {

    /** Column holding the response variable in every data row. */
    private static final int RESPONSE_COLUMN = 0;

    /**
     * Variable type of each column: the response (column 0) is categorical,
     * the three remaining columns are continuous.
     */
    private static final DecisionTree.VariableType[] COLUMN_TYPES = {
        DecisionTree.VariableType.CATEGORICAL,
        DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS,
        DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS,
        DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS
    };

    /** Training data: 81 rows, response in column 0 followed by three predictors. */
    private static final double[][] TRAINING_ROWS = {
        {0, 71, 3, 5}, {0, 158, 3, 14}, {1, 128, 4, 5}, {0, 2, 5, 1},
        {0, 1, 4, 15}, {0, 1, 2, 16}, {0, 61, 2, 17}, {0, 37, 3, 16},
        {0, 113, 2, 16}, {1, 59, 6, 12}, {1, 82, 5, 14}, {0, 148, 3, 16},
        {0, 18, 5, 2}, {0, 1, 4, 12}, {0, 168, 3, 18}, {0, 1, 3, 16},
        {0, 78, 6, 15}, {0, 175, 5, 13}, {0, 80, 5, 16}, {0, 27, 4, 9},
        {0, 22, 2, 16}, {1, 105, 6, 5}, {1, 96, 3, 12}, {0, 131, 2, 3},
        {1, 15, 7, 2}, {0, 9, 5, 13}, {0, 8, 3, 6}, {0, 100, 3, 14},
        {0, 4, 3, 16}, {0, 151, 2, 16}, {0, 31, 3, 16}, {0, 125, 2, 11},
        {0, 130, 5, 13}, {0, 112, 3, 16}, {0, 140, 5, 11}, {0, 93, 3, 16},
        {0, 1, 3, 9}, {1, 52, 5, 6}, {0, 20, 6, 9}, {1, 91, 5, 12},
        {1, 73, 5, 1}, {0, 35, 3, 13}, {0, 143, 9, 3}, {0, 61, 4, 1},
        {0, 97, 3, 16}, {1, 139, 3, 10}, {0, 136, 4, 15}, {0, 131, 5, 13},
        {1, 121, 3, 3}, {0, 177, 2, 14}, {0, 68, 5, 10}, {0, 9, 2, 17},
        {1, 139, 10, 6}, {0, 2, 2, 17}, {0, 140, 4, 15}, {0, 72, 5, 15},
        {0, 2, 3, 13}, {1, 120, 5, 8}, {0, 51, 7, 9}, {0, 102, 3, 13},
        {1, 130, 4, 1}, {1, 114, 7, 8}, {0, 81, 4, 1}, {0, 118, 3, 16},
        {0, 118, 4, 16}, {0, 17, 4, 10}, {0, 195, 2, 17}, {0, 159, 4, 13},
        {0, 18, 4, 11}, {0, 15, 5, 16}, {0, 158, 5, 14}, {0, 127, 4, 12},
        {0, 87, 4, 16}, {0, 206, 4, 10}, {0, 11, 3, 15}, {0, 178, 4, 15},
        {1, 157, 3, 13}, {0, 26, 7, 13}, {0, 120, 2, 13}, {1, 42, 7, 6},
        {0, 36, 4, 13}
    };

    /** Ten test rows, laid out exactly like the training rows. */
    private static final double[][] TEST_ROWS = {
        {0, 71, 3, 5}, {1, 128, 4, 5}, {0, 1, 4, 15}, {0, 61, 6, 10},
        {0, 113, 2, 16}, {1, 82, 5, 14}, {0, 148, 3, 16}, {0, 1, 4, 12},
        {0, 1, 3, 16}, {0, 175, 5, 13}
    };

    /**
     * Runs the example: fits the single tree, builds the forest from it,
     * predicts the test rows with both, and prints the two prediction vectors.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if any of the library calls fails
     */
    public static void main(String[] args) throws Exception {
        // Single tree: fit on the training rows, then predict the test rows.
        ALACART tree = new ALACART(TRAINING_ROWS, RESPONSE_COLUMN, COLUMN_TYPES);
        tree.fitModel();
        double[] treePredictions = tree.predict(TEST_ROWS);

        // Forest: constructed from the tree object, using a fixed random seed
        // and two random features.
        RandomTrees forest = new RandomTrees(tree);
        forest.setRandomObject(new Random(123457));
        forest.setNumberOfRandomFeatures(2);
        double[] forestPredictions = forest.predict(TEST_ROWS);

        // Both prediction vectors are computed before anything is printed.
        show("Kyphosis test data single tree predictions"
                + " on the test data:", treePredictions);
        show("Kyphosis test data random forest predictions"
                + " on the test data:", forestPredictions);
    }

    /**
     * Prints a vector of predictions under the given title.
     *
     * @param title heading passed to the matrix printer
     * @param values predictions to print
     */
    private static void show(String title, double[] values) {
        new PrintMatrix(title).print(values);
    }
}

Output

Kyphosis test data single tree predictions on the test data:
   0  
0  0  
1  0  
2  0  
3  1  
4  0  
5  0  
6  0  
7  0  
8  0  
9  0  

Kyphosis test data random forest predictions on the test data:
   0  
0  0  
1  1  
2  0  
3  0  
4  0  
5  0  
6  0  
7  0  
8  0  
9  0  

Link to Java source.