This example builds a random forest with ALACART decision trees. A single tree and the random forest are fit to the Kyphosis data and predictions for a test-data set consisting of 10 "new" cases are generated.
The Kyphosis data consist of 81 cases representing 81 children who have undergone surgery to correct a type of spinal deformity known as Kyphosis. The response variable is the presence or absence of Kyphosis after the surgery. The three predictors are Age (in months), Number (the number of vertebrae involved in the operation), and Start (the first vertebra operated on).
import com.imsl.math.*; import com.imsl.stat.*; import com.imsl.datamining.decisionTree.*; public class RandomTreesEx1 { public static void main(String[] args) throws Exception { DecisionTree.VariableType[] kyphosisVarType = { DecisionTree.VariableType.CATEGORICAL, DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS, DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS, DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS }; int kyphosisResponseIdx = 0; double[][] kyphosisXY = { {0, 71, 3, 5}, {0, 158, 3, 14}, {1, 128, 4, 5}, {0, 2, 5, 1}, {0, 1, 4, 15}, {0, 1, 2, 16}, {0, 61, 2, 17}, {0, 37, 3, 16}, {0, 113, 2, 16}, {1, 59, 6, 12}, {1, 82, 5, 14}, {0, 148, 3, 16}, {0, 18, 5, 2}, {0, 1, 4, 12}, {0, 168, 3, 18}, {0, 1, 3, 16}, {0, 78, 6, 15}, {0, 175, 5, 13}, {0, 80, 5, 16}, {0, 27, 4, 9}, {0, 22, 2, 16}, {1, 105, 6, 5}, {1, 96, 3, 12}, {0, 131, 2, 3}, {1, 15, 7, 2}, {0, 9, 5, 13}, {0, 8, 3, 6}, {0, 100, 3, 14}, {0, 4, 3, 16}, {0, 151, 2, 16}, {0, 31, 3, 16}, {0, 125, 2, 11}, {0, 130, 5, 13}, {0, 112, 3, 16}, {0, 140, 5, 11}, {0, 93, 3, 16}, {0, 1, 3, 9}, {1, 52, 5, 6}, {0, 20, 6, 9}, {1, 91, 5, 12}, {1, 73, 5, 1}, {0, 35, 3, 13}, {0, 143, 9, 3}, {0, 61, 4, 1}, {0, 97, 3, 16}, {1, 139, 3, 10}, {0, 136, 4, 15}, {0, 131, 5, 13}, {1, 121, 3, 3}, {0, 177, 2, 14}, {0, 68, 5, 10}, {0, 9, 2, 17}, {1, 139, 10, 6}, {0, 2, 2, 17}, {0, 140, 4, 15}, {0, 72, 5, 15}, {0, 2, 3, 13}, {1, 120, 5, 8}, {0, 51, 7, 9}, {0, 102, 3, 13}, {1, 130, 4, 1}, {1, 114, 7, 8}, {0, 81, 4, 1}, {0, 118, 3, 16}, {0, 118, 4, 16}, {0, 17, 4, 10}, {0, 195, 2, 17}, {0, 159, 4, 13}, {0, 18, 4, 11}, {0, 15, 5, 16}, {0, 158, 5, 14}, {0, 127, 4, 12}, {0, 87, 4, 16}, {0, 206, 4, 10}, {0, 11, 3, 15}, {0, 178, 4, 15}, {1, 157, 3, 13}, {0, 26, 7, 13}, {0, 120, 2, 13}, {1, 42, 7, 6}, {0, 36, 4, 13} }; double[][] kyphosisXYTest = { {0, 71, 3, 5}, {1, 128, 4, 5}, {0, 1, 4, 15}, {0, 61, 6, 10}, {0, 113, 2, 16}, {1, 82, 5, 14}, {0, 148, 3, 16}, {0, 1, 4, 12}, {0, 1, 3, 16}, {0, 175, 5, 13} }; ALACART dt = new 
ALACART(kyphosisXY, kyphosisResponseIdx, kyphosisVarType); dt.fitModel(); double[] singlePredictions = dt.predict(kyphosisXYTest); RandomTrees rf = new RandomTrees(dt); rf.setRandomObject(new Random(123457)); rf.setNumberOfRandomFeatures(2); double[] rfPredictions = rf.predict(kyphosisXYTest); new PrintMatrix("Kyphosis test data single tree predictions" + " on the test data:").print(singlePredictions); new PrintMatrix("Kyphosis test data random forest predictions" + " on the test data:").print(rfPredictions); } }
Output:

    Kyphosis test data single tree predictions on the test data:
         0
     0   0
     1   0
     2   0
     3   1
     4   0
     5   0
     6   0
     7   0
     8   0
     9   0

    Kyphosis test data random forest predictions on the test data:
         0
     0   0
     1   1
     2   0
     3   0
     4   0
     5   0
     6   0
     7   0
     8   0
     9   0

Link to Java source.