This example builds a random forest of C45 decision trees on simulated categorical data. The variable importance measure, which is based on the change in predictive accuracy, is largest in magnitude for variable 0 (all of the reported importance values are negative in this run).
import com.imsl.math.*; import com.imsl.stat.*; import com.imsl.datamining.decisionTree.*; public class RandomTreesEx3 { public static void main(String[] args) throws Exception { double[][] sim0XY = { {2, 25.92869, 0, 0}, {1, 51.63245, 1, 1}, {1, 25.78432, 0, 2}, {0, 39.37948, 0, 3}, {2, 24.65058, 0, 2}, {2, 45.20084, 0, 2}, {2, 52.67960, 1, 3}, {1, 44.28342, 1, 3}, {2, 40.63523, 1, 3}, {2, 51.76094, 0, 3}, {2, 26.30368, 0, 1}, {2, 20.70230, 1, 0}, {2, 38.74273, 1, 3}, {2, 19.47333, 0, 0}, {1, 26.42211, 0, 0}, {2, 37.05986, 1, 0}, {1, 51.67043, 1, 3}, {0, 42.40156, 0, 3}, {2, 33.90027, 1, 2}, {1, 35.43282, 0, 0}, {1, 44.30369, 0, 1}, {0, 46.72387, 0, 2}, {1, 46.99262, 0, 2}, {0, 36.05923, 0, 3}, {2, 36.83197, 1, 1}, {1, 61.66257, 1, 2}, {0, 25.67714, 0, 3}, {1, 39.08567, 1, 0}, {0, 48.84341, 1, 1}, {1, 39.34391, 0, 3}, {2, 24.73522, 0, 2}, {1, 50.55251, 1, 3}, {0, 31.34263, 1, 3}, {1, 27.15795, 1, 0}, {0, 31.72685, 0, 2}, {0, 25.00408, 0, 3}, {1, 26.35457, 1, 3}, {2, 38.12343, 0, 1}, {0, 49.94030, 0, 2}, {1, 42.45779, 1, 3}, {0, 38.80948, 1, 1}, {0, 43.22799, 1, 1}, {0, 41.87624, 0, 3}, {2, 48.07820, 0, 2}, {0, 43.23673, 1, 0}, {2, 39.41294, 0, 3}, {1, 23.93346, 0, 2}, {2, 42.84130, 1, 3}, {2, 30.40669, 0, 1}, {0, 37.77389, 0, 2} }; DecisionTree.VariableType[] sim0VarType = { DecisionTree.VariableType.CATEGORICAL, DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS, DecisionTree.VariableType.CATEGORICAL, DecisionTree.VariableType.CATEGORICAL }; int sim0ResponseIdx = 0, n = sim0XY.length; double[] knownY = new double[n]; for (int i = 0; i < n; i++) { knownY[i] = sim0XY[i][sim0ResponseIdx]; } C45 dt = new C45(sim0XY, sim0ResponseIdx, sim0VarType); RandomTrees rf = new RandomTrees(dt); rf.setRandomObject(new Random(123457)); rf.setCalculateVariableImportance(true); rf.fitModel(); double[] outOfBagPredictions = rf.getOutOfBagPredictions(); int[][] classErrors = rf.getClassErrors(knownY, outOfBagPredictions); double[] variableImportance = rf.getVariableImportance(); new 
PrintMatrix("C45 Random Forest class errors:"). print(classErrors); new PrintMatrix("C45 Random Forest variable importance:"). print(variableImportance); } }
Output:

C45 Random Forest class errors:
    0   1
0  13  15
1  16  16
2  13  19
3  42  50

C45 Random Forest variable importance:
      0
0  -0.018
1  -0.002
2  -0.007

Link to Java source.