This example uses the CHAID method on simulated categorical data and demonstrates printing the tree structure with and without custom labels.
import com.imsl.datamining.decisionTree.*; public class CHAIDSimulatedCategoricalData { public static void main(String[] args) throws Exception { double[][] xy = { {2, 0, 2}, {1, 0, 0}, {2, 1, 3}, {0, 1, 0}, {1, 2, 0}, {2, 2, 3}, {2, 2, 3}, {0, 1, 0}, {0, 0, 0}, {0, 1, 0}, {1, 2, 0}, {2, 0, 2}, {0, 2, 0}, {2, 0, 1}, {0, 0, 0}, {2, 0, 1}, {1, 0, 0}, {0, 2, 0}, {2, 0, 1}, {1, 2, 0}, {0, 2, 2}, {2, 1, 3}, {1, 1, 0}, {2, 2, 3}, {1, 2, 0}, {2, 2, 3}, {2, 0, 1}, {2, 1, 3}, {1, 2, 0}, {1, 1, 0} }; DecisionTree.VariableType[] varType = { DecisionTree.VariableType.CATEGORICAL, DecisionTree.VariableType.CATEGORICAL, DecisionTree.VariableType.CATEGORICAL }; String responseName = "Response"; String[] names = {"Var1", "Var2"}; String[] classNames = {"c1", "c2", "c3", "c4"}; String[] varLabels = {"L1", "L2", "L3", "A", "B", "C"}; CHAID dt = new CHAID(xy, 2, varType); dt.setMinObsPerChildNode(5); dt.setMinObsPerNode(10); dt.setMaxNodes(50); dt.fitModel(); System.out.println("\nGenerated labels:"); dt.printDecisionTree(true); System.out.println("\nCustom labels:"); dt.printDecisionTree(responseName, names, classNames, varLabels, false); } }
Generated labels:

Decision Tree:

Node 0: Cost = 0.467, N= 30, Level = 0, Child nodes: 1 2
P(Y=0)= 0.533 P(Y=1)= 0.133 P(Y=2)= 0.100 P(Y=3)= 0.233
Predicted Y: 0

Node 1: Cost = 0.033, N= 17, Level = 1
Rule: X0 in: { 0 1 }
P(Y=0)= 0.941 P(Y=1)= 0.000 P(Y=2)= 0.059 P(Y=3)= 0.000
Predicted Y: 0

Node 2: Cost = 0.200, N= 13, Level = 1, Child nodes: 3 4
Rule: X0 in: { 2 }
P(Y=0)= 0.000 P(Y=1)= 0.308 P(Y=2)= 0.154 P(Y=3)= 0.538
Predicted Y: 3

Node 3: Cost = 0.067, N= 6, Level = 2
Rule: X1 in: { 0 }
P(Y=0)= 0.000 P(Y=1)= 0.667 P(Y=2)= 0.333 P(Y=3)= 0.000
Predicted Y: 1

Node 4: Cost = 0.000, N= 7, Level = 2
Rule: X1 in: { 1 2 }
P(Y=0)= 0.000 P(Y=1)= 0.000 P(Y=2)= 0.000 P(Y=3)= 1.000
Predicted Y: 3

Custom labels:

Decision Tree:

Node 0: Cost = 0.467, N= 30, Level = 0, Child nodes: 1 2
P(Y=0)= 0.533 P(Y=1)= 0.133 P(Y=2)= 0.100 P(Y=3)= 0.233
Predicted Response: c1

Node 1: Cost = 0.033, N= 17, Level = 1
Rule: Var1 in: { L1 L2 }
P(Y=0)= 0.941 P(Y=1)= 0.000 P(Y=2)= 0.059 P(Y=3)= 0.000
Predicted Response: c1

Node 2: Cost = 0.200, N= 13, Level = 1, Child nodes: 3 4
Rule: Var1 in: { L3 }
P(Y=0)= 0.000 P(Y=1)= 0.308 P(Y=2)= 0.154 P(Y=3)= 0.538
Predicted Response: c4

Node 3: Cost = 0.067, N= 6, Level = 2
Rule: Var2 in: { A }
P(Y=0)= 0.000 P(Y=1)= 0.667 P(Y=2)= 0.333 P(Y=3)= 0.000
Predicted Response: c2

Node 4: Cost = 0.000, N= 7, Level = 2
Rule: Var2 in: { B C }
P(Y=0)= 0.000 P(Y=1)= 0.000 P(Y=2)= 0.000 P(Y=3)= 1.000
Predicted Response: c4

Link to Java source.