Example 2: Nominal Attribute Usage

This example trains a Naive Bayes classifier using 24 training patterns with four nominal input attributes.

The first nominal attribute has three classifications and the others have two. The target classifications are contact lens prescriptions: hard, soft, or neither recommended. These data are benchmark data from the Knowledge Discovery Databases archive maintained at the University of California, Irvine: http://archive.ics.uci.edu/ml/datasets/Lenses.

using System;
using Imsl.DataMining;

/// <summary>
/// Example: trains a <c>NaiveBayesClassifier</c> on 24 contact-lens patterns,
/// each described by four nominal attributes, then prints the training error
/// rates and the per-pattern class probabilities.
/// </summary>
public class NaiveBayesClassifierEx2
{
    /// <summary>
    /// Maps a zero-based class index to the label printed in the report.
    /// Any index other than 0, 1 or 2 is reported as "Missing".
    /// </summary>
    private static string ClassName(int classIndex)
    {
        switch (classIndex)
        {
            case 0:
                return "Hard";
            case 1:
                return "Soft";
            case 2:
                return "Neither";
            default:
                return "Missing";
        }
    }

    /// <summary>
    /// Builds the data set, trains the classifier, and writes two tables to
    /// standard output: the training error rates and one line per pattern
    /// with the predicted class, the actual class, the class probabilities
    /// and the classification error.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    public static void Main(String[] args)
    {
        // The 24 training patterns, one-based as published; every
        // combination of the four attributes (3 x 2 x 2 x 2) appears once.
        int[][] contactLensData = new int[][]{
            new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 2},
            new int[]{1, 1, 2, 1}, new int[]{1, 1, 2, 2},
            new int[]{1, 2, 1, 1}, new int[]{1, 2, 1, 2},
            new int[]{1, 2, 2, 1}, new int[]{1, 2, 2, 2},
            new int[]{2, 1, 1, 1}, new int[]{2, 1, 1, 2},
            new int[]{2, 1, 2, 1}, new int[]{2, 1, 2, 2},
            new int[]{2, 2, 1, 1}, new int[]{2, 2, 1, 2},
            new int[]{2, 2, 2, 1}, new int[]{2, 2, 2, 2},
            new int[]{3, 1, 1, 1}, new int[]{3, 1, 1, 2},
            new int[]{3, 1, 2, 1}, new int[]{3, 1, 2, 2},
            new int[]{3, 2, 1, 1}, new int[]{3, 2, 1, 2},
            new int[]{3, 2, 2, 1}, new int[]{3, 2, 2, 2}
        };

        // Target class of each pattern, one-based as published.
        int[] classificationData = new int[]{
            3, 2, 3, 1, 3, 2, 3, 1, 3, 2, 3, 1,
            3, 2, 3, 3, 3, 3, 3, 1, 3, 2, 3, 3
        };

        // Classification values must start at 0, so shift both the targets
        // and the attribute values from one-based to zero-based.
        for (int k = 0; k < classificationData.Length; k++)
        {
            classificationData[k]--;
        }
        foreach (int[] pattern in contactLensData)
        {
            for (int attr = 0; attr < pattern.Length; attr++)
            {
                pattern[attr]--;
            }
        }

        // NOTE(review): the arguments are presumably (continuous attributes,
        // nominal attributes, classes) -- confirm against the IMSL docs.
        NaiveBayesClassifier classifier = new NaiveBayesClassifier(0, 4, 3);

        // Register one nominal attribute per column with its category count.
        int[] categories = new int[]{3, 2, 2, 2};
        foreach (int categoryCount in categories)
        {
            classifier.CreateNominalAttribute(categoryCount);
        }
        classifier.Train(null, contactLensData, classificationData);

        // Rows 0..2 are printed under Hard/Soft/Neither and row 3 under
        // Total; each row is printed as "[0]/[1]".
        int[][] errorCounts = classifier.GetTrainingErrors();

        Console.Out.WriteLine("\n    Contact Lense Error Rates");
        Console.Out.WriteLine(
            "------------------------------------------------");
        Console.Out.WriteLine(
            "  Hard        Soft       Neither    |   Total");
        Console.Out.WriteLine(
            "  {0}/{1}         {2}/{3}        {4}/{5}       |   {6}/{7}",
            errorCounts[0][0], errorCounts[0][1],
            errorCounts[1][0], errorCounts[1][1],
            errorCounts[2][0], errorCounts[2][1],
            errorCounts[3][0], errorCounts[3][1]);
        Console.Out.WriteLine(
            "------------------------------------------------\n\n\n");

        // Classify every training pattern with the trained classifier.
        Console.Out.WriteLine(
            "Probabilities for Incorrect Classifications");
        Console.Out.WriteLine(" Predicted   ");
        Console.Out.WriteLine("   Class     |  Class       |   "
            + "P(0)     P(1)     P(2)   |  classification error");
        Console.Out.WriteLine(
            "---------------------------------------" +
            "-----------------------------------------");

        // Scratch buffer handed to the classifier so the training rows
        // themselves are never passed in directly.
        int[] nominalInput = new int[contactLensData[0].Length];

        for (int row = 0; row < contactLensData.Length; row++)
        {
            Array.Copy(contactLensData[row], nominalInput,
                nominalInput.Length);
            int actualClass = classificationData[row];

            double[] probabilities =
                classifier.Probabilities(null, nominalInput);
            int predictedClass =
                classifier.PredictClass(null, nominalInput);
            double error =
                classifier.ClassError(null, nominalInput, actualClass);

            // Left-aligned labels: the predicted column is 12 wide and the
            // actual column 13 wide, matching the header layout.
            Console.Out.Write(" {0,-12}|", ClassName(predictedClass));
            Console.Out.Write(" {0,-13}|", ClassName(actualClass));

            for (int c = 0; c < 3; c++)
            {
                Console.Out.Write("   {0, 2:f3} ", probabilities[c]);
            }
            Console.Out.WriteLine(" |  " + error);
        }
    }
}

Output


    Contact Lense Error Rates
------------------------------------------------
  Hard        Soft       Neither    |   Total
  0/4         0/5        1/15       |   1/24
------------------------------------------------



Probabilities for Incorrect Classifications
 Predicted   
   Class     |  Class       |   P(0)     P(1)     P(2)   |  classification error
--------------------------------------------------------------------------------
 Neither     | Neither      |   0.044    0.130    0.827  |  0.173282735372243
 Soft        | Soft         |   0.174    0.622    0.203  |  0.377703751596285
 Neither     | Neither      |   0.186    0.018    0.795  |  0.204814844531185
 Hard        | Hard         |   0.724    0.086    0.190  |  0.27622138817044
 Neither     | Neither      |   0.019    0.154    0.827  |  0.173119280942829
 Soft        | Soft         |   0.076    0.724    0.200  |  0.275800758853599
 Neither     | Neither      |   0.092    0.024    0.884  |  0.116366478280659
 Hard        | Hard         |   0.524    0.166    0.310  |  0.475967127191525
 Neither     | Neither      |   0.025    0.113    0.862  |  0.137948891733597
 Soft        | Soft         |   0.118    0.633    0.248  |  0.366677564499419
 Neither     | Neither      |   0.113    0.017    0.870  |  0.1300941614979
 Hard        | Hard         |   0.606    0.108    0.286  |  0.394395320215918
 Neither     | Neither      |   0.011    0.133    0.856  |  0.143807127534487
 Soft        | Soft         |   0.050    0.714    0.236  |  0.286218909502689
 Neither     | Neither      |   0.054    0.021    0.925  |  0.0747706562988114
 Neither     | Neither      |   0.394    0.187    0.419  |  0.58120710821219
 Neither     | Neither      |   0.023    0.068    0.909  |  0.0907529770941407
 Soft        | Neither      |   0.142    0.509    0.349  |  0.650925868235811
 Neither     | Neither      |   0.099    0.010    0.891  |  0.109251850144518
 Hard        | Hard         |   0.599    0.071    0.330  |  0.401383015922963
 Neither     | Neither      |   0.010    0.081    0.909  |  0.090658834477251
 Soft        | Soft         |   0.062    0.594    0.344  |  0.406256189123439
 Neither     | Neither      |   0.047    0.012    0.941  |  0.0590094638695026
 Neither     | Neither      |   0.391    0.124    0.485  |  0.514955474332508

Link to C# source.