Example: FeedForwardNetwork

This example trains a 2-layer network using 100 training patterns from one nominal and one continuous input attribute. The nominal attribute has three categories, which are encoded using binary encoding; this produces three binary network input columns. The continuous input attribute is scaled to fall in the interval [0,1].
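
As a minimal sketch of this preprocessing (the BinaryEncode helper below is hypothetical, not part of the IMSL API), a three-category value expands into three indicator columns, and dividing the continuous value by 10 maps its roughly [0,10) range into [0,1):

using System;

public class EncodingSketch
{
   // Hypothetical helper: expand a 1-based category (1..nCategories)
   // into a row of binary indicator columns.
   static double[] BinaryEncode(int category, int nCategories)
   {
      double[] row = new double[nCategories];
      row[category - 1] = 1.0; // e.g. category 2 of 3 -> {0, 1, 0}
      return row;
   }

   public static void Main(System.String[] args)
   {
      double[] encoded = BinaryEncode(2, 3); // {0, 1, 0}
      double scaled = 7.10028447 / 10.0;     // 0.710028447, in [0,1]
      System.Console.Out.WriteLine(
         System.String.Join(", ", encoded) + " | " + scaled);
   }
}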

The network training targets were generated using the relationship:

y = 10*X1 + 20*X2 + 30*X3 + 2.0*X4, where

X1-X3 are the three binary columns, corresponding to categories 1-3 of the nominal attribute, and X4 is the unscaled continuous attribute. (The network itself receives X4/10 as its fourth input, so that input column carries an effective coefficient of 20.)
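
A small sketch of generating one target from this model (the Target helper is hypothetical, written only for illustration):

public class TargetSketch
{
   // x1, x2, x3 are the binary columns; x4 is the unscaled continuous value.
   static double Target(double x1, double x2, double x3, double x4)
   {
      return 10.0 * x1 + 20.0 * x2 + 30.0 * x3 + 2.0 * x4;
   }

   public static void Main(System.String[] args)
   {
      // Category 1 ({1,0,0}) with x4 = 4.007054658 gives
      // 10 + 2 * 4.007054658 = 18.014109316, matching outs[0] below.
      System.Console.Out.WriteLine(Target(1, 0, 0, 4.007054658));
   }
}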

The structure of the network consists of four input nodes and two layers, with three perceptrons in the hidden layer and one in the output layer, as illustrated below:

[Figure: a two-layer feed-forward network with four inputs feeding three hidden-layer perceptrons, which in turn feed a single output perceptron.]

There are a total of 19 weights in this network: 4 × 3 weights plus 3 bias weights in the hidden layer, and 3 × 1 weights plus 1 bias weight in the output layer. The activation functions are all linear. Since the target output is a linear function of the input attributes, linear activation functions guarantee that the network forecasts will exactly match their targets. Of course, this same result could have been obtained using linear multiple regression. Training is conducted using the quasi-Newton trainer.
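
As an illustration of the regression-equivalence remark, the following self-contained sketch (hand-rolled normal equations, not the IMSL regression classes; all identifiers are hypothetical) recovers the coefficients {10, 20, 30, 20} from the four network input columns, 20 being the effective coefficient on the scaled fourth column:

using System;

public class RegressionSketch
{
   // Solve A*b = c by Gaussian elimination with partial pivoting.
   static double[] Solve(double[,] A, double[] c)
   {
      int n = c.Length;
      for (int k = 0; k < n; k++)
      {
         int p = k;
         for (int r = k + 1; r < n; r++)
            if (Math.Abs(A[r,k]) > Math.Abs(A[p,k])) p = r;
         for (int j = 0; j < n; j++)
         {
            double t = A[k,j]; A[k,j] = A[p,j]; A[p,j] = t;
         }
         double tc = c[k]; c[k] = c[p]; c[p] = tc;
         for (int r = k + 1; r < n; r++)
         {
            double m = A[r,k] / A[k,k];
            for (int j = k; j < n; j++) A[r,j] -= m * A[k,j];
            c[r] -= m * c[k];
         }
      }
      double[] b = new double[n];
      for (int k = n - 1; k >= 0; k--)
      {
         double s = c[k];
         for (int j = k + 1; j < n; j++) s -= A[k,j] * b[j];
         b[k] = s / A[k,k];
      }
      return b;
   }

   public static void Main(System.String[] args)
   {
      // Five patterns of the four network input columns:
      // three binary columns and one scaled continuous column.
      double[,] X = {{1, 0, 0, 0.4}, {0, 1, 0, 0.7}, {0, 0, 1, 0.2},
                     {1, 0, 0, 0.9}, {0, 1, 0, 0.1}};
      int nRows = 5, nCols = 4;
      // Targets from the model; the scaled column carries an
      // effective coefficient of 20 (= 2 * 10).
      double[] y = new double[nRows];
      for (int i = 0; i < nRows; i++)
         y[i] = 10*X[i,0] + 20*X[i,1] + 30*X[i,2] + 20*X[i,3];
      // Form and solve the normal equations (X'X) b = X'y.
      double[,] XtX = new double[nCols,nCols];
      double[] Xty = new double[nCols];
      for (int i = 0; i < nRows; i++)
         for (int j = 0; j < nCols; j++)
         {
            Xty[j] += X[i,j] * y[i];
            for (int k = 0; k < nCols; k++) XtX[j,k] += X[i,j] * X[i,k];
         }
      double[] b = Solve(XtX, Xty);
      // Prints approximately 10, 20, 30, 20
      System.Console.Out.WriteLine(System.String.Join(", ", b));
   }
}

The complete network example follows.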

using System;
using Imsl.DataMining.Neural;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;

//*****************************************************************************
// Two Layer Feed-Forward Network with 4 inputs: 1 nominal with 3 categories,
// encoded using binary encoding, 1 continuous input attribute, and 1 output 
// target (continuous).  
// There is a perfect linear relationship between the input and output 
// variables:
//
// MODEL:  Y = 10*X1+20*X2+30*X3+2*X4 
//     
// Variables X1-X3 are the binary encoded nominal variable and X4 is the
// continuous variable.
//*****************************************************************************

public class FeedForwardNetworkEx1
{
   
   // Network Settings
   private static int nObs = 100; // number of training patterns
   private static int nInputs = 4; // four inputs
   private static int nCategorical = 3; // binary columns encoding the nominal attribute
   private static int nOutputs = 1; // one continuous output
   private static int nPerceptrons = 3; // perceptrons in hidden layer
   private static IActivation hiddenLayerActivation;
   private static IActivation outputLayerActivation;
   private static System.String errorMsg = "";
   // Error status messages for the quasi-Newton (least squares) trainer
   private static System.String errorMsg0 =
      "--> Least Squares Training Completed Successfully";
   private static System.String errorMsg1 =
      "--> Scaled step tolerance was satisfied.  The current solution \n" +
      "may be an approximate local solution, or the algorithm is making\n" +
      "slow progress and is not near a solution, or the Step Tolerance\n" +
      "is too big";
   private static System.String errorMsg2 =
      "--> Scaled actual and predicted reductions in the function are\n" +
      "less than or equal to the relative function convergence\n" +
      "tolerance RelativeTolerance";
   private static System.String errorMsg3 =
      "--> Iterates appear to be converging to a noncritical point.\n" +
      "Incorrect gradient information, a discontinuous function,\n" +
      "or stopping tolerances being too tight may be the cause.";
   private static System.String errorMsg4 =
      "--> Five consecutive steps with the maximum stepsize have\n" +
      "been taken.  Either the function is unbounded below, or has\n" +
      "a finite asymptote in some direction, or the maximum stepsize\n" +
      "is too small.";
   private static System.String errorMsg5 =
      "--> Too many iterations required";
   
   // categoricalAtt[]: A 2D matrix of values for the categorical training
   //                   attribute. In this example, the single categorical 
   //                   attribute has 3 categories that are encoded using 
   //                   binary encoding for input into the network. 
   //                   {1,0,0} = category 1, {0,1,0} = category 2, and
   //                   {0,0,1} = category 3.
   private static double[,] categoricalAtt =  
        {{1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, 
         {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, 
         {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, 
         {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, 
         {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, 
         {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {1, 0, 0}, {0, 1, 0}, 
         {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, 
         {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, 
         {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, 
         {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, 
         {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 0, 1}, 
         {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, 
         {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, 
          {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, 
         {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, 
         {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, 
         {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}};
   //
   // contAtt[]:  An array of values for the continuous training attribute
   //
   private static double[] contAtt = new double[]{4.007054658, 7.10028447,
      4.740350984, 5.714553211, 6.205437459, 2.598930065, 8.65089967,
      5.705787357, 2.513348184, 2.723795955, 4.1829356, 1.93280416,
      0.332941608, 6.745567628, 5.593588463, 7.273544478, 3.162117939,
      4.205381208, 0.16414745, 2.883418275, 0.629342241, 1.082223406,
      8.180324708, 8.004894314, 7.856215418, 7.797143157, 8.350033996,
      3.778254431, 6.964837082, 6.13938006, 0.48610387, 5.686627923,
      8.146173848, 5.879852653, 4.587492779, 0.714028533, 7.56324211,
      8.406012623, 4.225261454, 6.369220241, 4.432772218, 9.52166984,
      7.935791508, 4.557155333, 7.976015058, 4.913538616, 1.473658514,
      2.592338905, 1.386872932, 7.046051685, 1.432128376, 1.153580985,
      5.6561491, 3.31163251, 4.648324851, 5.042514515, 0.657054195,
      7.958308093, 7.557870384, 7.901990083, 5.2363088, 6.95582150,
      8.362167045, 4.875903563, 1.729229471, 4.380370223, 8.527875685,
      2.489198107, 3.711472959, 4.17692681, 5.844828801, 4.825754155,
      5.642267843, 5.339937786, 4.440813223, 1.615143829, 7.542969339,
      8.100542684, 0.98625265, 4.744819569, 8.926039258, 8.813441887,
      7.749383991, 6.551841576, 8.637046998, 4.560281415, 1.386055087,
      0.778869034, 3.883379045, 2.364501589, 9.648737525, 1.21754765,
      3.908879368, 4.253313879, 9.31189696, 3.811953836, 5.78471629,
      3.414486452, 9.345413015, 1.024053777};
   //
   // outs[]:  An array containing the training targets for this network.
   // In this case there is an exact linear relationship between these
   // targets and the inputs:  outs = 10*X1+20*X2+30*X3+2*X4, where
   // X1-X3 are the binary encoded nominal columns and X4 = contAtt
   //
   private static double[] outs = new double[]{18.01410932, 24.20056894,
      19.48070197, 21.42910642, 22.41087492, 15.19786013, 27.30179934,
      21.41157471, 15.02669637, 15.44759191, 18.3658712, 13.86560832,
      10.66588322, 23.49113526, 21.18717693, 24.54708896, 16.32423588,
      18.41076242, 10.3282949, 15.76683655, 11.25868448, 12.16444681,
      26.36064942, 26.00978863, 25.71243084, 25.59428631, 26.70006799,
      17.55650886, 23.92967416, 22.27876012, 10.97220774, 21.37325585,
      26.2923477, 21.75970531, 19.17498556, 21.42805707, 35.12648422,
      36.81202525, 28.45052291, 32.73844048, 28.86554444, 39.04333968,
      35.87158302, 29.11431067, 35.95203012, 29.82707723, 22.94731703,
      25.18467781, 22.77374586, 34.09210337, 22.86425675, 22.30716197,
      31.3122982, 26.62326502, 29.2966497, 30.08502903, 21.31410839,
      35.91661619, 35.11574077, 35.80398017, 30.4726176, 33.91164302,
      36.72433409, 29.75180713, 23.45845894, 38.76074045, 47.05575137,
      34.97839621, 37.42294592, 38.35385362, 41.6896576, 39.65150831,
      41.28453569, 40.67987557, 38.88162645, 33.23028766, 45.08593868,
      46.20108537, 31.9725053, 39.48963914, 47.85207852, 47.62688377,
      45.49876798, 43.10368315, 47.274094, 39.1205628, 32.77211017,
      31.55773807, 37.76675809, 34.72900318, 49.29747505, 32.4350953, 
      37.81775874, 38.50662776, 48.62379392, 37.62390767, 41.56943258,
      36.8289729, 48.69082603, 32.04810755};
   // **********************************************************************
   // MAIN
   // **********************************************************************


   public static void Main(System.String[] args)
   {
      
      double[] weight; // network weights
      double[] gradient; // network gradient after training
      double[,] xData; // input attributes for the trainer
      double[,] yData; // output attributes for the trainer
      int i, j; // array indices
      int nWeights = 0; // Number of weights obtained from network
      System.String networkFileName = "FeedForwardNetworkEx1.ser";
      System.String trainerFileName = "FeedForwardTrainerEx1.ser";
      System.String xDataFileName = "FeedForwardxDataEx1.ser";
      System.String yDataFileName = "FeedForwardyDataEx1.ser";
      // **********************************************************************
      // PREPROCESS TRAINING PATTERNS
      // **********************************************************************
      System.Console.Out.WriteLine(
         "--> Starting Preprocessing of Training Patterns");
      xData = new double[nObs,nInputs];
      yData = new double[nObs,nOutputs];
      for (i = 0; i < nObs; i++)
      {
         for (j = 0; j < nCategorical; j++)
         {
            xData[i,j] = categoricalAtt[i,j];
         }
         xData[i,nCategorical] = contAtt[i] / 10.0; // Scale continuous input
         yData[i,0] = outs[i]; // outputs are unscaled
      }
      // **********************************************************************
      // CREATE FEEDFORWARD NETWORK
      // **********************************************************************
      System.Console.Out.WriteLine("--> Creating Feed Forward Network Object");
      FeedForwardNetwork network = new FeedForwardNetwork();
      // set up the input layer with number of inputs = nInputs = 4
      network.InputLayer.CreateInputs(nInputs);
      // create a hidden layer with nPerceptrons=3 perceptrons
      network.CreateHiddenLayer().CreatePerceptrons(nPerceptrons);
      // create output layer with nOutputs=1 output perceptron 
      network.OutputLayer.CreatePerceptrons(nOutputs);
      // link all inputs and perceptrons to all perceptrons in the next layer
      network.LinkAll();
      // Get Network Perceptrons for Setting Their Activation Functions
      Perceptron[] perceptrons = network.Perceptrons;
      // Set the hidden-layer perceptrons to the hidden-layer activation;
      // the last perceptron is the output perceptron
      for (i = 0; i < perceptrons.Length - 1; i++)
      {
         perceptrons[i].Activation = hiddenLayerActivation;
      }
      perceptrons[perceptrons.Length - 1].Activation = outputLayerActivation;
      System.Console.Out.WriteLine(
         "--> Feed Forward Network Created with 2 Layers");
      // **********************************************************************
      // TRAIN NETWORK USING QUASI-NEWTON TRAINER
      // **********************************************************************
      System.Console.Out.WriteLine(
         "--> Training Network using Quasi-Newton Trainer");
      // Create Trainer
      QuasiNewtonTrainer trainer = new QuasiNewtonTrainer();
      // Set Training Parameters
      trainer.MaximumTrainingIterations = 1000;
      // Train Network
      trainer.Train(network, xData, yData);
      // Check Training Error Status
      switch (trainer.ErrorStatus)
      {
         case 0:
            errorMsg = errorMsg0;
            break;
         case 1:
            errorMsg = errorMsg1;
            break;
         case 2:
            errorMsg = errorMsg2;
            break;
         case 3:
            errorMsg = errorMsg3;
            break;
         case 4:
            errorMsg = errorMsg4;
            break;
         case 5:
            errorMsg = errorMsg5;
            break;
         default:
            errorMsg = errorMsg0;
            break;
      }
      System.Console.Out.WriteLine(errorMsg);
      // **********************************************************************
      // DISPLAY TRAINING STATISTICS
      // **********************************************************************
      double[] stats = network.ComputeStatistics(xData, yData);
      // Display Network Errors
      System.Console.Out.WriteLine(
         "***********************************************");
      System.Console.Out.WriteLine("--> SSE:                       " +
         (float) stats[0]);
      System.Console.Out.WriteLine("--> RMS:                       " +
         (float) stats[1]);
      System.Console.Out.WriteLine("--> Laplacian Error:           " +
         (float) stats[2]);
      System.Console.Out.WriteLine("--> Scaled Laplacian Error:    " +
         (float) stats[3]);
      System.Console.Out.WriteLine("--> Largest Absolute Residual: " +
         (float) stats[4]);
      System.Console.Out.WriteLine(
         "***********************************************");
      System.Console.Out.WriteLine("");
      // **********************************************************************
      // OBTAIN AND DISPLAY NETWORK WEIGHTS AND GRADIENTS
      // **********************************************************************
      System.Console.Out.WriteLine("--> Getting Network Weights and Gradients");
      // Get weights               
      weight = network.Weights;
      // Get number of weights = number of gradients
      nWeights = network.NumberOfWeights;
      // Obtain Gradient Vector
      gradient = trainer.ErrorGradient;
      // Print Network Weights and Gradients
      System.Console.Out.WriteLine(" ");
      System.Console.Out.WriteLine("--> Network Weights and Gradients:");
      System.Console.Out.WriteLine(
         "***********************************************");
      for (i = 0; i < nWeights; i++)
      {
         System.Console.Out.WriteLine("w[" + i + "]=" + (float) weight[i] +
            " g[" + i + "]=" + (float) gradient[i]);
      }
      System.Console.Out.WriteLine(
         "***********************************************");
      // **********************************************************************
      // SAVE THE TRAINED NETWORK BY SAVING THE SERIALIZED NETWORK OBJECT
      // **********************************************************************
      System.Console.Out.WriteLine("\n--> Saving Trained Network into " +
         networkFileName);
      write(network, networkFileName);
      System.Console.Out.WriteLine("--> Saving xData into " + xDataFileName);
      write(xData, xDataFileName);
      System.Console.Out.WriteLine("--> Saving yData into " + yDataFileName);
      write(yData, yDataFileName);
      System.Console.Out.WriteLine("--> Saving Network Trainer into " +
         trainerFileName);
      write(trainer, trainerFileName);
   }
   // **************************************************************************
   // WRITE SERIALIZED NETWORK TO A FILE
   // **************************************************************************
   public static void write(System.Object obj, System.String filename)
   {
      System.IO.FileStream fos = new System.IO.FileStream(filename,
         System.IO.FileMode.Create);
      IFormatter oos = new BinaryFormatter();
      oos.Serialize(fos, obj);
      fos.Close();
   }
   static FeedForwardNetworkEx1()
   {
      hiddenLayerActivation = Imsl.DataMining.Neural.Activation.Linear;
      outputLayerActivation = Imsl.DataMining.Neural.Activation.Linear;
   }
}

Output

--> Starting Preprocessing of Training Patterns
--> Creating Feed Forward Network Object
--> Feed Forward Network Created with 2 Layers
--> Training Network using Quasi-Newton Trainer
--> Least Squares Training Completed Successfully
***********************************************
--> SSE:                       1.013444E-15
--> RMS:                       2.007463E-19
--> Laplacian Error:           3.005804E-07
--> Scaled Laplacian Error:    3.535235E-10
--> Largest Absolute Residual: 2.784275E-08
***********************************************

--> Getting Network Weights and Gradients
 
--> Network Weights and Gradients:
***********************************************
w[0]=-1.491785 g[0]=-2.611079E-08
w[1]=-1.491785 g[1]=-2.611079E-08
w[2]=-1.491785 g[2]=-2.611079E-08
w[3]=1.616918 g[3]=6.182035E-08
w[4]=1.616918 g[4]=6.182035E-08
w[5]=1.616918 g[5]=6.182035E-08
w[6]=4.725622 g[6]=-5.273856E-08
w[7]=4.725622 g[7]=-5.273856E-08
w[8]=4.725622 g[8]=-5.273856E-08
w[9]=6.217407 g[9]=-8.733E-10
w[10]=6.217407 g[10]=-8.733E-10
w[11]=6.217407 g[11]=-8.733E-10
w[12]=1.072258 g[12]=-1.690978E-07
w[13]=1.072258 g[13]=-1.690978E-07
w[14]=1.072258 g[14]=-1.690978E-07
w[15]=3.850755 g[15]=-1.7029E-08
w[16]=3.850755 g[16]=-1.7029E-08
w[17]=3.850755 g[17]=-1.7029E-08
w[18]=2.411725 g[18]=-1.588144E-08
***********************************************

--> Saving Trained Network into FeedForwardNetworkEx1.ser
--> Saving xData into FeedForwardxDataEx1.ser
--> Saving yData into FeedForwardyDataEx1.ser
--> Saving Network Trainer into FeedForwardTrainerEx1.ser
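
Once saved, the trained network can be restored in a later session and used to forecast new patterns. The following is a hedged sketch: it assumes the Forecast(double[]) method of the IMSL Network class returns the network output for a single input pattern, and it reuses the file name written by the example above.

using System.IO;
using System.Runtime.Serialization.Formatters.Binary;
using Imsl.DataMining.Neural;

public class FeedForwardNetworkEx1Read
{
   public static void Main(System.String[] args)
   {
      // Restore the serialized network written by the example above.
      FileStream fis = new FileStream("FeedForwardNetworkEx1.ser",
         FileMode.Open);
      FeedForwardNetwork network =
         (FeedForwardNetwork) new BinaryFormatter().Deserialize(fis);
      fis.Close();
      // Forecast one pattern: category 1 with scaled continuous input
      // 0.4007054658.  The model gives 10 + 20*0.4007054658 = 18.014109316.
      double[] x = new double[]{1.0, 0.0, 0.0, 0.4007054658};
      double[] y = network.Forecast(x);
      System.Console.Out.WriteLine("Forecast = " + y[0]);
   }
}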
