fix(crossEntropy): implement crossEntropy' to be used in output layer
fix(softmax'): softmax' was incorrect
@@ -44,7 +44,7 @@ A simple Machine Learning library.
 # neural network examples
 stack exec example-xor
 stack exec example-424
-# notMNIST dataset, achieves ~87% accuracy using exponential learning rate decay
+# notMNIST dataset, achieves ~87.5% accuracy after 9 epochs (2 minutes)
 stack exec example-notmnist
 
 # Naive Bayes document classifier, using Reuters dataset

@@ -21,11 +21,12 @@ module Main where
   import Graphics.Rendering.Chart.Backend.Cairo
 
   main = do
     -- random seed; comment this line out for truly random results
     setStdGen (mkStdGen 100)
 
     let a         = (sigmoid, sigmoid')
-        o         = (softmax, one)
-        rnetwork  = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, a)
+        o         = (softmax, crossEntropy')
+        rnetwork  = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, o)
 
     (inputs, labels) <- dataset
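
Note that in the old code the output pair o was defined but never used: the network was built with (10, a), so the output layer silently used the sigmoid pair. The fix passes (10, o). For orientation, a hedged sketch of how these (activation, derivative) pairs are consumed, using identifiers from this diff; the 2-4-2 dimensions are invented for illustration:

import Sibe (randomNetwork, sigmoid, sigmoid', softmax, crossEntropy')

main :: IO ()
main = do
  let a   = (sigmoid, sigmoid')      -- hidden layer: activation and its derivative
      o   = (softmax, crossEntropy') -- output layer: softmax with CE-scaled delta
      net = randomNetwork 0 (-1, 1) 2 [(4, a)] (2, o)  -- seed, weight range, 2 -> 4 -> 2
  net `seq` putStrLn "network built"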

@@ -41,11 +42,11 @@ module Main where
         telabels = take tep . drop trp $ labels
 
     let session = def { learningRate = 0.5
-                      , batchSize = 32
-                      , epochs = 24
-                      , network = rnetwork
-                      , training = zip trinputs trlabels
-                      , test = zip teinputs telabels
+                      , batchSize    = 32
+                      , epochs       = 10
+                      , network      = rnetwork
+                      , training     = zip trinputs trlabels
+                      , test         = zip teinputs telabels
                       } :: Session
 
     let initialCost = crossEntropy session

BIN notmnist.png (binary file not shown; before: 28 KiB, after: 33 KiB)
BIN sgd.png (binary file not shown; before: 28 KiB, after: 31 KiB)

src/Sibe.hs (21 changed lines)
@@ -22,10 +22,10 @@ module Sibe
     sigmoid',
     softmax,
     softmax',
-    one,
     relu,
     relu',
     crossEntropy,
+    crossEntropy',
     genSeed,
     replaceVector,
     Session(..),

@@ -143,11 +143,10 @@ module Sibe
         where
           s = V.sum $ exp x
 
-      one :: a -> Double
-      one x = 1
-
       softmax' :: Vector Double -> Vector Double
-      softmax' x = softmax x * (1 - softmax x)
+      softmax' = cmap (\a -> sig a * (1 - sig a))
+        where
+          sig x = 1 / max (1 + exp (-x)) 1e-10
 
       relu :: Vector Double -> Vector Double
       relu = cmap (max 0.1)
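
A side note on correctness: both the old softmax' (the diagonal form s * (1 - s)) and the new sigmoid-based one are elementwise stand-ins. The true softmax Jacobian is J_ij = s_i * (d_ij - s_j), with d the Kronecker delta, and it cannot be written as a Vector Double -> Vector Double map of the pre-activations alone, which is all the (fn, fn') slot allows. A hedged sketch of the exact backward pass, assuming hmatrix and the softmax defined above; softmaxJvp is a hypothetical helper, not part of this commit:

import Numeric.LinearAlgebra (Vector, scalar, (<.>))

-- For s = softmax x and upstream gradient g:
--   (J^T g)_j = s_j * (g_j - <s, g>)
softmaxJvp :: Vector Double -> Vector Double -> Vector Double
softmaxJvp x g = s * (g - scalar (s <.> g))
  where s = softmax x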

@@ -165,11 +164,13 @@ module Sibe
            outputs = map (toList . (`forward` session)) inputs
            pairs = zip outputs labels
            n = genericLength pairs
 
        in sum (map set pairs) / n
        where
-          set (os, ls) = (-1 / genericLength os) * sum (zipWith (curry f) os ls)
-          f (a, y) = y * log (max 1e-10 a) + (1 - y) * log (max (1 - a) 1e-10)
+          set (os, ls) = (-1 / genericLength os) * sum (zipWith f os ls)
+          f a y = y * log (max 1e-10 a)
 
+      crossEntropy' :: Vector Double -> Vector Double
+      crossEntropy' x = 1 / fromIntegral (V.length x)
 
       train :: Input
             -> Network
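
Dropping the (1 - y) * log (1 - a) term turns the per-component binary cross-entropy into the categorical cross-entropy that matches a softmax output layer. A self-contained sketch of the per-pair cost, mirroring the set/f helpers above (crossEntropyPair is a hypothetical name):

import Data.List (genericLength)

-- Categorical cross-entropy for one (output, label) pair;
-- os is the softmax output, ls the one-hot label.
crossEntropyPair :: [Double] -> [Double] -> Double
crossEntropyPair os ls = (-1 / genericLength os) * sum (zipWith f os ls)
  where f a y = y * log (max 1e-10 a)

main :: IO ()
main = print (crossEntropyPair [0.7, 0.2, 0.1] [1, 0, 0])
-- ~0.1189, i.e. (-1/3) * log 0.7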

@@ -182,9 +183,9 @@ module Sibe
          run input (O l@(Layer biases weights (fn, fn'))) =
            let y = runLayer input l
                o = fn y
                delta = o - target
                de = delta * fn' y
-               -- de = delta -- cross entropy cost
+               -- de = delta / fromIntegral (V.length o) -- cross entropy cost
 
                biases'  = biases  - scale alpha de
                weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
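
Why crossEntropy' typechecks and what it does here: 1 / fromIntegral (V.length x) elaborates, through hmatrix's Num instance for Vector Double, to the one-element vector [1/n], which broadcasts when multiplied by delta. For the output layer, de = (o - target) / n, the usual fused softmax-plus-cross-entropy gradient dL/dz = o - y, scaled by the same 1/n that averages the cost. A minimal check with made-up numbers, assuming hmatrix:

import Numeric.LinearAlgebra (Vector, fromList)
import qualified Data.Vector.Storable as V

-- crossEntropy' as defined in this commit: a singleton [1/n] that
-- hmatrix broadcasts across a length-n vector.
crossEntropy' :: Vector Double -> Vector Double
crossEntropy' x = 1 / fromIntegral (V.length x)

main :: IO ()
main = do
  let o      = fromList [0.7, 0.2, 0.1]  -- softmax output (made up)
      target = fromList [1, 0, 0]        -- one-hot label
      de     = (o - target) * crossEntropy' o
  print de  -- approximately [-0.1, 0.0667, 0.0333] = (o - target) / 3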