diff --git a/README.md b/README.md
index cf2ce5e..6a87d73 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ A simple Machine Learning library.
 # neural network examples
 stack exec example-xor
 stack exec example-424
-# notMNIST dataset, achieves ~87% accuracy using exponential learning rate decay
+# notMNIST dataset, achieves ~87.5% accuracy after 9 epochs (2 minutes)
 stack exec example-notmnist
 
 # Naive Bayes document classifier, using Reuters dataset
diff --git a/examples/notmnist.hs b/examples/notmnist.hs
index c14aea5..2628f16 100644
--- a/examples/notmnist.hs
+++ b/examples/notmnist.hs
@@ -21,11 +21,12 @@ module Main where
   import Graphics.Rendering.Chart.Backend.Cairo
 
   main = do
+    -- fixed random seed; comment this line out to get truly random results
     setStdGen (mkStdGen 100)
 
     let a = (sigmoid, sigmoid')
-        o = (softmax, one)
-        rnetwork = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, a)
+        o = (softmax, crossEntropy')
+        rnetwork = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, o)
 
     (inputs, labels) <- dataset
@@ -41,11 +42,11 @@ module Main where
         telabels = take tep . drop trp $ labels
 
     let session = def { learningRate = 0.5
-                      , batchSize = 32
-                      , epochs = 24
-                      , network = rnetwork
-                      , training = zip trinputs trlabels
-                      , test = zip teinputs telabels
+                      , batchSize = 32
+                      , epochs = 10
+                      , network = rnetwork
+                      , training = zip trinputs trlabels
+                      , test = zip teinputs telabels
                       } :: Session
 
     let initialCost = crossEntropy session
diff --git a/notmnist.png b/notmnist.png
index 27f9c86..ff73904 100644
Binary files a/notmnist.png and b/notmnist.png differ
diff --git a/sgd.png b/sgd.png
index 2e4e6de..1f69105 100644
Binary files a/sgd.png and b/sgd.png differ
diff --git a/src/Sibe.hs b/src/Sibe.hs
index 46a0673..521ba79 100644
--- a/src/Sibe.hs
+++ b/src/Sibe.hs
@@ -22,10 +22,10 @@ module Sibe
     sigmoid',
     softmax,
     softmax',
-    one,
     relu,
     relu',
     crossEntropy,
+    crossEntropy',
     genSeed,
     replaceVector,
     Session(..),
@@ -143,11 +143,10 @@ module Sibe
     where
       s = V.sum $ exp x
 
-  one :: a -> Double
-  one x = 1
-
   softmax' :: Vector Double -> Vector Double
-  softmax' x = softmax x * (1 - softmax x)
+  softmax' = cmap (\a -> sig a * (1 - sig a))
+    where
+      sig x = 1 / max (1 + exp (-x)) 1e-10
 
   relu :: Vector Double -> Vector Double
   relu = cmap (max 0.1)
@@ -165,11 +164,13 @@ module Sibe
       outputs = map (toList . (`forward` session)) inputs
       pairs = zip outputs labels
       n = genericLength pairs
-    in sum (map set pairs) / n
     where
-      set (os, ls) = (-1 / genericLength os) * sum (zipWith (curry f) os ls)
-      f (a, y) = y * log (max 1e-10 a) + (1 - y) * log (max (1 - a) 1e-10)
+      set (os, ls) = (-1 / genericLength os) * sum (zipWith f os ls)
+      f a y = y * log (max 1e-10 a)
+
+  crossEntropy' :: Vector Double -> Vector Double
+  crossEntropy' x = 1 / fromIntegral (V.length x)
 
   train :: Input
         -> Network
@@ -182,9 +183,9 @@ module Sibe
       run input (O l@(Layer biases weights (fn, fn'))) =
         let y = runLayer input l
             o = fn y
-            delta = o - target
+            delta = o - target
             de = delta * fn' y
-            -- de = delta -- cross entropy cost
+            -- de = delta / fromIntegral (V.length o) -- cross entropy cost
             biases' = biases - scale alpha de
             weights' = weights - scale alpha (input `outer` de)
             -- small inputs learn slowly
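
A note on the (softmax, crossEntropy') output pairing above: with a softmax
output layer and categorical cross-entropy cost C = -sum(y_i * log o_i), the
gradient of the cost with respect to the layer's pre-activations simplifies to
o - y, which is why the output error in train can be taken as delta scaled by
a constant. Below is a minimal sketch verifying this against finite
differences; it uses plain lists rather than Sibe's hmatrix vectors, and all
names in it are illustrative, not part of the library.

    -- Gradient check for softmax + cross-entropy (illustrative, not Sibe code).
    softmaxL :: [Double] -> [Double]
    softmaxL zs = map (/ s) es
      where
        es = map exp zs
        s  = sum es

    -- categorical cross-entropy, clamped like Sibe's crossEntropy to avoid log 0
    cost :: [Double] -> [Double] -> Double
    cost ys os = negate . sum $ zipWith (\y o -> y * log (max 1e-10 o)) ys os

    -- analytic gradient of the cost w.r.t. the pre-activations: softmax zs - ys
    analyticGrad :: [Double] -> [Double] -> [Double]
    analyticGrad ys zs = zipWith (-) (softmaxL zs) ys

    -- central finite differences, for comparison
    numericGrad :: [Double] -> [Double] -> [Double]
    numericGrad ys zs =
      [ (f (bump i eps) - f (bump i (-eps))) / (2 * eps)
      | i <- [0 .. length zs - 1] ]
      where
        eps = 1e-6
        f = cost ys . softmaxL
        bump i d = [ if j == i then z + d else z | (j, z) <- zip [0 ..] zs ]

    main :: IO ()
    main = do
      let ys = [0, 1, 0]          -- one-hot target
          zs = [0.2, -0.4, 1.3]   -- arbitrary pre-activations
      print (analyticGrad ys zs)
      print (numericGrad ys zs)   -- agrees with the line above to ~1e-6

The commented line in train, de = delta / fromIntegral (V.length o), is this
same gradient additionally averaged over the output units, which is exactly
the constant 1/n that the new crossEntropy' supplies as a stand-in derivative.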
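
Relatedly, dropping the (1 - y) * log (1 - a) term in crossEntropy switches
the reported cost from the binary (per-unit Bernoulli) form to the categorical
form that is the usual companion of a softmax output. A tiny list-based
illustration of the patched per-example term (hypothetical helper name, not
Sibe's API):

    -- Per-example cost after the patch: only y * log a survives,
    -- clamped at 1e-10 to avoid log 0 and averaged over the output units.
    perExampleCost :: [Double] -> [Double] -> Double
    perExampleCost os ls =
      (-1 / fromIntegral (length os)) * sum (zipWith f os ls)
      where
        f a y = y * log (max 1e-10 a)

    main :: IO ()
    main = print (perExampleCost [0.1, 0.8, 0.1] [0, 1, 0])  -- ~0.0744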