fix(crossEntropy): implement crossEntropy' to be used in output layer
fix(softmax'): softmax' was incorrect
@@ -44,7 +44,7 @@ A simple Machine Learning library.
 # neural network examples
 stack exec example-xor
 stack exec example-424
-# notMNIST dataset, achieves ~87% accuracy using exponential learning rate decay
+# notMNIST dataset, achieves ~87.5% accuracy after 9 epochs (2 minutes)
 stack exec example-notmnist
 
 # Naive Bayes document classifier, using Reuters dataset

@@ -21,11 +21,12 @@ module Main where
   import Graphics.Rendering.Chart.Backend.Cairo
 
   main = do
     -- random seed; comment this line out for truly random results
     setStdGen (mkStdGen 100)
 
     let a         = (sigmoid, sigmoid')
-        o         = (softmax, one)
-        rnetwork  = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, a)
+        o         = (softmax, crossEntropy')
+        rnetwork  = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, o)
 
     (inputs, labels) <- dataset
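
Note that in the old code the output pair o was defined but never used: the network was built with (10, a), so the output layer silently used the sigmoid pair. The fix passes (10, o). For orientation, a hedged sketch of how these (activation, derivative) pairs are consumed, using identifiers from this diff; the 2-4-2 dimensions are invented for illustration:

import Sibe (randomNetwork, sigmoid, sigmoid', softmax, crossEntropy')

main :: IO ()
main = do
  let a   = (sigmoid, sigmoid')      -- hidden layer: activation and its derivative
      o   = (softmax, crossEntropy') -- output layer: softmax with CE-scaled delta
      net = randomNetwork 0 (-1, 1) 2 [(4, a)] (2, o)  -- seed, weight range, 2 -> 4 -> 2
  net `seq` putStrLn "network built"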

@@ -41,11 +42,11 @@ module Main where
         telabels = take tep . drop trp $ labels
 
     let session = def { learningRate = 0.5
-                      , batchSize = 32
-                      , epochs = 24
-                      , network = rnetwork
-                      , training = zip trinputs trlabels
-                      , test = zip teinputs telabels
+                      , batchSize    = 32
+                      , epochs       = 10
+                      , network      = rnetwork
+                      , training     = zip trinputs trlabels
+                      , test         = zip teinputs telabels
                       } :: Session
 
     let initialCost = crossEntropy session

BIN notmnist.png (binary file not shown; before: 28 KiB, after: 33 KiB)
BIN sgd.png (binary file not shown; before: 28 KiB, after: 31 KiB)

src/Sibe.hs (21 changed lines)
@@ -22,10 +22,10 @@ module Sibe
     sigmoid',
     softmax,
     softmax',
-    one,
     relu,
     relu',
     crossEntropy,
+    crossEntropy',
     genSeed,
     replaceVector,
     Session(..),

@@ -143,11 +143,10 @@ module Sibe
         where
           s = V.sum $ exp x
 
-      one :: a -> Double
-      one x = 1
-
       softmax' :: Vector Double -> Vector Double
-      softmax' x = softmax x * (1 - softmax x)
+      softmax' = cmap (\a -> sig a * (1 - sig a))
+        where
+          sig x = 1 / max (1 + exp (-x)) 1e-10
 
       relu :: Vector Double -> Vector Double
       relu = cmap (max 0.1)
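
A side note on correctness: both the old softmax' (the diagonal form s * (1 - s)) and the new sigmoid-based one are elementwise stand-ins. The true softmax Jacobian is J_ij = s_i * (d_ij - s_j), with d the Kronecker delta, and it cannot be written as a Vector Double -> Vector Double map of the pre-activations alone, which is all the (fn, fn') slot allows. A hedged sketch of the exact backward pass, assuming hmatrix and the softmax defined above; softmaxJvp is a hypothetical helper, not part of this commit:

import Numeric.LinearAlgebra (Vector, scalar, (<.>))

-- For s = softmax x and upstream gradient g:
--   (J^T g)_j = s_j * (g_j - <s, g>)
softmaxJvp :: Vector Double -> Vector Double -> Vector Double
softmaxJvp x g = s * (g - scalar (s <.> g))
  where s = softmax x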

@@ -165,11 +164,13 @@ module Sibe
            outputs = map (toList . (`forward` session)) inputs
            pairs = zip outputs labels
            n = genericLength pairs
 
        in sum (map set pairs) / n
        where
-          set (os, ls) = (-1 / genericLength os) * sum (zipWith (curry f) os ls)
-          f (a, y) = y * log (max 1e-10 a) + (1 - y) * log (max (1 - a) 1e-10)
+          set (os, ls) = (-1 / genericLength os) * sum (zipWith f os ls)
+          f a y = y * log (max 1e-10 a)
 
+      crossEntropy' :: Vector Double -> Vector Double
+      crossEntropy' x = 1 / fromIntegral (V.length x)
 
       train :: Input
             -> Network
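
Dropping the (1 - y) * log (1 - a) term turns the per-component binary cross-entropy into the categorical cross-entropy that matches a softmax output layer. A self-contained sketch of the per-pair cost, mirroring the set/f helpers above (crossEntropyPair is a hypothetical name):

import Data.List (genericLength)

-- Categorical cross-entropy for one (output, label) pair;
-- os is the softmax output, ls the one-hot label.
crossEntropyPair :: [Double] -> [Double] -> Double
crossEntropyPair os ls = (-1 / genericLength os) * sum (zipWith f os ls)
  where f a y = y * log (max 1e-10 a)

main :: IO ()
main = print (crossEntropyPair [0.7, 0.2, 0.1] [1, 0, 0])
-- ~0.1189, i.e. (-1/3) * log 0.7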

@@ -182,9 +183,9 @@ module Sibe
          run input (O l@(Layer biases weights (fn, fn'))) =
            let y = runLayer input l
                o = fn y
                delta = o - target
                de = delta * fn' y
-               -- de = delta -- cross entropy cost
+               -- de = delta / fromIntegral (V.length o) -- cross entropy cost
 
                biases'  = biases  - scale alpha de
                weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
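
Why crossEntropy' typechecks and what it does here: 1 / fromIntegral (V.length x) elaborates, through hmatrix's Num instance for Vector Double, to the one-element vector [1/n], which broadcasts when multiplied by delta. For the output layer, de = (o - target) / n, the usual fused softmax-plus-cross-entropy gradient dL/dz = o - y, scaled by the same 1/n that averages the cost. A minimal check with made-up numbers, assuming hmatrix:

import Numeric.LinearAlgebra (Vector, fromList)
import qualified Data.Vector.Storable as V

-- crossEntropy' as defined in this commit: a singleton [1/n] that
-- hmatrix broadcasts across a length-n vector.
crossEntropy' :: Vector Double -> Vector Double
crossEntropy' x = 1 / fromIntegral (V.length x)

main :: IO ()
main = do
  let o      = fromList [0.7, 0.2, 0.1]  -- softmax output (made up)
      target = fromList [1, 0, 0]        -- one-hot label
      de     = (o - target) * crossEntropy' o
  print de  -- approximately [-0.1, 0.0667, 0.0333] = (o - target) / 3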