feat(crossEntropy): crossEntropy cost function
This commit is contained in:
		
							
								
								
									
										
											BIN
										
									
								
								examples/xor
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								examples/xor
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -7,20 +7,27 @@ module Main where
 | 
			
		||||
  main = do
 | 
			
		||||
    let learning_rate = 0.5
 | 
			
		||||
        (iterations, epochs) = (2, 1000)
 | 
			
		||||
        rnetwork = randomNetwork 0 2 [8] 1 -- two inputs, 8 nodes in a single hidden layer, 1 output
 | 
			
		||||
        a = (logistic, logistic')
 | 
			
		||||
        rnetwork = randomNetwork 0 2 [(8, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output
 | 
			
		||||
 | 
			
		||||
        inputs = [vector [0, 1], vector [1, 0], vector [1, 1], vector [0, 0]]
 | 
			
		||||
        labels = [vector [1], vector [1], vector [0], vector [0]]
 | 
			
		||||
 | 
			
		||||
        initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels
 | 
			
		||||
 | 
			
		||||
        network = session inputs rnetwork labels learning_rate (iterations, epochs)
 | 
			
		||||
        results = map (`forward` network) inputs
 | 
			
		||||
        rounded = map (map round . toList) results
 | 
			
		||||
 | 
			
		||||
        cost = zipWith crossEntropy (map (`forward` network) inputs) labels
 | 
			
		||||
 | 
			
		||||
    putStrLn "parameters: "
 | 
			
		||||
    putStrLn $ "- inputs: " ++ show inputs
 | 
			
		||||
    putStrLn $ "- labels: " ++ show labels
 | 
			
		||||
    putStrLn $ "- learning rate: " ++ show learning_rate
 | 
			
		||||
    putStrLn $ "- iterations/epochs: " ++ show (iterations, epochs)
 | 
			
		||||
    putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost
 | 
			
		||||
    putStrLn "results: "
 | 
			
		||||
    putStrLn $ "- actual result: " ++ show results
 | 
			
		||||
    putStrLn $ "- rounded result: " ++ show rounded
 | 
			
		||||
    putStrLn $ "- cost (cross-entropy): " ++ show cost
 | 
			
		||||
 
 | 
			
		||||
@@ -19,6 +19,7 @@ library
 | 
			
		||||
  build-depends:       base >= 4.7 && < 5
 | 
			
		||||
                     , hmatrix
 | 
			
		||||
                     , random
 | 
			
		||||
                     , deepseq
 | 
			
		||||
  default-language:    Haskell2010
 | 
			
		||||
 | 
			
		||||
executable sibe-exe
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										99
									
								
								src/Sibe.hs
									
									
									
									
									
								
							
							
						
						
									
										99
									
								
								src/Sibe.hs
									
									
									
									
									
								
							@@ -8,49 +8,84 @@ module Sibe
 | 
			
		||||
     Layer,
 | 
			
		||||
     Input,
 | 
			
		||||
     Output,
 | 
			
		||||
     Activation,
 | 
			
		||||
     forward,
 | 
			
		||||
     randomLayer,
 | 
			
		||||
     randomNetwork,
 | 
			
		||||
     saveNetwork,
 | 
			
		||||
     loadNetwork,
 | 
			
		||||
     train,
 | 
			
		||||
     session,
 | 
			
		||||
     shuffle,
 | 
			
		||||
     logistic,
 | 
			
		||||
     logistic',
 | 
			
		||||
     crossEntropy,
 | 
			
		||||
     genSeed,
 | 
			
		||||
     replaceVector
 | 
			
		||||
    ) where
 | 
			
		||||
      import Numeric.LinearAlgebra
 | 
			
		||||
      import System.Random
 | 
			
		||||
      import Debug.Trace
 | 
			
		||||
      import Data.List (foldl', sortBy)
 | 
			
		||||
      import System.IO
 | 
			
		||||
      import Control.DeepSeq
 | 
			
		||||
 | 
			
		||||
      type LearningRate = Double
 | 
			
		||||
      type Input = Vector Double
 | 
			
		||||
      type Output = Vector Double
 | 
			
		||||
      type Activation = (Vector Double -> Vector Double, Vector Double -> Vector Double)
 | 
			
		||||
 | 
			
		||||
      data Layer = L { biases :: !(Vector Double)
 | 
			
		||||
                     , nodes  :: !(Matrix Double)
 | 
			
		||||
                     } deriving (Show)
 | 
			
		||||
      data Layer = L { biases     :: !(Vector Double)
 | 
			
		||||
                     , nodes      :: !(Matrix Double)
 | 
			
		||||
                     , activation :: Activation
 | 
			
		||||
                     }
 | 
			
		||||
 | 
			
		||||
      instance Show Layer where
 | 
			
		||||
        show (L biases nodes _) = "(" ++ show biases ++ "," ++ show nodes ++ ")"
 | 
			
		||||
 | 
			
		||||
      data Network = O Layer
 | 
			
		||||
                   | Layer :- Network
 | 
			
		||||
                   deriving (Show)
 | 
			
		||||
      infixr 5 :-
 | 
			
		||||
 | 
			
		||||
      saveNetwork :: Network -> String -> IO ()
 | 
			
		||||
      saveNetwork network file =
 | 
			
		||||
        writeFile file ((show . reverse) (gen network []))
 | 
			
		||||
        where
 | 
			
		||||
          gen (O (L biases nodes _)) list = (biases, nodes) : list
 | 
			
		||||
          gen (L biases nodes _ :- n) list = gen n $ (biases, nodes) : list
 | 
			
		||||
 | 
			
		||||
      loadNetwork :: [Activation] -> String -> IO Network
 | 
			
		||||
      loadNetwork activations file = do
 | 
			
		||||
        handle <- openFile file ReadMode
 | 
			
		||||
        content <- hGetContents handle
 | 
			
		||||
        let list = read content :: [(Vector Double, Matrix Double)]
 | 
			
		||||
            network = gen list activations
 | 
			
		||||
        content `deepseq` hClose handle
 | 
			
		||||
        return network
 | 
			
		||||
 | 
			
		||||
        where
 | 
			
		||||
          gen [(biases, nodes)] [a] = O (L biases nodes a)
 | 
			
		||||
          gen ((biases, nodes):hs) (a:as) = L biases nodes a :- gen hs as
 | 
			
		||||
 | 
			
		||||
      runLayer :: Input -> Layer -> Output
 | 
			
		||||
      runLayer input (L !biases !weights) = input <# weights + biases
 | 
			
		||||
      runLayer input (L !biases !weights _) = input <# weights + biases
 | 
			
		||||
 | 
			
		||||
      forward :: Input -> Network -> Output
 | 
			
		||||
      forward input (O l) = logistic $ runLayer input l
 | 
			
		||||
      forward input (l :- n) = forward (logistic $ runLayer input l) n
 | 
			
		||||
      forward input (O l@(L _ _ (fn, _))) = fn $ runLayer input l
 | 
			
		||||
      forward input (l@(L _ _ (fn, _)) :- n) = forward ((fst . activation $ l) $ runLayer input l) n
 | 
			
		||||
 | 
			
		||||
      randomLayer :: Seed -> (Int, Int) -> Layer
 | 
			
		||||
      randomLayer :: Seed -> (Int, Int) -> Activation -> Layer
 | 
			
		||||
      randomLayer seed (wr, wc) =
 | 
			
		||||
        let weights = uniformSample seed wr $ replicate wc (-1, 1)
 | 
			
		||||
            biases  = randomVector seed Uniform wc * 2 - 1
 | 
			
		||||
        in L biases weights
 | 
			
		||||
 | 
			
		||||
      randomNetwork :: Seed -> Int -> [Int] -> Int -> Network
 | 
			
		||||
      randomNetwork seed input [] output =
 | 
			
		||||
        O $ randomLayer seed (input, output)
 | 
			
		||||
      randomNetwork seed input (h:hs) output =
 | 
			
		||||
        randomLayer seed (input, h) :-
 | 
			
		||||
      randomNetwork :: Seed -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
 | 
			
		||||
      randomNetwork seed input [] (output, a) =
 | 
			
		||||
        O $ randomLayer seed (input, output) a
 | 
			
		||||
      randomNetwork seed input ((h, a):hs) output =
 | 
			
		||||
        randomLayer seed (input, h) a :-
 | 
			
		||||
        randomNetwork (seed + 1) h hs output
 | 
			
		||||
 | 
			
		||||
      logistic :: Vector Double -> Vector Double
 | 
			
		||||
@@ -59,6 +94,14 @@ module Sibe
 | 
			
		||||
      logistic' :: Vector Double -> Vector Double
 | 
			
		||||
      logistic' x = logistic x * (1 - logistic x)
 | 
			
		||||
 | 
			
		||||
      crossEntropy :: Output -> Output -> Double
 | 
			
		||||
      crossEntropy output target =
 | 
			
		||||
        let pairs = zip (toList output) (toList target)
 | 
			
		||||
            n = fromIntegral (length pairs)
 | 
			
		||||
        in (-1 / n) * sum (map f pairs)
 | 
			
		||||
        where
 | 
			
		||||
          f (a, y) = y * log a + (1 - y) * log (1 - a)
 | 
			
		||||
 | 
			
		||||
      train :: Input
 | 
			
		||||
            -> Network
 | 
			
		||||
            -> Output -- target
 | 
			
		||||
@@ -67,30 +110,31 @@ module Sibe
 | 
			
		||||
      train input network target alpha = fst $ run input network
 | 
			
		||||
        where
 | 
			
		||||
          run :: Input -> Network -> (Network, Vector Double)
 | 
			
		||||
          run input (O l@(L biases weights)) =
 | 
			
		||||
          run input (O l@(L biases weights (fn, fn'))) =
 | 
			
		||||
            let y = runLayer input l
 | 
			
		||||
                o = logistic y
 | 
			
		||||
                o = fn y
 | 
			
		||||
                delta = o - target
 | 
			
		||||
                de = delta * logistic' y
 | 
			
		||||
                -- de = delta * fn' y -- quadratic cost
 | 
			
		||||
                de = delta -- cross entropy cost
 | 
			
		||||
 | 
			
		||||
                biases'  = biases  - scale alpha de
 | 
			
		||||
                weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
 | 
			
		||||
                layer    = L biases' weights' -- updated layer
 | 
			
		||||
                layer    = L biases' weights' (fn, fn') -- updated layer
 | 
			
		||||
 | 
			
		||||
                pass = weights #> de
 | 
			
		||||
                -- pass = weights #> de
 | 
			
		||||
 | 
			
		||||
            in (O layer, pass)
 | 
			
		||||
          run input (l@(L biases weights) :- n) =
 | 
			
		||||
          run input (l@(L biases weights (fn, fn')) :- n) =
 | 
			
		||||
            let y = runLayer input l
 | 
			
		||||
                o = logistic y
 | 
			
		||||
                o = fn y
 | 
			
		||||
                (n', delta) = run o n
 | 
			
		||||
 | 
			
		||||
                de = delta * logistic' y
 | 
			
		||||
                de = delta * fn' y -- quadratic cost
 | 
			
		||||
 | 
			
		||||
                biases'  = biases  - scale alpha de
 | 
			
		||||
                weights' = weights - scale alpha (input `outer` de)
 | 
			
		||||
                layer = L biases' weights'
 | 
			
		||||
                layer = L biases' weights' (fn, fn')
 | 
			
		||||
 | 
			
		||||
                pass = weights #> de
 | 
			
		||||
                -- pass = weights #> de
 | 
			
		||||
@@ -115,3 +159,18 @@ module Sibe
 | 
			
		||||
        in map snd $ sortBy (\x y -> fst x) (zip ords list)
 | 
			
		||||
        where ord x | x == 0 = LT
 | 
			
		||||
                    | x == 1 = GT
 | 
			
		||||
 | 
			
		||||
      genSeed :: IO Seed
 | 
			
		||||
      genSeed = do
 | 
			
		||||
        (seed, _) <- random <$> newStdGen :: IO (Int, StdGen)
 | 
			
		||||
        return seed
 | 
			
		||||
 | 
			
		||||
      replaceVector :: Vector Double -> Int -> Double -> Vector Double
 | 
			
		||||
      replaceVector vec index value =
 | 
			
		||||
        let list = toList vec
 | 
			
		||||
        in fromList $ rrow index list
 | 
			
		||||
        where
 | 
			
		||||
          rrow index [] = []
 | 
			
		||||
          rrow index (x:xs)
 | 
			
		||||
            | index == index = value:xs
 | 
			
		||||
            | otherwise = x : rrow (index + 1) xs
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user