feat(crossEntropy): crossEntropy cost function
This commit is contained in:
parent
49606406d1
commit
493a20eb0a
BIN
examples/xor
BIN
examples/xor
Binary file not shown.
@ -7,20 +7,27 @@ module Main where
|
|||||||
main = do
|
main = do
|
||||||
let learning_rate = 0.5
|
let learning_rate = 0.5
|
||||||
(iterations, epochs) = (2, 1000)
|
(iterations, epochs) = (2, 1000)
|
||||||
rnetwork = randomNetwork 0 2 [8] 1 -- two inputs, 8 nodes in a single hidden layer, 1 output
|
a = (logistic, logistic')
|
||||||
|
rnetwork = randomNetwork 0 2 [(8, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output
|
||||||
|
|
||||||
inputs = [vector [0, 1], vector [1, 0], vector [1, 1], vector [0, 0]]
|
inputs = [vector [0, 1], vector [1, 0], vector [1, 1], vector [0, 0]]
|
||||||
labels = [vector [1], vector [1], vector [0], vector [0]]
|
labels = [vector [1], vector [1], vector [0], vector [0]]
|
||||||
|
|
||||||
|
initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels
|
||||||
|
|
||||||
network = session inputs rnetwork labels learning_rate (iterations, epochs)
|
network = session inputs rnetwork labels learning_rate (iterations, epochs)
|
||||||
results = map (`forward` network) inputs
|
results = map (`forward` network) inputs
|
||||||
rounded = map (map round . toList) results
|
rounded = map (map round . toList) results
|
||||||
|
|
||||||
|
cost = zipWith crossEntropy (map (`forward` network) inputs) labels
|
||||||
|
|
||||||
putStrLn "parameters: "
|
putStrLn "parameters: "
|
||||||
putStrLn $ "- inputs: " ++ show inputs
|
putStrLn $ "- inputs: " ++ show inputs
|
||||||
putStrLn $ "- labels: " ++ show labels
|
putStrLn $ "- labels: " ++ show labels
|
||||||
putStrLn $ "- learning rate: " ++ show learning_rate
|
putStrLn $ "- learning rate: " ++ show learning_rate
|
||||||
putStrLn $ "- iterations/epochs: " ++ show (iterations, epochs)
|
putStrLn $ "- iterations/epochs: " ++ show (iterations, epochs)
|
||||||
|
putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost
|
||||||
putStrLn "results: "
|
putStrLn "results: "
|
||||||
putStrLn $ "- actual result: " ++ show results
|
putStrLn $ "- actual result: " ++ show results
|
||||||
putStrLn $ "- rounded result: " ++ show rounded
|
putStrLn $ "- rounded result: " ++ show rounded
|
||||||
|
putStrLn $ "- cost (cross-entropy): " ++ show cost
|
||||||
|
@ -19,6 +19,7 @@ library
|
|||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, hmatrix
|
, hmatrix
|
||||||
, random
|
, random
|
||||||
|
, deepseq
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable sibe-exe
|
executable sibe-exe
|
||||||
|
95
src/Sibe.hs
95
src/Sibe.hs
@ -8,49 +8,84 @@ module Sibe
|
|||||||
Layer,
|
Layer,
|
||||||
Input,
|
Input,
|
||||||
Output,
|
Output,
|
||||||
|
Activation,
|
||||||
forward,
|
forward,
|
||||||
randomLayer,
|
randomLayer,
|
||||||
randomNetwork,
|
randomNetwork,
|
||||||
|
saveNetwork,
|
||||||
|
loadNetwork,
|
||||||
train,
|
train,
|
||||||
session,
|
session,
|
||||||
shuffle,
|
shuffle,
|
||||||
|
logistic,
|
||||||
|
logistic',
|
||||||
|
crossEntropy,
|
||||||
|
genSeed,
|
||||||
|
replaceVector
|
||||||
) where
|
) where
|
||||||
import Numeric.LinearAlgebra
|
import Numeric.LinearAlgebra
|
||||||
import System.Random
|
import System.Random
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
import Data.List (foldl', sortBy)
|
import Data.List (foldl', sortBy)
|
||||||
|
import System.IO
|
||||||
|
import Control.DeepSeq
|
||||||
|
|
||||||
type LearningRate = Double
|
type LearningRate = Double
|
||||||
type Input = Vector Double
|
type Input = Vector Double
|
||||||
type Output = Vector Double
|
type Output = Vector Double
|
||||||
|
type Activation = (Vector Double -> Vector Double, Vector Double -> Vector Double)
|
||||||
|
|
||||||
data Layer = L { biases :: !(Vector Double)
|
data Layer = L { biases :: !(Vector Double)
|
||||||
, nodes :: !(Matrix Double)
|
, nodes :: !(Matrix Double)
|
||||||
} deriving (Show)
|
, activation :: Activation
|
||||||
|
}
|
||||||
|
|
||||||
|
instance Show Layer where
|
||||||
|
show (L biases nodes _) = "(" ++ show biases ++ "," ++ show nodes ++ ")"
|
||||||
|
|
||||||
data Network = O Layer
|
data Network = O Layer
|
||||||
| Layer :- Network
|
| Layer :- Network
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
infixr 5 :-
|
infixr 5 :-
|
||||||
|
|
||||||
|
saveNetwork :: Network -> String -> IO ()
|
||||||
|
saveNetwork network file =
|
||||||
|
writeFile file ((show . reverse) (gen network []))
|
||||||
|
where
|
||||||
|
gen (O (L biases nodes _)) list = (biases, nodes) : list
|
||||||
|
gen (L biases nodes _ :- n) list = gen n $ (biases, nodes) : list
|
||||||
|
|
||||||
|
loadNetwork :: [Activation] -> String -> IO Network
|
||||||
|
loadNetwork activations file = do
|
||||||
|
handle <- openFile file ReadMode
|
||||||
|
content <- hGetContents handle
|
||||||
|
let list = read content :: [(Vector Double, Matrix Double)]
|
||||||
|
network = gen list activations
|
||||||
|
content `deepseq` hClose handle
|
||||||
|
return network
|
||||||
|
|
||||||
|
where
|
||||||
|
gen [(biases, nodes)] [a] = O (L biases nodes a)
|
||||||
|
gen ((biases, nodes):hs) (a:as) = L biases nodes a :- gen hs as
|
||||||
|
|
||||||
runLayer :: Input -> Layer -> Output
|
runLayer :: Input -> Layer -> Output
|
||||||
runLayer input (L !biases !weights) = input <# weights + biases
|
runLayer input (L !biases !weights _) = input <# weights + biases
|
||||||
|
|
||||||
forward :: Input -> Network -> Output
|
forward :: Input -> Network -> Output
|
||||||
forward input (O l) = logistic $ runLayer input l
|
forward input (O l@(L _ _ (fn, _))) = fn $ runLayer input l
|
||||||
forward input (l :- n) = forward (logistic $ runLayer input l) n
|
forward input (l@(L _ _ (fn, _)) :- n) = forward ((fst . activation $ l) $ runLayer input l) n
|
||||||
|
|
||||||
randomLayer :: Seed -> (Int, Int) -> Layer
|
randomLayer :: Seed -> (Int, Int) -> Activation -> Layer
|
||||||
randomLayer seed (wr, wc) =
|
randomLayer seed (wr, wc) =
|
||||||
let weights = uniformSample seed wr $ replicate wc (-1, 1)
|
let weights = uniformSample seed wr $ replicate wc (-1, 1)
|
||||||
biases = randomVector seed Uniform wc * 2 - 1
|
biases = randomVector seed Uniform wc * 2 - 1
|
||||||
in L biases weights
|
in L biases weights
|
||||||
|
|
||||||
randomNetwork :: Seed -> Int -> [Int] -> Int -> Network
|
randomNetwork :: Seed -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
|
||||||
randomNetwork seed input [] output =
|
randomNetwork seed input [] (output, a) =
|
||||||
O $ randomLayer seed (input, output)
|
O $ randomLayer seed (input, output) a
|
||||||
randomNetwork seed input (h:hs) output =
|
randomNetwork seed input ((h, a):hs) output =
|
||||||
randomLayer seed (input, h) :-
|
randomLayer seed (input, h) a :-
|
||||||
randomNetwork (seed + 1) h hs output
|
randomNetwork (seed + 1) h hs output
|
||||||
|
|
||||||
logistic :: Vector Double -> Vector Double
|
logistic :: Vector Double -> Vector Double
|
||||||
@ -59,6 +94,14 @@ module Sibe
|
|||||||
logistic' :: Vector Double -> Vector Double
|
logistic' :: Vector Double -> Vector Double
|
||||||
logistic' x = logistic x * (1 - logistic x)
|
logistic' x = logistic x * (1 - logistic x)
|
||||||
|
|
||||||
|
crossEntropy :: Output -> Output -> Double
|
||||||
|
crossEntropy output target =
|
||||||
|
let pairs = zip (toList output) (toList target)
|
||||||
|
n = fromIntegral (length pairs)
|
||||||
|
in (-1 / n) * sum (map f pairs)
|
||||||
|
where
|
||||||
|
f (a, y) = y * log a + (1 - y) * log (1 - a)
|
||||||
|
|
||||||
train :: Input
|
train :: Input
|
||||||
-> Network
|
-> Network
|
||||||
-> Output -- target
|
-> Output -- target
|
||||||
@ -67,30 +110,31 @@ module Sibe
|
|||||||
train input network target alpha = fst $ run input network
|
train input network target alpha = fst $ run input network
|
||||||
where
|
where
|
||||||
run :: Input -> Network -> (Network, Vector Double)
|
run :: Input -> Network -> (Network, Vector Double)
|
||||||
run input (O l@(L biases weights)) =
|
run input (O l@(L biases weights (fn, fn'))) =
|
||||||
let y = runLayer input l
|
let y = runLayer input l
|
||||||
o = logistic y
|
o = fn y
|
||||||
delta = o - target
|
delta = o - target
|
||||||
de = delta * logistic' y
|
-- de = delta * fn' y -- quadratic cost
|
||||||
|
de = delta -- cross entropy cost
|
||||||
|
|
||||||
biases' = biases - scale alpha de
|
biases' = biases - scale alpha de
|
||||||
weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
|
weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
|
||||||
layer = L biases' weights' -- updated layer
|
layer = L biases' weights' (fn, fn') -- updated layer
|
||||||
|
|
||||||
pass = weights #> de
|
pass = weights #> de
|
||||||
-- pass = weights #> de
|
-- pass = weights #> de
|
||||||
|
|
||||||
in (O layer, pass)
|
in (O layer, pass)
|
||||||
run input (l@(L biases weights) :- n) =
|
run input (l@(L biases weights (fn, fn')) :- n) =
|
||||||
let y = runLayer input l
|
let y = runLayer input l
|
||||||
o = logistic y
|
o = fn y
|
||||||
(n', delta) = run o n
|
(n', delta) = run o n
|
||||||
|
|
||||||
de = delta * logistic' y
|
de = delta * fn' y -- quadratic cost
|
||||||
|
|
||||||
biases' = biases - scale alpha de
|
biases' = biases - scale alpha de
|
||||||
weights' = weights - scale alpha (input `outer` de)
|
weights' = weights - scale alpha (input `outer` de)
|
||||||
layer = L biases' weights'
|
layer = L biases' weights' (fn, fn')
|
||||||
|
|
||||||
pass = weights #> de
|
pass = weights #> de
|
||||||
-- pass = weights #> de
|
-- pass = weights #> de
|
||||||
@ -115,3 +159,18 @@ module Sibe
|
|||||||
in map snd $ sortBy (\x y -> fst x) (zip ords list)
|
in map snd $ sortBy (\x y -> fst x) (zip ords list)
|
||||||
where ord x | x == 0 = LT
|
where ord x | x == 0 = LT
|
||||||
| x == 1 = GT
|
| x == 1 = GT
|
||||||
|
|
||||||
|
genSeed :: IO Seed
|
||||||
|
genSeed = do
|
||||||
|
(seed, _) <- random <$> newStdGen :: IO (Int, StdGen)
|
||||||
|
return seed
|
||||||
|
|
||||||
|
replaceVector :: Vector Double -> Int -> Double -> Vector Double
|
||||||
|
replaceVector vec index value =
|
||||||
|
let list = toList vec
|
||||||
|
in fromList $ rrow index list
|
||||||
|
where
|
||||||
|
rrow index [] = []
|
||||||
|
rrow index (x:xs)
|
||||||
|
| index == index = value:xs
|
||||||
|
| otherwise = x : rrow (index + 1) xs
|
||||||
|
Loading…
Reference in New Issue
Block a user