{-# LANGUAGE GADTs #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}

module Sibe
  (Network(..),
   Layer(..),
   Input,
   Output,
   Activation,
   forward,
   forward',
   runLayer,
   runLayer',
   randomLayer,
   randomNetwork,
   buildNetwork,
   saveNetwork,
   loadNetwork,
   train,
   gd,
   sgd,
   run,
   sigmoid,
   sigmoid',
   softmax,
   softmax',
   sampledSoftmax,
   relu,
   relu',
   crossEntropy,
   crossEntropy',
   genSeed,
   replaceVector,
   Session(..),
   accuracy,
   learningRateDecay,
   ignoreBiases,
   one
  ) where

import Numeric.LinearAlgebra
import System.Random
import System.Random.Shuffle
import Debug.Trace
import Data.List (foldl', sortBy, genericLength, permutations)
import System.IO
import Control.DeepSeq
import Control.Monad
import qualified Data.Vector.Storable as V
import Data.Default.Class
import System.Exit

import qualified Graphics.Rendering.Chart.Easy as Chart
import Graphics.Rendering.Chart.Backend.Cairo

type LearningRate = Double
type Input = Vector Double
type Output = Vector Double
type Activation = (Vector Double -> Vector Double, Vector Double -> Vector Double)

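-- An 'Activation' pairs an activation function with its derivative, both
-- applied elementwise to a layer's pre-activation vector; for example
-- (sigmoid, sigmoid') or (relu, relu').
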
data Layer = Layer { biases     :: !(Vector Double)
                   , nodes      :: !(Matrix Double)
                   , activation :: Activation
                   }

instance Show Layer where
  show (Layer biases nodes _) = "(" ++ show biases ++ "," ++ show nodes ++ ")"

data Network = O Layer
             | Layer :- Network

instance Show Network where
  show (Layer biases nodes _ :- n) =
    (show . length $ toLists nodes) ++ "x" ++ (show . length . head . toLists $ nodes) ++ " " ++ (show . length . toList $ biases) ++ " :- " ++ show n
  show (O (Layer biases nodes _)) =
    (show . length $ toLists nodes) ++ "x" ++ (show . length . head . toLists $ nodes) ++ " " ++ (show . length . toList $ biases)

infixr 5 :-

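-- ':-' chains layers right-associatively, with the output layer wrapped in
-- 'O'; e.g., for hypothetical layers:
--
-- > hidden1 :- hidden2 :- O outputLayer
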
data Session = Session { network      :: Network
                       , training     :: [(Vector Double, Vector Double)]
                       , test         :: [(Vector Double, Vector Double)]
                       , learningRate :: Double
                       , epochs       :: Int
                       , epoch        :: Int
                       , batchSize    :: Int
                       , chart        :: [(Int, Double, Double)]
                       , drawChart    :: Bool
                       , chartName    :: String
                       , momentum     :: Double
                       , debug        :: Bool
                       } deriving (Show)

emptyNetwork :: Network
emptyNetwork = randomNetwork 0 (0, 0) 0 [] (0, (id, id))

instance Default Session where
  -- these fields have no sensible default; forcing one that was not
  -- overridden raises a descriptive error
  def = Session { network = error "You have not specified a network parameter"
                , training = error "You have not specified training data"
                , test = error "You have not specified test data"
                , learningRate = 0.5
                , epochs = 35
                , epoch = 0
                , batchSize = 0
                , chart = []
                , drawChart = False
                , chartName = "chart.png"
                , momentum = 0
                , debug = False
                }

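-- A minimal usage sketch ('xorData' is a hypothetical [(input, label)] list):
-- start from 'def' and override the required fields.
--
-- > let session = def { network = randomNetwork 0 (-1, 1) 2 [(2, (sigmoid, sigmoid'))] (1, (sigmoid, sigmoid'))
-- >                   , training = xorData
-- >                   , test = xorData
-- >                   }
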
saveNetwork :: Network -> String -> IO ()
saveNetwork network file =
  writeFile file ((show . reverse) (gen network []))
  where
    gen (O (Layer biases nodes _)) list = (biases, nodes) : list
    gen (Layer biases nodes _ :- n) list = gen n $ (biases, nodes) : list

loadNetwork :: [Activation] -> String -> IO Network
loadNetwork activations file = do
  handle <- openFile file ReadMode
  content <- hGetContents handle
  let list = read content :: [(Vector Double, Matrix Double)]
      network = gen list activations
  content `deepseq` hClose handle
  return network

  where
    gen [(biases, nodes)] [a] = O (Layer biases nodes a)
    gen ((biases, nodes):hs) (a:as) = Layer biases nodes a :- gen hs as

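-- Round-trip sketch: activations are not serialized, so they must be supplied
-- again on load, one per layer and in order. Assuming a two-layer network:
--
-- > saveNetwork net "net.txt"
-- > net' <- loadNetwork [(sigmoid, sigmoid'), (softmax, softmax')] "net.txt"
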
runLayer :: Input -> Layer -> Output
runLayer input (Layer !biases !weights _) = input <# weights + biases

runLayer' :: Input -> Layer -> Output
runLayer' input (Layer !biases !weights _) = input <# weights

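-- 'runLayer' computes the affine map input <# weights + biases (a row vector
-- times the weight matrix); 'runLayer'' omits the bias term and is what
-- 'forward'' uses.
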
forward :: Input -> Session -> Output
forward input session = compute input (network session)
  where
    compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l
    compute input (l@(Layer _ _ (fn, _)) :- n) = compute (fn $ runLayer input l) n

forward' :: Input -> Session -> Output
forward' input session = compute input (network session)
  where
    compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer' input l
    compute input (l@(Layer _ _ (fn, _)) :- n) = compute (fn $ runLayer' input l) n

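-- Example: evaluate the network of a session on a single (hypothetical)
-- two-dimensional input.
--
-- > forward (vector [0.5, 0.2]) session
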
randomLayer :: Seed -> (Int, Int) -> (Double, Double) -> Activation -> Layer
randomLayer seed (wr, wc) (l, u) =
  let weights = uniformSample seed wr $ replicate wc (l, u)
      -- map uniform samples from [0, 1] into [l, u]
      biases = randomVector seed Uniform wc * realToFrac (u - l) + realToFrac l
  in Layer biases weights

randomNetwork :: Seed -> (Double, Double) -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
randomNetwork seed bound input [] (output, a) =
  O $ randomLayer seed (input, output) bound a
randomNetwork seed bound input ((h, a):hs) output =
  randomLayer seed (input, h) bound a :-
  randomNetwork (seed + 1) bound h hs output

buildNetwork :: Seed -> (Double, Double) -> Int -> [(Int, Int, Activation)] -> (Int, Int, Activation) -> Network
buildNetwork seed bound input [] (outputRows, outputColumns, a) =
  O $ randomLayer seed (input, outputColumns) bound a
buildNetwork seed bound input ((rows, columns, a):hs) output =
  randomLayer seed (input, columns) bound a :-
  buildNetwork (seed + 1) bound columns hs output

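-- Sketch: a 2-3-1 network with weights and biases drawn from (-1, 1) and
-- sigmoid activations throughout.
--
-- > randomNetwork 0 (-1, 1) 2 [(3, (sigmoid, sigmoid'))] (1, (sigmoid, sigmoid'))
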
sigmoid :: Vector Double -> Vector Double
sigmoid = cmap (\a -> 1 / max (1 + exp (-a)) 1e-10)

sigmoid' :: Vector Double -> Vector Double
sigmoid' x = sigmoid x * (1 - sigmoid x)

softmax :: Vector Double -> Vector Double
softmax x = cmap (\a -> exp a / s) x
  where
    s = V.sum $ exp x

softmax' :: Vector Double -> Vector Double
softmax' = cmap (\a -> sig a * (1 - sig a))
  where
    sig x = 1 / max (1 + exp (-x)) 1e-10

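-- Note: softmax' is not the full softmax Jacobian; it is the elementwise
-- logistic derivative sig x * (1 - sig x), the common diagonal shortcut
-- when the output layer is paired with a cross-entropy loss.
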
-- used for negative sampling
sampledSoftmax :: Int -> Vector Double -> Vector Double
sampledSoftmax n x = cmap (\a -> exp a / s) x
  where
    s = V.sum . exp $ V.take n x

relu :: Vector Double -> Vector Double
relu = cmap (max 0)

relu' :: Vector Double -> Vector Double
relu' = cmap dev
  where dev x
          | x < 0 = 0
          | otherwise = 1

crossEntropy :: Session -> Double
crossEntropy session =
  let inputs = map fst (test session)
      labels = map (toList . snd) (test session)
      outputs = map (toList . (`forward` session)) inputs
      pairs = zip outputs labels
      n = genericLength pairs
  in sum (map set pairs) / n
  where
    set (os, ls) = (-1 / genericLength os) * sum (zipWith f os ls)
    f a y = y * log (max 1e-10 a)

crossEntropy' :: Vector Double -> Vector Double
crossEntropy' x = konst (1 / fromIntegral (V.length x)) (V.length x)

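-- 'crossEntropy' above averages, over the test set, the per-sample loss
-- -(1/k) * sum_i y_i * log (max 1e-10 a_i), where k is the output size;
-- 'crossEntropy'' is the constant 1/k factor used as its derivative signal.
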
one :: Vector Double -> Vector Double
one v = vector $ replicate (V.length v) 1

train :: Input
      -> Network
      -> Output -- target
      -> Double -- learning rate
      -> Network -- updated network
train input network target alpha = fst $ run input network
  where
    run :: Input -> Network -> (Network, Vector Double)
    run input (O l@(Layer biases weights (fn, fn'))) =
      let y = runLayer input l
          o = fn y
          delta = o - target
          de = delta * fn' y

          biases' = biases - scale alpha de
          weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
          layer = Layer biases' weights' (fn, fn') -- updated layer

          pass = weights #> de

      in (O layer, pass)
    run input (l@(Layer biases weights (fn, fn')) :- n) =
      let y = runLayer input l
          o = fn y
          (n', delta) = run o n

          de = delta * fn' y

          biases' = biases - scale alpha de
          weights' = weights - scale alpha (input `outer` de)
          layer = Layer biases' weights' (fn, fn')

          pass = weights #> de
      in (layer :- n', pass)

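-- How 'train' backpropagates: each 'run' call computes the pre-activation
-- y = runLayer input l, recurses with the activated output, then forms the
-- local gradient de = delta * fn' y, steps biases and weights against it
-- (scaled by alpha), and hands weights #> de back to the layer before it.
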
gd :: Session -> IO Session
gd session = do
  let pairs = training session
      alpha = learningRate session
      net = network session

  shuffled <- shuffleM pairs

  let newnet = foldl' (\n (input, label) -> train input n label alpha) net shuffled
      cost = crossEntropy (session { network = newnet })

  let el = map (\(e, l, _) -> (e, l)) (chart session)
      ea = map (\(e, _, a) -> (e, a)) (chart session)

  when (drawChart session) $ do
    toFile Chart.def (chartName session) $ do
      Chart.layoutlr_title Chart..= "loss over time"
      Chart.plotLeft (Chart.line "loss" [el])
      Chart.plotRight (Chart.line "learningRate" [ea])

  return session { network = newnet
                 , epoch = epoch session + 1
                 , chart = (epoch session, cost, learningRate session):chart session
                 }

sgd :: Session -> IO Session
sgd session = do
  let pairs = training session
      bsize = batchSize session
      alpha = learningRate session
      net = network session

  let n = length pairs
      iterations = n `div` bsize - 1

  shuffled <- shuffleM pairs

  let iter net i =
        let batch = take bsize . drop (i * bsize) $ shuffled
        in foldl' (\n (input, label) -> train input n label alpha) net batch

  let newnet = foldl' iter net [0..iterations]
      cost = crossEntropy (session { network = newnet })

  let el = map (\(e, l, _) -> (e, l)) (chart session)
      ea = map (\(e, _, a) -> (e, a)) (chart session)

  when (drawChart session) $ do
    toFile Chart.def (chartName session) $ do
      Chart.layoutlr_title Chart..= "loss over time"
      Chart.plotLeft (Chart.line "loss" [el])
      Chart.plotRight (Chart.line "learningRate" [ea])

  return session { network = newnet
                 , epoch = epoch session + 1
                 , chart = (epoch session, cost, learningRate session):chart session
                 }

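-- A rough training-loop sketch (assuming a fully populated session; the
-- decay parameters here are arbitrary):
--
-- > trained <- run (fmap (learningRateDecay (2, 0.05)) . sgd) session
--
-- 'run' (below) applies the given step once per epoch.
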
accuracy :: Session -> Double
accuracy session =
  let inputs = map fst (test session)
      labels = map snd (test session)

      results = map (`forward` session) inputs
      rounded = map (map round . toList) results

      equals = zipWith (==) rounded (map (map round . toList) labels)
  in genericLength (filter (== True) equals) / genericLength inputs

learningRateDecay :: (Double, Double) -> Session -> Session
learningRateDecay (step, m) session =
  session { learningRate = max m $ learningRate session / step }

ignoreBiases :: Session -> Session
ignoreBiases session =
  session { network = rmbias (network session) }
  where
    rmbias (O (Layer biases nodes a)) = O $ Layer (biases * 0) nodes a
    rmbias ((Layer biases nodes a) :- n) = Layer (biases * 0) nodes a :- rmbias n

run :: (Session -> IO Session)
    -> Session -> IO Session
run fn session = foldM (\s _ -> fn s) session [1..epochs session]

factorial :: Int -> Int
factorial 0 = 1
factorial x = x * factorial (x - 1)

genSeed :: IO Seed
genSeed = do
  (seed, _) <- random <$> newStdGen :: IO (Int, StdGen)
  return seed

replaceVector :: Vector Double -> Int -> Double -> Vector Double
replaceVector vec index value =
  let list = toList vec
  in fromList $ take index list ++ value : drop (index + 1) list

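-- Example: replaceVector (vector [0, 0, 0]) 1 5 == vector [0.0, 5.0, 0.0]
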
clip :: Double -> (Double, Double) -> Double
clip x (l, u) = min u (max l x)