feat(notmnist): notmnist example using SGD + learning rate decay

Mahdi Dibaiee
2016-09-10 00:36:15 +04:30
parent ace0a18653
commit b26347e19f
21 changed files with 619 additions and 320 deletions

src/Sibe.hs
@@ -15,22 +15,37 @@ module Sibe
saveNetwork,
loadNetwork,
train,
-session,
shuffle,
+gd,
+sgd,
run,
sigmoid,
sigmoid',
softmax,
softmax',
one,
relu,
relu',
crossEntropy,
genSeed,
-replaceVector
+replaceVector,
+Session(..),
+accuracy,
+learningRateDecay
) where
import Numeric.LinearAlgebra
import System.Random
import System.Random.Shuffle
import Debug.Trace
-import Data.List (foldl', sortBy)
+import Data.List (foldl', sortBy, genericLength, permutations)
import System.IO
import Control.DeepSeq
import Control.Monad
import qualified Data.Vector.Storable as V
import Data.Default.Class
import System.Exit
import qualified Graphics.Rendering.Chart.Easy as Chart
import Graphics.Rendering.Chart.Backend.Cairo
type LearningRate = Double
type Input = Vector Double
@@ -48,8 +63,33 @@ module Sibe
data Network = O Layer
             | Layer :- Network
             deriving (Show)
infixr 5 :-

data Session = Session { network :: Network
                       , training :: [(Vector Double, Vector Double)]
                       , test :: [(Vector Double, Vector Double)]
                       , learningRate :: Double
                       , epochs :: Int
                       , epoch :: Int
                       , batchSize :: Int
                       , chart :: [(Int, Double, Double)]
                       , momentum :: Double
                       }
emptyNetwork = randomNetwork 0 (0, 0) 0 [] (0, (id, id))

instance Default Session where
  -- fields without a sensible default raise an error when read unset
  def = Session { network = error "You have not specified a network parameter"
                , training = error "You have not specified training data"
                , test = error "You have not specified test data"
                , learningRate = 0.5
                , epochs = 35
                , epoch = 0
                , batchSize = 0
                , chart = []
                , momentum = 0
                }
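
Taken together, the record and its defaults give the new Session-based workflow. A minimal end-to-end sketch — the XOR data and every hyperparameter below are illustrative assumptions, using the sgd, run, learningRateDecay and accuracy definitions added further down in this diff:

import Sibe
import Data.Default.Class (def)
import Numeric.LinearAlgebra (fromList)

main :: IO ()
main = do
  let samples = [ (fromList [0, 0], fromList [0])
                , (fromList [0, 1], fromList [1])
                , (fromList [1, 0], fromList [1])
                , (fromList [1, 1], fromList [0])
                ]
      -- a 2-8-1 sigmoid network, weights drawn uniformly from (-1, 1)
      net = randomNetwork 0 (-1, 1) 2 [(8, (sigmoid, sigmoid'))] (1, (sigmoid, sigmoid'))
      sess = def { network = net
                 , training = samples
                 , test = samples
                 , learningRate = 0.5
                 , epochs = 100
                 , batchSize = 2
                 }
  -- one SGD epoch followed by a decay step, repeated `epochs` times
  trained <- run (fmap (learningRateDecay (1.1, 1e-2)) . sgd) sess
  putStrLn $ "accuracy: " ++ show (accuracy trained)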
saveNetwork :: Network -> String -> IO ()
saveNetwork network file =
writeFile file ((show . reverse) (gen network []))
@@ -73,22 +113,24 @@ module Sibe
runLayer :: Input -> Layer -> Output
runLayer input (Layer !biases !weights _) = input <# weights + biases
-forward :: Input -> Network -> Output
-forward input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l
-forward input (l@(Layer _ _ (fn, _)) :- n) = forward ((fst . activation $ l) $ runLayer input l) n
+forward :: Input -> Session -> Output
+forward input session = compute input (network session)
+  where
+    compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l
+    compute input (l@(Layer _ _ (fn, _)) :- n) = compute ((fst . activation $ l) $ runLayer input l) n
-randomLayer :: Seed -> (Int, Int) -> Activation -> Layer
-randomLayer seed (wr, wc) =
-  let weights = uniformSample seed wr $ replicate wc (-1, 1)
-      biases = randomVector seed Uniform wc * 2 - 1
+randomLayer :: Seed -> (Int, Int) -> (Double, Double) -> Activation -> Layer
+randomLayer seed (wr, wc) (l, u) =
+  let weights = uniformSample seed wr $ replicate wc (l, u)
+      biases = randomVector seed Uniform wc * realToFrac (u - l) + realToFrac l
   in Layer biases weights
-randomNetwork :: Seed -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
-randomNetwork seed input [] (output, a) =
-  O $ randomLayer seed (input, output) a
-randomNetwork seed input ((h, a):hs) output =
-  randomLayer seed (input, h) a :-
-  randomNetwork (seed + 1) h hs output
+randomNetwork :: Seed -> (Double, Double) -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
+randomNetwork seed bound input [] (output, a) =
+  O $ randomLayer seed (input, output) bound a
+randomNetwork seed bound input ((h, a):hs) output =
+  randomLayer seed (input, h) bound a :-
+  randomNetwork (seed + 1) bound h hs output
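
A sketch of how the new bound argument threads through. The 784/64/10 shape matches 28x28 notMNIST images with 10 classes, and the (-0.05, 0.05) range is an assumed example, not a value from this commit:

-- one hidden layer of 64 sigmoid units, a 10-way softmax output,
-- every weight drawn uniformly from (-0.05, 0.05)
net = randomNetwork 0 (-0.05, 0.05) 784 [(64, (sigmoid, sigmoid'))] (10, (softmax, softmax'))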
sigmoid :: Vector Double -> Vector Double
sigmoid x = 1 / max (1 + exp (-x)) 1e-10
@@ -96,18 +138,37 @@ module Sibe
sigmoid' :: Vector Double -> Vector Double
sigmoid' x = sigmoid x * (1 - sigmoid x)
softmax :: Vector Double -> Vector Double
softmax x = cmap (\a -> exp a / s) x
  where
    s = V.sum $ exp x

one :: a -> Double
one x = 1

softmax' :: Vector Double -> Vector Double
softmax' x = softmax x * (1 - softmax x)
relu :: Vector Double -> Vector Double
-relu x = log (max (1 + exp x) 1e-10)
+relu = cmap (max 0.1)

relu' :: Vector Double -> Vector Double
-relu' = sigmoid
+relu' = cmap dev
+  where dev x
+          | x < 0 = 0
+          | otherwise = 1
-crossEntropy :: Output -> Output -> Double
-crossEntropy output target =
-  let pairs = zip (toList output) (toList target)
-      n = fromIntegral (length pairs)
-  in (-1 / n) * sum (map f pairs)
+crossEntropy :: Session -> Double
+crossEntropy session =
+  let inputs = map fst (test session)
+      labels = map (toList . snd) (test session)
+      outputs = map (toList . (`forward` session)) inputs
+      pairs = zip outputs labels
+      n = genericLength pairs
+  in sum (map set pairs) / n
   where
+    set (os, ls) = (-1 / genericLength os) * sum (zipWith (curry f) os ls)
    f (a, y) = y * log (max 1e-10 a) + (1 - y) * log (max (1 - a) 1e-10)
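
In symbols, the new crossEntropy computes the test-set mean of the per-sample binary cross entropy, with logarithm arguments clamped at 1e-10 to avoid log 0:

C = \frac{1}{n}\sum_{i=1}^{n}\left(-\frac{1}{k}\sum_{j=1}^{k}\left[y_{ij}\log a_{ij} + (1 - y_{ij})\log(1 - a_{ij})\right]\right)

where n is the number of test pairs, k the output dimension, a the network output, and y the label.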
train :: Input
@@ -138,35 +199,137 @@ module Sibe
          o = fn y
          (n', delta) = run o n
-         de = delta * fn' y -- quadratic cost
+         de = delta * fn' y
-         biases' = biases - scale alpha de
-         weights' = weights - scale alpha (input `outer` de)
+         biases' = biases - cmap (*alpha) de
+         weights' = weights - cmap (*alpha) (input `outer` de)
          layer = Layer biases' weights' (fn, fn')
          pass = weights #> de
          -- pass = weights #> de
      in (layer :- n', pass)
-session :: [Input] -> Network -> [Output] -> Double -> (Int, Int) -> Network
-session inputs network labels alpha (iterations, epochs) =
-  let n = length inputs
-      indexes = shuffle n (map (`mod` n) [0..n * epochs])
-  in foldl' iter network indexes
-  where
-    iter net i =
-      let n = length inputs
-          index = i `mod` n
-          input = inputs !! index
-          label = labels !! index
-      in foldl' (\net _ -> train input net label alpha) net [0..iterations]
-
-shuffle :: Seed -> [a] -> [a]
-shuffle seed list =
-  let ords = map ord $ take (length list) (randomRs (0, 1) (mkStdGen seed) :: [Int])
-  in map snd $ sortBy (\x y -> fst x) (zip ords list)
-  where ord x | x == 0 = LT
-              | x == 1 = GT
+{-trainMomentum :: Input
+              -> Network
+              -> Output -- target
+              -> Double -- learning rate
+              -> (Double, Double) -- momentum
+              -> Network -- network's output
+trainMomentum input network target alpha (m, v) = fst $ run input network
+  where
+    run :: Input -> Network -> (Network, Vector Double)
+    run input (O l@(Layer biases weights (fn, fn'))) =
+      let y = runLayer input l
+          o = fn y
+          delta = o - target
+          de = delta * fn' y
+          v =
+          -- de = delta -- cross entropy cost
+          biases' = biases - scale alpha de
+          weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
+          layer = Layer biases' weights' (fn, fn') -- updated layer
+          pass = weights #> de
+          -- pass = weights #> de
+      in (O layer, pass)
+    run input (l@(Layer biases weights (fn, fn')) :- n) =
+      let y = runLayer input l
+          o = fn y
+          (n', delta) = run o n
+          de = delta * fn' y
+          biases' = biases - cmap (*alpha) de
+          weights' = weights - cmap (*alpha) (input `outer` de)
+          layer = Layer biases' weights' (fn, fn')
+          pass = weights #> de
+          -- pass = weights #> de
+      in (layer :- n', pass)-}
gd :: Session -> IO Session
gd session = do
  let alpha = learningRate session
      net = network session
  shuffled <- shuffleM (training session)
  -- one full-batch pass: train on every shuffled sample in turn
  let newnet = foldl' (\n (input, label) -> train input n label alpha) net shuffled
  return session { network = newnet
                 , epoch = epoch session + 1
                 }
sgd :: Session -> IO Session
sgd session = do
  let pairs = training session
      bsize = batchSize session
      alpha = learningRate session
      net = network session
      n = length pairs
      iterations = n `div` bsize - 1
  shuffled <- shuffleM pairs
  -- one pass over the shuffled set, bsize samples at a time
  let iter net i =
        let batch = take bsize . drop (i * bsize) $ shuffled
        in foldl' (\n (input, label) -> train input n label alpha) net batch
  let newnet = foldl' iter net [0..iterations]
      cost = crossEntropy (session { network = newnet })
  let el = map (\(e, l, _) -> (e, l)) (chart session)
      ea = map (\(e, _, a) -> (e, a)) (chart session)
  putStrLn $ show (epoch session) ++ " => " ++ show cost ++ " @ " ++ show (learningRate session)
  toFile Chart.def "sgd.png" $ do
    Chart.layoutlr_title Chart..= "loss over time"
    Chart.plotLeft (Chart.line "loss" [el])
    Chart.plotRight (Chart.line "learningRate" [ea])
  return session { network = newnet
                 , epoch = epoch session + 1
                 , chart = (epoch session, cost, learningRate session) : chart session
                 }
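
The take/drop indexing walks the shuffled list one bsize-slice at a time, and because iterations is n `div` bsize - 1, any trailing partial batch is silently dropped. A hypothetical reformulation of the same epoch using chunksOf (from Data.List.Split, already a dependency of this project) would keep that final batch; a sketch, written as if placed inside this module where train and foldl' are in scope:

import Data.List.Split (chunksOf)

-- fold each chunk of bsize samples through train, including the
-- final partial chunk that the indexed loop above skips
sgdEpoch bsize alpha net shuffled =
  foldl' runBatch net (chunksOf bsize shuffled)
  where
    runBatch n = foldl' (\n' (input, label) -> train input n' label alpha) n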
accuracy :: Session -> Double
accuracy session =
  let inputs = map fst (test session)
      labels = map snd (test session)
      results = map (`forward` session) inputs
      rounded = map (map round . toList) results
      equals = zipWith (==) rounded (map (map round . toList) labels)
  in genericLength (filter (== True) equals) / genericLength inputs
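
accuracy rounds both vectors element-wise, which suits sigmoid outputs; for a 10-way softmax output, as in the notMNIST example, the usual alternative is to compare argmax indices. A hypothetical variant that could sit next to accuracy in this module (maxIndex comes from the already-imported Numeric.LinearAlgebra):

-- count a sample as correct when the largest output coordinate
-- falls on the same index as the largest label coordinate
accuracyArgmax :: Session -> Double
accuracyArgmax session =
  let inputs = map fst (test session)
      labels = map snd (test session)
      results = map (`forward` session) inputs
      hits = zipWith (\o l -> maxIndex o == maxIndex l) results labels
  in genericLength (filter id hits) / genericLength inputs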
learningRateDecay :: (Double, Double) -> Session -> Session
learningRateDecay (step, m) session =
  session { learningRate = max m $ learningRate session / step }
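
Since learningRateDecay is pure, it composes with the monadic sgd by mapping over its result; a minimal sketch, where the (1.1, 1e-2) schedule is an assumed example:

-- one SGD epoch, then divide the rate by 1.1 with a floor of 1e-2;
-- after k epochs the rate is max 1e-2 (r0 / 1.1 ^ k)
step :: Session -> IO Session
step = fmap (learningRateDecay (1.1, 1e-2)) . sgd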
run :: (Session -> IO Session)
    -> Session -> IO Session
run fn session = foldM (const . fn) session [1..epochs session]
factorial :: Int -> Int
factorial 0 = 1
factorial x = x * factorial (x - 1)
genSeed :: IO Seed
genSeed = do
@@ -176,12 +339,7 @@ module Sibe
replaceVector :: Vector Double -> Int -> Double -> Vector Double
replaceVector vec index value =
  let list = toList vec
-  in fromList $ rrow index list
-  where
-    rrow index [] = []
-    rrow index (x:xs)
-      | index == index = value:xs
-      | otherwise = x : rrow (index + 1) xs
+  in fromList $ take index list ++ value : drop (index + 1) list
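
The splice-based rewrite behaves like this (a ghci sketch):

-- ghci> replaceVector (fromList [1, 2, 3]) 1 9
-- fromList [1.0,9.0,3.0]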
clip :: Double -> (Double, Double) -> Double
clip x (l, u) = min u (max l x)

src/Sibe/NLP.hs (new file, 129 lines)
@@ -0,0 +1,129 @@
module Sibe.NLP
  (Class,
   Document(..),
   ordNub,
   accuracy,
   recall,
   precision,
   fmeasure,
   cleanText,
   cleanDocuments,
   removeWords,
   removeStopwords,
   ngram,
   ngramText,
  )
  where

import Data.List
import Debug.Trace
import qualified Data.Set as Set
import Data.List.Split
import Data.Maybe
import Control.Arrow ((&&&))
import Text.Regex.PCRE
import Data.Char (isSpace, isNumber, toLower)
import NLP.Stemmer

type Class = Int;

data Document = Document { text :: String
                         , c :: Class
                         } deriving (Eq, Show, Read)

cleanText :: String -> String
cleanText string =
  let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?']) (trim string)
      spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r']
      stemmed = unwords $ map (stem Porter) (words spacify)
      nonumber = filter (not . isNumber) stemmed
      lower = map toLower nonumber
  in (unwords . words) lower -- remove unnecessary spaces
  where
    trim = f . f
      where
        f = reverse . dropWhile isSpace
    replace needle replacement =
      map (\c -> if c == needle then replacement else c)

cleanDocuments :: [Document] -> [Document]
cleanDocuments documents =
  let cleaned = map (\(Document text c) -> Document (cleanText text) c) documents
  in cleaned

removeWords :: [String] -> [Document] -> [Document]
removeWords ws documents =
  map (\(Document text c) -> Document (rm ws text) c) documents
  where
    rm list text =
      unwords $ filter (`notElem` list) (words text)

removeStopwords :: Int -> [Document] -> [Document]
removeStopwords i documents =
  let wc = wordCounts (concatDocs documents)
      wlist = sortBy (\(_, a) (_, b) -> b `compare` a) wc
      stopwords = map fst (take i wlist)
  in removeWords stopwords documents
  where
    vocabulary x = ordNub (words x)
    countWordInDoc d w = genericLength (filter (==w) d)
    wordCounts x =
      let voc = vocabulary x
      in zip voc $ map (countWordInDoc (words x)) voc
    concatDocs = concatMap (\(Document text _) -> text ++ " ")

ordNub :: (Ord a) => [a] -> [a]
ordNub = go Set.empty
  where
    go _ [] = []
    go s (x:xs) = if x `Set.member` s then go s xs
                  else x : go (Set.insert x s) xs

accuracy :: [(Int, (Int, Double))] -> Double
accuracy results =
  let pairs = map (\(a, b) -> (a, fst b)) results
      correct = filter (uncurry (==)) pairs
  in genericLength correct / genericLength results

recall :: [(Int, (Int, Double))] -> Double
recall results =
  let classes = ordNub (map fst results)
      s = sum (map rec classes) / genericLength classes
  in s
  where
    rec a =
      let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
          y = genericLength $ filter (\(c, (r, _)) -> c == a) results
      in t / y

precision :: [(Int, (Int, Double))] -> Double
precision results =
  let classes = ordNub (map fst results)
      s = sum (map prec classes) / genericLength classes
  in s
  where
    prec a =
      let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
          y = genericLength $ filter (\(c, (r, _)) -> r == a) results
      in
        if y == 0
          then 0
          else t / y

fmeasure :: [(Int, (Int, Double))] -> Double
fmeasure results =
  let r = recall results
      p = precision results
  in (2 * p * r) / (p + r)

ngram :: Int -> [Document] -> [Document]
ngram n documents =
  map (\(Document text c) -> Document (ngramText n text) c) documents

ngramText :: Int -> String -> String
ngramText n text =
  let ws = words text
      pairs = zip [0..] ws
      grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs
  in unwords ("<b>_":grams)
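A worked example of the transform: each word is joined to its n - 1 successors, with a <b>_ sentence-start token prepended:

-- ghci> ngramText 2 "the quick fox"
-- "<b>_ the_quick quick_fox fox"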

src/Sibe/NaiveBayes.hs
@@ -1,7 +1,7 @@
module Sibe.NaiveBayes
(Document(..),
NB(..),
-train,
+initialize,
run,
session,
ordNub,
@@ -19,21 +19,13 @@ module Sibe.NaiveBayes
removeStopwords,
)
where
+import Sibe.NLP
import Data.List
-import Debug.Trace
-import qualified Data.Set as Set
-import Data.List.Split
-import Data.Maybe
-import Control.Arrow ((&&&))
-import Text.Regex.PCRE
-import Data.Char (isSpace, isNumber, toLower)
-import NLP.Stemmer
-type Class = Int;
-data Document = Document { text :: String
-                         , c :: Class
-                         } deriving (Eq, Show, Read)
data NB = NB { documents :: [Document]
, classes :: [(Class, Double)]
@@ -44,8 +36,8 @@ module Sibe.NaiveBayes
, cgram :: [(Class, [(String, Int)])]
} deriving (Eq, Show, Read)
-train :: [Document] -> [Class] -> NB
-train documents classes =
+initialize :: [Document] -> [Class] -> NB
+initialize documents classes =
let megadoc = concatDocs documents
vocabulary = genericLength ((ordNub . words) megadoc)
-- (class, prior probability)
@@ -83,17 +75,6 @@ module Sibe.NaiveBayes
classWordsCounts x = wordsCount (classWords x) (classVocabulary x)
classNGramCounts x = wordsCount (classNGramWords x) (ordNub $ classNGramWords x)
-ngram :: Int -> [Document] -> [Document]
-ngram n documents =
-  map (\(Document text c) -> Document (ngramText n text) c) documents
-
-ngramText :: Int -> String -> String
-ngramText n text =
-  let ws = words text
-      pairs = zip [0..] ws
-      grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs
-  in unwords ("<b>_":grams)
session :: [Document] -> NB -> [(Class, (Class, Double))]
session docs nb =
let results = map (\(Document text c) -> (c, run text nb)) docs
@@ -143,91 +124,5 @@ module Sibe.NaiveBayes
variance = sum (map ((^2) . subtract avg) x) / (genericLength x - 1)
in sqrt variance
-cleanText :: String -> String
-cleanText string =
-  let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?']) (trim string)
-      spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r']
-      stemmed = unwords $ map (stem Porter) (words spacify)
-      nonumber = filter (not . isNumber) stemmed
-      lower = map toLower nonumber
-  in (unwords . words) lower -- remove unnecessary spaces
-  where
-    trim = f . f
-      where
-        f = reverse . dropWhile isSpace
-    replace needle replacement =
-      map (\c -> if c == needle then replacement else c)
-
-cleanDocuments :: [Document] -> [Document]
-cleanDocuments documents =
-  let cleaned = map (\(Document text c) -> Document (cleanText text) c) documents
-  in cleaned
-
-removeWords :: [String] -> [Document] -> [Document]
-removeWords ws documents =
-  map (\(Document text c) -> Document (rm ws text) c) documents
-  where
-    rm list text =
-      unwords $ filter (`notElem` list) (words text)
-
-removeStopwords :: Int -> [Document] -> [Document]
-removeStopwords i documents =
-  let wc = wordCounts (concatDocs documents)
-      wlist = sortBy (\(_, a) (_, b) -> b `compare` a) wc
-      stopwords = map fst (take i wlist)
-  in removeWords stopwords documents
-  where
-    vocabulary x = ordNub (words x)
-    countWordInDoc d w = genericLength (filter (==w) d)
-    wordCounts x =
-      let voc = vocabulary x
-      in zip voc $ map (countWordInDoc (words x)) voc
-    concatDocs = concatMap (\(Document text _) -> text ++ " ")
-
-l :: (Show a) => a -> a
-l a = trace (show a) a
-
-ordNub :: (Ord a) => [a] -> [a]
-ordNub = go Set.empty
-  where
-    go _ [] = []
-    go s (x:xs) = if x `Set.member` s then go s xs
-                  else x : go (Set.insert x s) xs
-
-accuracy :: [(Int, (Int, Double))] -> Double
-accuracy results =
-  let pairs = map (\(a, b) -> (a, fst b)) results
-      correct = filter (uncurry (==)) pairs
-  in genericLength correct / genericLength results
-
-recall :: [(Int, (Int, Double))] -> Double
-recall results =
-  let classes = ordNub (map fst results)
-      s = sum (map rec classes) / genericLength classes
-  in s
-  where
-    rec a =
-      let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
-          y = genericLength $ filter (\(c, (r, _)) -> c == a) results
-      in t / y
-
-precision :: [(Int, (Int, Double))] -> Double
-precision results =
-  let classes = ordNub (map fst results)
-      s = sum (map prec classes) / genericLength classes
-  in s
-  where
-    prec a =
-      let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
-          y = genericLength $ filter (\(c, (r, _)) -> r == a) results
-      in
-        if y == 0
-          then 0
-          else t / y
-
-fmeasure :: [(Int, (Int, Double))] -> Double
-fmeasure results =
-  let r = recall results
-      p = precision results
-  in (2 * p * r) / (p + r)
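
With the text helpers moved into Sibe.NLP, the renamed entry point is used as before. A minimal sketch with a hypothetical two-document corpus, assuming the NLP helpers remain re-exported from this module:

import Sibe.NaiveBayes

main :: IO ()
main = do
  let docs = cleanDocuments [ Document "good great fine" 1
                            , Document "bad awful poor" 0
                            ]
      nb = initialize docs [0, 1]
      results = session docs nb -- [(label, (prediction, score))]
  print (accuracy results)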