diff --git a/.gitmodules b/.gitmodules index ff2a935..17f4304 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "examples/doc-classifier-data"] path = examples/doc-classifier-data url = git@github.com:mdibaiee/doc-classifier-data +[submodule "examples/notMNIST"] + path = examples/notMNIST + url = git@github.com:mdibaiee/notMNIST diff --git a/examples/424encoder.hs b/examples/424encoder.hs new file mode 100644 index 0000000..4b93941 --- /dev/null +++ b/examples/424encoder.hs @@ -0,0 +1,49 @@ +module Main where + import Sibe + import Numeric.LinearAlgebra + import Data.List + import Debug.Trace + import Data.Default.Class + + main = do + let alpha = 0.5 + epochs = 1000 + a = (sigmoid, sigmoid') + rnetwork = randomNetwork 0 (-0.1, 0.1) 4 [(2, a)] (4, a) + + inputs = [vector [1, 0, 0, 0], + vector [0, 1, 0, 0], + vector [0, 0, 1, 0], + vector [0, 0, 0, 1]] + + labels = [vector [1, 0, 0, 0], + vector [0, 1, 0, 0], + vector [0, 0, 1, 0], + vector [0, 0, 0, 1]] + + session = def { network = rnetwork + , learningRate = 0.5 + , epochs = 1000 + , training = zip inputs labels + , test = zip inputs labels + } :: Session + + let initialCost = crossEntropy session + + newsession <- run gd session + + let results = map (`forward` newsession) inputs + rounded = map (map round . toList) results + + cost = crossEntropy newsession + + putStrLn "parameters: " + putStrLn $ "- inputs: " ++ show inputs + putStrLn $ "- labels: " ++ show labels + putStrLn $ "- learning rate: " ++ show alpha + putStrLn $ "- epochs: " ++ show epochs + putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost + putStrLn "results: " + putStrLn $ "- actual result: " ++ show results + putStrLn $ "- rounded result: " ++ show rounded + putStrLn $ "- cost (cross-entropy): " ++ show cost diff --git a/examples/naivebayes-doc-classifier.hs b/examples/naivebayes-doc-classifier.hs index f0d4985..0e7581e 100644 --- a/examples/naivebayes-doc-classifier.hs +++ b/examples/naivebayes-doc-classifier.hs @@ -1,6 +1,7 @@ module Main where -- import Sibe + import Sibe.NLP import Sibe.NaiveBayes import Text.Printf import Data.List @@ -28,14 +29,14 @@ module Main documents = cleanDocuments . removeWords sws $ createDocuments classes dataset testDocuments = cleanDocuments $ createDocuments classes test - nb = train documents intClasses + nb = initialize documents intClasses -- top-ten topClasses = take 10 . reverse $ sortBy (compare `on` (length . snd)) (cd nb) filtered = map (\(c, ds) -> (c, take 100 ds)) topClasses filteredClasses = map fst filtered ttDocs = concatMap snd filtered - ttNB = train ttDocs filteredClasses + ttNB = initialize ttDocs filteredClasses ttTestDocuments = filter ((`elem` filteredClasses) . c) . cleanDocuments $ createDocuments classes test diff --git a/examples/naivebayes-sentiment-analysis.hs b/examples/naivebayes-sentiment-analysis.hs deleted file mode 100644 index b97f83d..0000000 --- a/examples/naivebayes-sentiment-analysis.hs +++ /dev/null @@ -1,54 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -module Main - where - -- import Sibe - import Sibe.NaiveBayes - import Text.Printf - import Data.List - import Data.Maybe - import Debug.Trace - import Data.List.Split - import System.Directory - import Control.DeepSeq - import System.IO - - main = do - putStr "Reading documents... 
" - neg_documents <- createDocuments "examples/sentiment-analysis-data/train/neg/" - pos_documents <- createDocuments "examples/sentiment-analysis-data/train/pos/" - - test_neg_documents <- createDocuments "examples/sentiment-analysis-data/test/neg/" - test_pos_documents <- createDocuments "examples/sentiment-analysis-data/test/pos/" - putStrLn "done" - - let classes = [0..9] -- rating, from 0 to 9 (1 to 10) - documents = neg_documents ++ pos_documents - nb = train documents classes - - testDocuments = neg_documents ++ pos_documents - - results = map (\(Document text c) -> (c, run text nb)) testDocuments - -- results = map (\(Document text c) -> (c, determine text nb intClasses documents)) devTestDocuments - print results - - -- let showResults (c, r) = putStrLn (show (classes !! c) ++ " ~ " ++ show (classes !! r)) - -- mapM_ showResults results - -- - -- putStrLn $ "Recall: " ++ show (recall results) - -- putStrLn $ "Precision: " ++ show (precision results) - -- putStrLn $ "F Measure: " ++ show (fmeasure results) - -- putStrLn $ "Accuracy: " ++ show (accuracy results) - - createDocuments :: FilePath -> IO [Document] - createDocuments path = do - files <- drop 2 <$> getDirectoryContents path - let ratings = map (subtract 1 . read . take 1 . last . splitOn "_") files :: [Int] - contents <- mapM (forceReadFile . (path ++)) files - return $ zipWith Document contents ratings - - forceReadFile :: FilePath -> IO String - forceReadFile file = do - handle <- openFile file ReadMode - content <- hGetContents handle - content `deepseq` hClose handle - return content diff --git a/examples/notMNIST b/examples/notMNIST new file mode 160000 index 0000000..0dbdfd4 --- /dev/null +++ b/examples/notMNIST @@ -0,0 +1 @@ +Subproject commit 0dbdfd43ffb8e90a3657ed040fd1fb3d25654b51 diff --git a/examples/notmnist.hs b/examples/notmnist.hs new file mode 100644 index 0000000..d913377 --- /dev/null +++ b/examples/notmnist.hs @@ -0,0 +1,111 @@ +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Main where + import Sibe + import Numeric.LinearAlgebra + import Data.List + import Debug.Trace + import System.IO + import System.Directory + import Codec.Picture + import Codec.Picture.Types + import qualified Data.Vector.Storable as V + import Data.Either + import System.Random + import System.Random.Shuffle + import Data.Default.Class + + import qualified Graphics.Rendering.Chart.Easy as Chart + import Graphics.Rendering.Chart.Backend.Cairo + + main = do + setStdGen (mkStdGen 100) + + let a = (sigmoid, sigmoid') + o = (softmax, one) + rnetwork = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, a) + + (inputs, labels) <- dataset + + let trp = length inputs * 70 `div` 100 + tep = length inputs * 30 `div` 100 + + -- training data + trinputs = take trp inputs + trlabels = take trp labels + + -- test data + teinputs = take tep . drop trp $ inputs + telabels = take tep . drop trp $ labels + + let session = def { learningRate = 0.5 + , batchSize = 32 + , epochs = 35 + , network = rnetwork + , training = zip trinputs trlabels + , test = zip teinputs telabels + } :: Session + + let initialCost = crossEntropy session + + newsession <- run (sgd . 
learningRateDecay (1.1, 5e-2)) session + + let el = map (\(e, l, _) -> (e, l)) (chart newsession) + ea = map (\(e, _, a) -> (e, a)) (chart newsession) + toFile Chart.def "notmnist.png" $ do + Chart.layoutlr_title Chart..= "loss over time" + Chart.plotLeft (Chart.line "loss" [el]) + Chart.plotRight (Chart.line "learningRate" [ea]) + + + let cost = crossEntropy newsession + + putStrLn "parameters: " + putStrLn $ "- batch size: " ++ show (batchSize session) + putStrLn $ "- learning rate: " ++ show (learningRate session) + putStrLn $ "- epochs: " ++ show (epochs session) + putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost + putStrLn "results: " + putStrLn $ "- accuracy: " ++ show (accuracy newsession) + putStrLn $ "- cost (cross-entropy): " ++ show cost + + dataset :: IO ([Vector Double], [Vector Double]) + dataset = do + let dir = "examples/notMNIST/" + + groups <- filter ((/= '.') . head) <$> listDirectory dir + + inputFiles <- mapM (listDirectory . (dir ++)) groups + + let n = 512 {-- minimum (map length inputFiles) --} + numbers = map (`div` n) [0..n * length groups - 1] + inputFilesFull = map (\(i, g) -> map ((dir ++ i ++ "/") ++) g) (zip groups inputFiles) + + + inputImages <- mapM (mapM readImage . take n) inputFilesFull + + let names = map (take n) inputFilesFull + + let (l, r) = partitionEithers $ concat inputImages + inputs = map (fromPixels . convertRGB8) r + labels = map (\i -> V.replicate i 0 `V.snoc` 1 V.++ V.replicate (9 - i) 0) numbers + + pairs = zip inputs labels + + shuffled <- shuffleM pairs + return (map fst shuffled, map snd shuffled) + + where + fromPixels :: Image PixelRGB8 -> Vector Double + fromPixels img@Image { .. } = + let pairs = [(x, y) | x <- [0..imageWidth - 1], y <- [0..imageHeight - 1]] + in V.fromList $ map iter pairs + where + iter (x, y) = + let (PixelRGB8 r g b) = convertPixel $ pixelAt img x y + in + if r == 0 && g == 0 && b == 0 then 0 else 1 + + diff --git a/examples/recurrent-doc-classifier.hs b/examples/recurrent-doc-classifier.hs new file mode 100644 index 0000000..e69de29 diff --git a/examples/sentiment-analysis-data b/examples/sentiment-analysis-data deleted file mode 100755 index f0a3f50..0000000 --- a/examples/sentiment-analysis-data +++ /dev/null @@ -1,5 +0,0 @@ -XSym -0040 -3666c4cacaf995ebd11ef25aab70de99 -../../sibe-repos/sentiment-analysis-data - \ No newline at end of file diff --git a/examples/sin.hs b/examples/sin.hs new file mode 100644 index 0000000..8dc2616 --- /dev/null +++ b/examples/sin.hs @@ -0,0 +1,35 @@ +module Main where + import Sibe + import Numeric.LinearAlgebra + import Data.List + import Debug.Trace + + main = do + let alpha = 0.5 + epochs = 1000 + a = (sigmoid, sigmoid') + lo = (sigmoid, (\_ -> 1)) -- cross entropy + -- a = (relu, relu') + rnetwork = randomNetwork 0 (-1, 1) 1 [(50, a)] (1, lo) + + inputs = map (\a -> vector [a]) (reverse [0, 30, 45, 60, 90]) + labels = map (\deg -> vector $ [sin $ deg * pi/180]) (reverse [0, 30, 45, 60, 90]) + + initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels + + network <- run session inputs rnetwork labels alpha epochs + + let results = map (`forward` network) inputs + rounded = map (map round . 
toList) results + + cost = zipWith crossEntropy (map (`forward` network) inputs) labels + + putStrLn "parameters: " + putStrLn $ "- inputs: " ++ show inputs + putStrLn $ "- labels: " ++ show labels + putStrLn $ "- learning rate: " ++ show alpha + putStrLn $ "- epochs: " ++ show epochs + {-putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost-} + putStrLn "results: " + putStrLn $ "- actual result: " ++ show results + {-putStrLn $ "- cost (cross-entropy): " ++ show cost-} diff --git a/examples/xor.hs b/examples/xor.hs index 5ed8d27..92eedcf 100644 --- a/examples/xor.hs +++ b/examples/xor.hs @@ -3,30 +3,37 @@ module Main where import Numeric.LinearAlgebra import Data.List import Debug.Trace + import Data.Default.Class main = do - let learning_rate = 0.5 - (iterations, epochs) = (2, 1000) - a = (sigmoid, sigmoid') - rnetwork = randomNetwork 0 2 [(8, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output + let a = (sigmoid, sigmoid') + rnetwork = randomNetwork 0 (-1, 1) 2 [(2, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output inputs = [vector [0, 1], vector [1, 0], vector [1, 1], vector [0, 0]] labels = [vector [1], vector [1], vector [0], vector [0]] - initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels + session = def { network = rnetwork + , learningRate = 0.5 + , epochs = 1000 + , training = zip inputs labels + , test = zip inputs labels + } :: Session - network = session inputs rnetwork labels learning_rate (iterations, epochs) - results = map (`forward` network) inputs + initialCost = crossEntropy session + + newsession <- run gd session + + let results = map (`forward` newsession) inputs rounded = map (map round . toList) results - cost = zipWith crossEntropy (map (`forward` network) inputs) labels + cost = crossEntropy newsession putStrLn "parameters: " putStrLn $ "- inputs: " ++ show inputs putStrLn $ "- labels: " ++ show labels - putStrLn $ "- learning rate: " ++ show learning_rate - putStrLn $ "- iterations/epochs: " ++ show (iterations, epochs) - putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost + putStrLn $ "- learning rate: " ++ show (learningRate session) + putStrLn $ "- epochs: " ++ show (epochs session) + putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost putStrLn "results: " putStrLn $ "- actual result: " ++ show results putStrLn $ "- rounded result: " ++ show rounded diff --git a/notmnist-0.png b/notmnist-0.png new file mode 100644 index 0000000..1bc6967 Binary files /dev/null and b/notmnist-0.png differ diff --git a/notmnist-1.png b/notmnist-1.png new file mode 100644 index 0000000..a6aa761 Binary files /dev/null and b/notmnist-1.png differ diff --git a/notmnist-2.png b/notmnist-2.png new file mode 100644 index 0000000..c3252d9 Binary files /dev/null and b/notmnist-2.png differ diff --git a/notmnist.png b/notmnist.png new file mode 100644 index 0000000..867fd5f Binary files /dev/null and b/notmnist.png differ diff --git a/sgd.png b/sgd.png new file mode 100644 index 0000000..1c0c683 Binary files /dev/null and b/sgd.png differ diff --git a/sibe.cabal b/sibe.cabal index d401501..876c55b 100644 --- a/sibe.cabal +++ b/sibe.cabal @@ -15,7 +15,7 @@ cabal-version: >=1.10 library hs-source-dirs: src - exposed-modules: Sibe, Sibe.NaiveBayes + exposed-modules: Sibe, Sibe.NaiveBayes, Sibe.NLP build-depends: base >= 4.7 && < 5 , hmatrix , random @@ -26,16 +26,21 @@ library , regex-pcre , text , stemmer + , vector + , random-shuffle + , data-default-class + , Chart + , Chart-cairo 
default-language: Haskell2010 -executable sibe-exe - hs-source-dirs: app - main-is: Main.hs - ghc-options: -threaded -rtsopts -with-rtsopts=-N - build-depends: base - , sibe - , hmatrix - default-language: Haskell2010 +--executable sibe-exe + --hs-source-dirs: app + --main-is: Main.hs + --ghc-options: -threaded -rtsopts -with-rtsopts=-N + --build-depends: base + --, sibe + --, hmatrix + --default-language: Haskell2010 executable example-xor hs-source-dirs: examples @@ -44,6 +49,43 @@ executable example-xor build-depends: base , sibe , hmatrix + , data-default-class + default-language: Haskell2010 + +--executable example-sin + --hs-source-dirs: examples + --main-is: sin.hs + --ghc-options: -threaded -rtsopts -with-rtsopts=-N + --build-depends: base + --, sibe + --, hmatrix + --default-language: Haskell2010 + +executable example-424 + hs-source-dirs: examples + main-is: 424encoder.hs + ghc-options: -threaded -rtsopts -with-rtsopts=-N + build-depends: base + , sibe + , hmatrix + , data-default-class + default-language: Haskell2010 + +executable example-notmnist + hs-source-dirs: examples + main-is: notmnist.hs + ghc-options: -threaded -rtsopts -with-rtsopts=-N + build-depends: base + , sibe + , hmatrix + , directory >= 1.2.5.0 + , JuicyPixels == 3.2.7.2 + , vector == 0.11.0.0 + , random + , random-shuffle + , data-default-class + , Chart + , Chart-cairo default-language: Haskell2010 executable example-naivebayes-doc-classifier @@ -57,19 +99,6 @@ executable example-naivebayes-doc-classifier , split default-language: Haskell2010 -executable example-naivebayes-sentiment-analysis - hs-source-dirs: examples - main-is: naivebayes-sentiment-analysis.hs - ghc-options: -threaded -rtsopts -with-rtsopts=-N - build-depends: base - , sibe - , hmatrix - , containers - , split - , directory - , deepseq - default-language: Haskell2010 - test-suite sibe-test type: exitcode-stdio-1.0 hs-source-dirs: test diff --git a/src/Sibe.hs b/src/Sibe.hs index 79d4f08..46a0673 100644 --- a/src/Sibe.hs +++ b/src/Sibe.hs @@ -15,22 +15,37 @@ module Sibe saveNetwork, loadNetwork, train, - session, - shuffle, + gd, + sgd, + run, sigmoid, sigmoid', + softmax, + softmax', + one, relu, relu', crossEntropy, genSeed, - replaceVector + replaceVector, + Session(..), + accuracy, + learningRateDecay ) where import Numeric.LinearAlgebra import System.Random + import System.Random.Shuffle import Debug.Trace - import Data.List (foldl', sortBy) + import Data.List (foldl', sortBy, genericLength, permutations) import System.IO import Control.DeepSeq + import Control.Monad + import qualified Data.Vector.Storable as V + import Data.Default.Class + import System.Exit + + import qualified Graphics.Rendering.Chart.Easy as Chart + import Graphics.Rendering.Chart.Backend.Cairo type LearningRate = Double type Input = Vector Double @@ -48,8 +63,33 @@ module Sibe data Network = O Layer | Layer :- Network deriving (Show) + infixr 5 :- + data Session = Session { network :: Network + , training :: [(Vector Double, Vector Double)] + , test :: [(Vector Double, Vector Double)] + , learningRate :: Double + , epochs :: Int + , epoch :: Int + , batchSize :: Int + , chart :: [(Int, Double, Double)] + , momentum :: Double + } + + emptyNetwork = randomNetwork 0 (0, 0) 0 [] (0, (id, id)) + instance Default Session where + def = Session { network = seq (die "You have not specified a network parameter") emptyNetwork + , training = seq (die "You have not specified training data") [] + , test = seq (die "You have not specified test data") [] + , learningRate = 0.5 + , 
epochs = 35 + , epoch = 0 + , batchSize = 0 + , chart = [] + , momentum = 0 + } + saveNetwork :: Network -> String -> IO () saveNetwork network file = writeFile file ((show . reverse) (gen network [])) @@ -73,22 +113,24 @@ module Sibe runLayer :: Input -> Layer -> Output runLayer input (Layer !biases !weights _) = input <# weights + biases - forward :: Input -> Network -> Output - forward input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l - forward input (l@(Layer _ _ (fn, _)) :- n) = forward ((fst . activation $ l) $ runLayer input l) n + forward :: Input -> Session -> Output + forward input session = compute input (network session) + where + compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l + compute input (l@(Layer _ _ (fn, _)) :- n) = compute ((fst . activation $ l) $ runLayer input l) n - randomLayer :: Seed -> (Int, Int) -> Activation -> Layer - randomLayer seed (wr, wc) = - let weights = uniformSample seed wr $ replicate wc (-1, 1) - biases = randomVector seed Uniform wc * 2 - 1 + randomLayer :: Seed -> (Int, Int) -> (Double, Double) -> Activation -> Layer + randomLayer seed (wr, wc) (l, u) = + let weights = uniformSample seed wr $ replicate wc (l, u) + biases = randomVector seed Uniform wc * realToFrac u - realToFrac l in Layer biases weights - randomNetwork :: Seed -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network - randomNetwork seed input [] (output, a) = - O $ randomLayer seed (input, output) a - randomNetwork seed input ((h, a):hs) output = - randomLayer seed (input, h) a :- - randomNetwork (seed + 1) h hs output + randomNetwork :: Seed -> (Double, Double) -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network + randomNetwork seed bound input [] (output, a) = + O $ randomLayer seed (input, output) bound a + randomNetwork seed bound input ((h, a):hs) output = + randomLayer seed (input, h) bound a :- + randomNetwork (seed + 1) bound h hs output sigmoid :: Vector Double -> Vector Double sigmoid x = 1 / max (1 + exp (-x)) 1e-10 @@ -96,18 +138,37 @@ module Sibe sigmoid' :: Vector Double -> Vector Double sigmoid' x = sigmoid x * (1 - sigmoid x) + softmax :: Vector Double -> Vector Double + softmax x = cmap (\a -> exp a / s) x + where + s = V.sum $ exp x + + one :: a -> Double + one x = 1 + + softmax' :: Vector Double -> Vector Double + softmax' x = softmax x * (1 - softmax x) + relu :: Vector Double -> Vector Double - relu x = log (max (1 + exp x) 1e-10) + relu = cmap (max 0.1) relu' :: Vector Double -> Vector Double - relu' = sigmoid + relu' = cmap dev + where dev x + | x < 0 = 0 + | otherwise = 1 - crossEntropy :: Output -> Output -> Double - crossEntropy output target = - let pairs = zip (toList output) (toList target) - n = fromIntegral (length pairs) - in (-1 / n) * sum (map f pairs) + crossEntropy :: Session -> Double + crossEntropy session = + let inputs = map fst (test session) + labels = map (toList . snd) (test session) + outputs = map (toList . 
(`forward` session)) inputs + pairs = zip outputs labels + n = genericLength pairs + + in sum (map set pairs) / n where + set (os, ls) = (-1 / genericLength os) * sum (zipWith (curry f) os ls) f (a, y) = y * log (max 1e-10 a) + (1 - y) * log (max (1 - a) 1e-10) train :: Input @@ -138,35 +199,137 @@ module Sibe o = fn y (n', delta) = run o n - de = delta * fn' y -- quadratic cost + de = delta * fn' y - biases' = biases - scale alpha de - weights' = weights - scale alpha (input `outer` de) + biases' = biases - cmap (*alpha) de + weights' = weights - cmap (*alpha) (input `outer` de) layer = Layer biases' weights' (fn, fn') pass = weights #> de -- pass = weights #> de in (layer :- n', pass) - session :: [Input] -> Network -> [Output] -> Double -> (Int, Int) -> Network - session inputs network labels alpha (iterations, epochs) = - let n = length inputs - indexes = shuffle n (map (`mod` n) [0..n * epochs]) - in foldl' iter network indexes + {-trainMomentum :: Input + -> Network + -> Output -- target + -> Double -- learning rate + -> (Double, Double) -- momentum + -> Network -- network's output + trainMomentum input network target alpha (m, v) = fst $ run input network where - iter net i = - let n = length inputs - index = i `mod` n - input = inputs !! index - label = labels !! index - in foldl' (\net _ -> train input net label alpha) net [0..iterations] + run :: Input -> Network -> (Network, Vector Double) + run input (O l@(Layer biases weights (fn, fn'))) = + let y = runLayer input l + o = fn y + delta = o - target + de = delta * fn' y + v = + -- de = delta -- cross entropy cost - shuffle :: Seed -> [a] -> [a] - shuffle seed list = - let ords = map ord $ take (length list) (randomRs (0, 1) (mkStdGen seed) :: [Int]) - in map snd $ sortBy (\x y -> fst x) (zip ords list) - where ord x | x == 0 = LT - | x == 1 = GT + biases' = biases - scale alpha de + weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly + layer = Layer biases' weights' (fn, fn') -- updated layer + + pass = weights #> de + -- pass = weights #> de + + in (O layer, pass) + run input (l@(Layer biases weights (fn, fn')) :- n) = + let y = runLayer input l + o = fn y + (n', delta) = run o n + + de = delta * fn' y + + biases' = biases - cmap (*alpha) de + weights' = weights - cmap (*alpha) (input `outer` de) + layer = Layer biases' weights' (fn, fn') + + pass = weights #> de + -- pass = weights #> de + in (layer :- n', pass)-} + + gd :: Session -> IO Session + gd session = do + seed <- newStdGen + + let pairs = training session + alpha = learningRate session + net = network session + + let n = length pairs + + shuffled <- shuffleM pairs + + let newnet = foldl' (\n (input, label) -> train input n label alpha) net pairs + + return session { network = newnet + , epoch = epoch session + 1 + } + + sgd :: Session -> IO Session + sgd session = do + seed <- newStdGen + + let pairs = training session + bsize = batchSize session + alpha = learningRate session + net = network session + + let n = length pairs + iterations = n `div` bsize - 1 + + shuffled <- shuffleM pairs + + let iter net i = + let n = length pairs + batch = take bsize . 
drop (i * bsize) $ shuffled + batchInputs = map fst batch + batchLabels = map snd batch + batchPair = zip batchInputs batchLabels + in foldl' (\n (input, label) -> train input n label alpha) net batchPair + + let newnet = foldl' iter net [0..iterations] + cost = crossEntropy (session { network = newnet }) + + let el = map (\(e, l, _) -> (e, l)) (chart session) + ea = map (\(e, _, a) -> (e, a)) (chart session) + + putStrLn $ (show $ epoch session) ++ " => " ++ (show cost) ++ " @ " ++ (show $ learningRate session) + + toFile Chart.def "sgd.png" $ do + Chart.layoutlr_title Chart..= "loss over time" + Chart.plotLeft (Chart.line "loss" [el]) + Chart.plotRight (Chart.line "learningRate" [ea]) + + return session { network = newnet + , epoch = epoch session + 1 + , chart = (epoch session, cost, learningRate session):chart session + } + + + accuracy :: Session -> Double + accuracy session = + let inputs = map fst (test session) + labels = map snd (test session) + + results = map (`forward` session) inputs + rounded = map (map round . toList) results + + equals = zipWith (==) rounded (map (map round . toList) labels) + in genericLength (filter (== True) equals) / genericLength inputs + + learningRateDecay :: (Double, Double) -> Session -> Session + learningRateDecay (step, m) session = + session { learningRate = max m $ learningRate session / step } + + run :: (Session -> IO Session) + -> Session -> IO Session + run fn session = foldM (\s i -> fn s) session [0..epochs session] + + factorial :: Int -> Int + factorial 0 = 1 + factorial x = x * factorial (x - 1) genSeed :: IO Seed genSeed = do @@ -176,12 +339,7 @@ module Sibe replaceVector :: Vector Double -> Int -> Double -> Vector Double replaceVector vec index value = let list = toList vec - in fromList $ rrow index list - where - rrow index [] = [] - rrow index (x:xs) - | index == index = value:xs - | otherwise = x : rrow (index + 1) xs + in fromList $ take index list ++ value : drop (index + 1) list clip :: Double -> (Double, Double) -> Double clip x (l, u) = min u (max l x) diff --git a/src/Sibe/LogisticRegression.hs b/src/Sibe/LogisticRegression.hs new file mode 100644 index 0000000..e69de29 diff --git a/src/Sibe/NLP.hs b/src/Sibe/NLP.hs new file mode 100644 index 0000000..29e63c5 --- /dev/null +++ b/src/Sibe/NLP.hs @@ -0,0 +1,129 @@ +module Sibe.NLP + (Class, + Document(..), + ordNub, + accuracy, + recall, + precision, + fmeasure, + cleanText, + cleanDocuments, + removeWords, + removeStopwords, + ngram, + ngramText, + ) + where + import Data.List + import Debug.Trace + import qualified Data.Set as Set + import Data.List.Split + import Data.Maybe + import Control.Arrow ((&&&)) + import Text.Regex.PCRE + import Data.Char (isSpace, isNumber, toLower) + import NLP.Stemmer + + type Class = Int; + + data Document = Document { text :: String + , c :: Class + } deriving (Eq, Show, Read) + + + cleanText :: String -> String + cleanText string = + let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?']) (trim string) + spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r'] + stemmed = unwords $ map (stem Porter) (words spacify) + nonumber = filter (not . isNumber) stemmed + lower = map toLower nonumber + in (unwords . words) lower -- remove unnecessary spaces + where + trim = f . f + where + f = reverse . 
dropWhile isSpace + replace needle replacement = + map (\c -> if c == needle then replacement else c) + + cleanDocuments :: [Document] -> [Document] + cleanDocuments documents = + let cleaned = map (\(Document text c) -> Document (cleanText text) c) documents + in cleaned + + removeWords :: [String] -> [Document] -> [Document] + removeWords ws documents = + map (\(Document text c) -> Document (rm ws text) c) documents + where + rm list text = + unwords $ filter (`notElem` list) (words text) + + removeStopwords :: Int -> [Document] -> [Document] + removeStopwords i documents = + let wc = wordCounts (concatDocs documents) + wlist = sortBy (\(_, a) (_, b) -> b `compare` a) wc + stopwords = map fst (take i wlist) + in removeWords stopwords documents + where + vocabulary x = ordNub (words x) + countWordInDoc d w = genericLength (filter (==w) d) + wordCounts x = + let voc = vocabulary x + in zip voc $ map (countWordInDoc (words x)) voc + + concatDocs = concatMap (\(Document text _) -> text ++ " ") + + ordNub :: (Ord a) => [a] -> [a] + ordNub = go Set.empty + where + go _ [] = [] + go s (x:xs) = if x `Set.member` s then go s xs + else x : go (Set.insert x s) xs + + accuracy :: [(Int, (Int, Double))] -> Double + accuracy results = + let pairs = map (\(a, b) -> (a, fst b)) results + correct = filter (uncurry (==)) pairs + in genericLength correct / genericLength results + + recall :: [(Int, (Int, Double))] -> Double + recall results = + let classes = ordNub (map fst results) + s = sum (map rec classes) / genericLength classes + in s + where + rec a = + let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results + y = genericLength $ filter (\(c, (r, _)) -> c == a) results + in t / y + + precision :: [(Int, (Int, Double))] -> Double + precision results = + let classes = ordNub (map fst results) + s = sum (map prec classes) / genericLength classes + in s + where + prec a = + let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results + y = genericLength $ filter (\(c, (r, _)) -> r == a) results + in + if y == 0 + then 0 + else t / y + + fmeasure :: [(Int, (Int, Double))] -> Double + fmeasure results = + let r = recall results + p = precision results + in (2 * p * r) / (p + r) + + ngram :: Int -> [Document] -> [Document] + ngram n documents = + map (\(Document text c) -> Document (ngramText n text) c) documents + + ngramText :: Int -> String -> String + ngramText n text = + let ws = words text + pairs = zip [0..] ws + grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . 
drop (i+1)) ws)) pairs + in unwords ("_":grams) diff --git a/src/Sibe/NaiveBayes.hs b/src/Sibe/NaiveBayes.hs index 7afe1f6..85657c4 100644 --- a/src/Sibe/NaiveBayes.hs +++ b/src/Sibe/NaiveBayes.hs @@ -1,7 +1,7 @@ module Sibe.NaiveBayes (Document(..), NB(..), - train, + initialize, run, session, ordNub, @@ -19,21 +19,13 @@ module Sibe.NaiveBayes removeStopwords, ) where + import Sibe.NLP import Data.List import Debug.Trace import qualified Data.Set as Set import Data.List.Split import Data.Maybe import Control.Arrow ((&&&)) - import Text.Regex.PCRE - import Data.Char (isSpace, isNumber, toLower) - import NLP.Stemmer - - type Class = Int; - - data Document = Document { text :: String - , c :: Class - } deriving (Eq, Show, Read) data NB = NB { documents :: [Document] , classes :: [(Class, Double)] @@ -44,8 +36,8 @@ module Sibe.NaiveBayes , cgram :: [(Class, [(String, Int)])] } deriving (Eq, Show, Read) - train :: [Document] -> [Class] -> NB - train documents classes = + initialize :: [Document] -> [Class] -> NB + initialize documents classes = let megadoc = concatDocs documents vocabulary = genericLength ((ordNub . words) megadoc) -- (class, prior probability) @@ -83,17 +75,6 @@ module Sibe.NaiveBayes classWordsCounts x = wordsCount (classWords x) (classVocabulary x) classNGramCounts x = wordsCount (classNGramWords x) (ordNub $ classNGramWords x) - ngram :: Int -> [Document] -> [Document] - ngram n documents = - map (\(Document text c) -> Document (ngramText n text) c) documents - - ngramText :: Int -> String -> String - ngramText n text = - let ws = words text - pairs = zip [0..] ws - grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs - in unwords ("_":grams) - session :: [Document] -> NB -> [(Class, (Class, Double))] session docs nb = let results = map (\(Document text c) -> (c, run text nb)) docs @@ -143,91 +124,5 @@ module Sibe.NaiveBayes variance = sum (map ((^2) . subtract avg) x) / (genericLength x - 1) in sqrt variance - cleanText :: String -> String - cleanText string = - let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?']) (trim string) - spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r'] - stemmed = unwords $ map (stem Porter) (words spacify) - nonumber = filter (not . isNumber) stemmed - lower = map toLower nonumber - in (unwords . words) lower -- remove unnecessary spaces - where - trim = f . f - where - f = reverse . 
dropWhile isSpace - replace needle replacement = - map (\c -> if c == needle then replacement else c) - - cleanDocuments :: [Document] -> [Document] - cleanDocuments documents = - let cleaned = map (\(Document text c) -> Document (cleanText text) c) documents - in cleaned - - removeWords :: [String] -> [Document] -> [Document] - removeWords ws documents = - map (\(Document text c) -> Document (rm ws text) c) documents - where - rm list text = - unwords $ filter (`notElem` list) (words text) - - removeStopwords :: Int -> [Document] -> [Document] - removeStopwords i documents = - let wc = wordCounts (concatDocs documents) - wlist = sortBy (\(_, a) (_, b) -> b `compare` a) wc - stopwords = map fst (take i wlist) - in removeWords stopwords documents - where - vocabulary x = ordNub (words x) - countWordInDoc d w = genericLength (filter (==w) d) - wordCounts x = - let voc = vocabulary x - in zip voc $ map (countWordInDoc (words x)) voc - - concatDocs = concatMap (\(Document text _) -> text ++ " ") - l :: (Show a) => a -> a l a = trace (show a) a - - ordNub :: (Ord a) => [a] -> [a] - ordNub = go Set.empty - where - go _ [] = [] - go s (x:xs) = if x `Set.member` s then go s xs - else x : go (Set.insert x s) xs - - accuracy :: [(Int, (Int, Double))] -> Double - accuracy results = - let pairs = map (\(a, b) -> (a, fst b)) results - correct = filter (uncurry (==)) pairs - in genericLength correct / genericLength results - - recall :: [(Int, (Int, Double))] -> Double - recall results = - let classes = ordNub (map fst results) - s = sum (map rec classes) / genericLength classes - in s - where - rec a = - let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results - y = genericLength $ filter (\(c, (r, _)) -> c == a) results - in t / y - - precision :: [(Int, (Int, Double))] -> Double - precision results = - let classes = ordNub (map fst results) - s = sum (map prec classes) / genericLength classes - in s - where - prec a = - let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results - y = genericLength $ filter (\(c, (r, _)) -> r == a) results - in - if y == 0 - then 0 - else t / y - - fmeasure :: [(Int, (Int, Double))] -> Double - fmeasure results = - let r = recall results - p = precision results - in (2 * p * r) / (p + r) diff --git a/stack.yaml b/stack.yaml index d02fe3e..5960cdb 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,40 +1,5 @@ -# This file was automatically generated by 'stack init' -# -# Some commonly used options have been documented as comments in this file. -# For advanced use and comprehensive documentation of the format, please see: -# http://docs.haskellstack.org/en/stable/yaml_configuration/ - -# Resolver to choose a 'specific' stackage snapshot or a compiler version. -# A snapshot resolver dictates the compiler version and the set of packages -# to be used for project dependencies. For example: -# -# resolver: lts-3.5 -# resolver: nightly-2015-09-21 -# resolver: ghc-7.10.2 -# resolver: ghcjs-0.1.0_ghc-7.10.2 -# resolver: -# name: custom-snapshot -# location: "./custom-snapshot.yaml" resolver: lts-6.7 -# User packages to be built. -# Various formats can be used as shown in the example below. 
-# -# packages: -# - some-directory -# - https://example.com/foo/bar/baz-0.0.2.tar.gz -# - location: -# git: https://github.com/commercialhaskell/stack.git -# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a -# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a -# extra-dep: true -# subdirs: -# - auto-update -# - wai -# -# A package marked 'extra-dep: true' will only be built if demanded by a -# non-dependency (i.e. a user package), and its test suites and benchmarks -# will not be run. This is useful for tweaking upstream packages. packages: - location: git: git@github.com:albertoruiz/hmatrix.git @@ -42,36 +7,11 @@ packages: subdirs: - packages/base - . -- http://hackage.haskell.org/package/containers-0.5.7.1/containers-0.5.7.1.tar.gz -- http://hackage.haskell.org/package/text-1.2.2.1/text-1.2.2.1.tar.gz -- http://hackage.haskell.org/package/stemmer-0.5.2/stemmer-0.5.2.tar.gz -# Dependency packages to be pulled from upstream that are not in the resolver -# (e.g., acme-missiles-0.3) -extra-deps: [] - -# Override default flag values for local packages and extra-deps -flags: {} - -# Extra package databases containing global packages -extra-package-dbs: [] - -# Control whether we use the GHC we find on the path -# system-ghc: true -# -# Require a specific version of stack, using version ranges -# require-stack-version: -any # Default -# require-stack-version: ">=1.1" -# -# Override the architecture used by stack, especially useful on Windows -# arch: i386 -# arch: x86_64 -# -# Extra directories used by stack for building -# extra-include-dirs: [/path/to/dir] -# extra-lib-dirs: [/path/to/dir] -# -# Allow a newer minor version of GHC than the snapshot specifies -# compiler-check: newer-minor - -system-ghc: false +extra-deps: + - directory-1.2.7.0 + - text-1.2.2.1 + - stemmer-0.5.2 + - containers-0.5.7.1 + - Chart-1.8 + - Chart-cairo-1.8
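
Review note: the central API change in this patch replaces the old entry point, session inputs network labels alpha (iterations, epochs), with a Session record driven by run. Below is a minimal usage sketch distilled from examples/xor.hs above; every name comes from Sibe's API as exercised by the examples in this diff, nothing here is new. (Aside: xor.hs shrinks the hidden layer to two units but keeps the old "8 nodes in a single hidden layer" comment, which is worth updating.)

    module Main where
      import Sibe
      import Numeric.LinearAlgebra
      import Data.Default.Class

      main :: IO ()
      main = do
        let act = (sigmoid, sigmoid')
            -- 2 inputs, one hidden layer of 2 units, 1 output
            net = randomNetwork 0 (-1, 1) 2 [(2, act)] (1, act)
            samples = zip [vector [0, 1], vector [1, 0]]
                          [vector [1],    vector [1]]
            sess = def { network  = net
                       , training = samples
                       , test     = samples
                       } :: Session
        trained <- run gd sess            -- full-batch gradient descent
        print (crossEntropy trained)      -- cost over the test pairs

The same driver composes with the pure decay step, as notmnist.hs does: run (sgd . learningRateDecay (1.1, 5e-2)) sess type-checks because learningRateDecay returns a plain Session that sgd then consumes in IO (set a non-zero batchSize first, since sgd divides by it; the default is 0). One detail worth a second look: run folds over [0..epochs session], which performs epochs + 1 passes; [1..epochs session] would match the field's name.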
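
Review note: in src/Sibe.hs, gd binds seed <- newStdGen and shuffled <- shuffleM pairs but then folds over the original pairs, so neither binding has any effect. If a fresh shuffle per epoch is the intent (as the binding suggests), the fold should consume shuffled; a sketch of the one-line fix in context, using only names already imported in that module:

    gd :: Session -> IO Session
    gd session = do
      let alpha = learningRate session
          net   = network session
      shuffled <- shuffleM (training session)   -- actually use the shuffle
      let newnet = foldl' (\n (input, label) -> train input n label alpha)
                          net shuffled
      return session { network = newnet
                     , epoch   = epoch session + 1
                     }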
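
Review note: the patch replaces the old softplus-style relu with relu = cmap (max 0.1), which clamps every activation below 0.1 up to 0.1, while the new relu' reports a zero derivative only for x < 0; on 0 <= x < 0.1 the function is constant but its stated derivative is 1. If the 0.1 floor is not an intentional experiment, the conventional rectifier consistent with relu' only changes the constant:

    import Numeric.LinearAlgebra (Vector, cmap)

    relu :: Vector Double -> Vector Double
    relu = cmap (max 0)   -- zero for x < 0, identity otherwise; matches relu'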
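
Review note: the new softmax computes exp a / sum (exp x) on the raw logits, which overflows for large inputs (exp 710 is already Infinity for Double). A numerically safer variant, offered as a suggestion rather than what the diff ships, shifts by the maximum first; the result is mathematically identical:

    import Numeric.LinearAlgebra (Vector, cmap, maxElement, scalar)
    import qualified Data.Vector.Storable as V

    softmax :: Vector Double -> Vector Double
    softmax x = cmap (/ s) e
      where
        e = exp (x - scalar (maxElement x))  -- largest exponent is now 0
        s = V.sum e

Relatedly, one :: a -> Double cannot serve as the derivative half of an Activation, which needs a Vector Double -> Vector Double; notmnist.hs compiles only because its o = (softmax, one) binding is never used. Giving one the type Vector Double -> Vector Double with one = cmap (const 1) would make the pair usable.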
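
Review note: accuracy rounds every output component and compares whole vectors, so a softmax-style output such as [0.35, 0.3, ...] rounds to all zeros and can never equal a one-hot label. For one-hot targets the usual measure compares argmaxes instead; a sketch against the Session API above (accuracyArgmax is a hypothetical name, not part of this diff):

    import Sibe
    import Data.List (genericLength)
    import Numeric.LinearAlgebra (maxIndex)

    accuracyArgmax :: Session -> Double
    accuracyArgmax sess =
      let preds  = map (maxIndex . (`forward` sess) . fst) (test sess)
          truths = map (maxIndex . snd) (test sess)
          hits   = filter id (zipWith (==) preds truths)
      in  genericLength hits / genericLength preds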