feat(Numeric): move all modules to Numeric
src/Numeric/Sibe.hs (new file, 362 lines)
@@ -0,0 +1,362 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}

module Numeric.Sibe
  (Network(..),
   Layer(..),
   Input,
   Output,
   Activation,
   forward,
   forward',
   runLayer,
   runLayer',
   randomLayer,
   randomNetwork,
   buildNetwork,
   saveNetwork,
   loadNetwork,
   train,
   gd,
   sgd,
   run,
   sigmoid,
   sigmoid',
   softmax,
   softmax',
   relu,
   relu',
   crossEntropy,
   crossEntropy',
   genSeed,
   replaceVector,
   Session(..),
   accuracy,
   learningRateDecay,
   ignoreBiases,
   one
  ) where

import Numeric.LinearAlgebra
import System.Random
import System.Random.Shuffle
import Debug.Trace
import Data.List (foldl', sortBy, genericLength, permutations)
import System.IO
import Control.DeepSeq
import Control.Monad
import qualified Data.Vector.Storable as V
import Data.Default.Class
import System.Exit

import qualified Graphics.Rendering.Chart.Easy as Chart
import Graphics.Rendering.Chart.Backend.Cairo

type LearningRate = Double
type Input = Vector Double
type Output = Vector Double
type Activation = (Vector Double -> Vector Double, Vector Double -> Vector Double)

data Layer = Layer { biases :: !(Vector Double)
                   , nodes :: !(Matrix Double)
                   , activation :: Activation
                   }

instance Show Layer where
  show (Layer biases nodes _) = "(" ++ show biases ++ "," ++ show nodes ++ ")"

data Network = O Layer
             | Layer :- Network

instance Show Network where
  show (Layer biases nodes _ :- n) =
    (show . length $ toLists nodes) ++ "x" ++ (show . length . head . toLists $ nodes) ++ " " ++ (show . length . toList $ biases) ++ " :- " ++ show n
  show (O (Layer biases nodes _)) =
    (show . length $ toLists nodes) ++ "x" ++ (show . length . head . toLists $ nodes) ++ " " ++ (show . length . toList $ biases)

infixr 5 :-

data Session = Session { network :: Network
                       , training :: [(Vector Double, Vector Double)]
                       , test :: [(Vector Double, Vector Double)]
                       , learningRate :: Double
                       , epochs :: Int
                       , epoch :: Int
                       , batchSize :: Int
                       , chart :: [(Int, Double, Double)]
                       , drawChart :: Bool
                       , chartName :: String
                       , momentum :: Double
                       , debug :: Bool
                       } deriving (Show)

emptyNetwork = randomNetwork 0 (0, 0) 0 [] (0, (id, id))

instance Default Session where
  -- mandatory fields default to 'error' so that using 'def' without setting
  -- them fails loudly; forcing an IO action such as 'die' with 'seq' only
  -- evaluates the action to WHNF and never runs it
  def = Session { network = error "You have not specified a network parameter"
                , training = error "You have not specified training data"
                , test = error "You have not specified test data"
                , learningRate = 0.5
                , epochs = 35
                , epoch = 0
                , batchSize = 0
                , chart = []
                , drawChart = False
                , chartName = "chart.png"
                , momentum = 0
                , debug = False
                }
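
-- A minimal usage sketch (hypothetical XOR data, not part of this module):
-- build a Session from 'def', overriding the mandatory fields.
--
--   let xor = [ (vector [0, 0], vector [0])
--             , (vector [0, 1], vector [1])
--             , (vector [1, 0], vector [1])
--             , (vector [1, 1], vector [0])
--             ]
--       session = def { network = randomNetwork 0 (-1, 1) 2 [(8, (sigmoid, sigmoid'))] (1, (sigmoid, sigmoid'))
--                     , training = xor
--                     , test = xor
--                     , learningRate = 0.5
--                     , epochs = 1000
--                     }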

saveNetwork :: Network -> String -> IO ()
saveNetwork network file =
  writeFile file ((show . reverse) (gen network []))
  where
    gen (O (Layer biases nodes _)) list = (biases, nodes) : list
    gen (Layer biases nodes _ :- n) list = gen n $ (biases, nodes) : list

loadNetwork :: [Activation] -> String -> IO Network
loadNetwork activations file = do
  handle <- openFile file ReadMode
  content <- hGetContents handle
  let list = read content :: [(Vector Double, Matrix Double)]
      network = gen list activations
  content `deepseq` hClose handle
  return network

  where
    gen [(biases, nodes)] [a] = O (Layer biases nodes a)
    gen ((biases, nodes):hs) (a:as) = Layer biases nodes a :- gen hs as
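
-- Round-trip sketch (assumes a trained two-layer 'net' with sigmoid
-- activations; the file path is illustrative):
--
--   saveNetwork net "net.txt"
--   net' <- loadNetwork [(sigmoid, sigmoid'), (sigmoid, sigmoid')] "net.txt"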

runLayer :: Input -> Layer -> Output
runLayer input (Layer !biases !weights _) = input <# weights + biases

-- like runLayer, but without the bias term
runLayer' :: Input -> Layer -> Output
runLayer' input (Layer !biases !weights _) = input <# weights

forward :: Input -> Session -> Output
forward input session = compute input (network session)
  where
    compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l
    compute input (l@(Layer _ _ (fn, _)) :- n) = compute ((fst . activation $ l) $ runLayer input l) n

-- like forward, but layers are run without their bias terms
forward' :: Input -> Session -> Output
forward' input session = compute input (network session)
  where
    compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer' input l
    compute input (l@(Layer _ _ (fn, _)) :- n) = compute ((fst . activation $ l) $ runLayer' input l) n

randomLayer :: Seed -> (Int, Int) -> (Double, Double) -> Activation -> Layer
randomLayer seed (wr, wc) (l, u) =
  let weights = uniformSample seed wr $ replicate wc (l, u)
      -- scale the uniform [0, 1] samples into the (l, u) range; note the
      -- parentheses: vec * (u - l) + l, not (vec * u) - l
      biases = randomVector seed Uniform wc * realToFrac (u - l) + realToFrac l
  in Layer biases weights

randomNetwork :: Seed -> (Double, Double) -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
randomNetwork seed bound input [] (output, a) =
  O $ randomLayer seed (input, output) bound a
randomNetwork seed bound input ((h, a):hs) output =
  randomLayer seed (input, h) bound a :-
  randomNetwork (seed + 1) bound h hs output

buildNetwork :: Seed -> (Double, Double) -> Int -> [(Int, Int, Activation)] -> (Int, Int, Activation) -> Network
buildNetwork seed bound input [] (outputRows, outputColumns, a) =
  O $ randomLayer seed (input, outputColumns) bound a
buildNetwork seed bound input ((rows, columns, a):hs) output =
  randomLayer seed (input, columns) bound a :-
  buildNetwork (seed + 1) bound columns hs output
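
-- Sketch: a 2-8-1 network with weights and biases drawn from (-1, 1) and
-- sigmoid activations throughout (layer sizes are illustrative):
--
--   net :: Network
--   net = randomNetwork 0 (-1, 1) 2 [(8, (sigmoid, sigmoid'))] (1, (sigmoid, sigmoid'))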

sigmoid :: Vector Double -> Vector Double
-- the denominator is clamped elementwise via cmap; applying 'max' to whole
-- vectors would compare them lexicographically instead
sigmoid = cmap (\a -> 1 / max (1 + exp (-a)) 1e-10)

sigmoid' :: Vector Double -> Vector Double
sigmoid' x = sigmoid x * (1 - sigmoid x)

softmax :: Vector Double -> Vector Double
softmax x = cmap (\a -> exp a / s) x
  where
    s = V.sum $ exp x

softmax' :: Vector Double -> Vector Double
softmax' = cmap (\a -> sig a * (1 - sig a))
  where
    sig x = 1 / max (1 + exp (-x)) 1e-10
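
-- The softmax above exponentiates its input directly, which can overflow
-- for large values. A numerically stable variant (a sketch, not used
-- elsewhere in this module) subtracts the maximum first, which leaves the
-- result mathematically unchanged:
--
--   stableSoftmax :: Vector Double -> Vector Double
--   stableSoftmax x = cmap (\a -> exp (a - m) / s) x
--     where
--       m = V.maximum x
--       s = V.sum $ cmap (\a -> exp (a - m)) x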

-- used for negative sampling
{-sampledSoftmax :: Vector Double -> Vector Double-}
{-sampledSoftmax x = cmap (\a -> exp a / s) x-}
  {-where-}
    {-s = V.sum . exp $ x-}

relu :: Vector Double -> Vector Double
-- rectifier: max 0, matching the 0/1 derivative in relu' below
relu = cmap (max 0)

relu' :: Vector Double -> Vector Double
relu' = cmap dev
  where dev x
          | x < 0 = 0
          | otherwise = 1

crossEntropy :: Session -> Double
crossEntropy session =
  let inputs = map fst (test session)
      labels = map (toList . snd) (test session)
      outputs = map (toList . (`forward` session)) inputs
      pairs = zip outputs labels
      n = genericLength pairs
  in sum (map set pairs) / n
  where
    set (os, ls) = (-1 / genericLength os) * sum (zipWith f os ls)
    f a y = y * log (max 1e-10 a)

crossEntropy' :: Vector Double -> Vector Double
-- constant derivative term; hmatrix broadcasts the singleton vector
-- produced by 'fromIntegral' here, so this behaves as 1/n elementwise
crossEntropy' x = 1 / fromIntegral (V.length x)

one :: Vector Double -> Vector Double
one v = vector $ replicate (V.length v) 1

train :: Input
      -> Network
      -> Output -- target
      -> Double -- learning rate
      -> Network -- network's output
train input network target alpha = fst $ run input network
  where
    run :: Input -> Network -> (Network, Vector Double)
    run input (O l@(Layer biases weights (fn, fn'))) =
      let y = runLayer input l
          o = fn y
          delta = o - target
          de = delta * fn' y

          biases' = biases - scale alpha de
          weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
          layer = Layer biases' weights' (fn, fn') -- updated layer

          pass = weights #> de

      in (O layer, pass)
    run input (l@(Layer biases weights (fn, fn')) :- n) =
      let y = runLayer input l
          o = fn y
          (n', delta) = run o n

          de = delta * fn' y

          biases' = biases - scale alpha de
          weights' = weights - scale alpha (input `outer` de)
          layer = Layer biases' weights' (fn, fn')

          pass = weights #> de
      in (layer :- n', pass)
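
-- One gradient step on a single example (sketch; 'net' and the vectors are
-- hypothetical):
--
--   net' = train (vector [0, 1]) net (vector [1]) 0.5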

gd :: Session -> IO Session
gd session = do
  seed <- newStdGen

  let pairs = training session
      alpha = learningRate session
      net = network session

  let n = length pairs

  shuffled <- shuffleM pairs

  -- fold over the shuffled pairs, not the original order
  let newnet = foldl' (\n (input, label) -> train input n label alpha) net shuffled
      cost = crossEntropy (session { network = newnet })

  let el = map (\(e, l, _) -> (e, l)) (chart session)
      ea = map (\(e, _, a) -> (e, a)) (chart session)

  when (drawChart session) $ do
    toFile Chart.def (chartName session) $ do
      Chart.layoutlr_title Chart..= "loss over time"
      Chart.plotLeft (Chart.line "loss" [el])
      Chart.plotRight (Chart.line "learningRate" [ea])

  return session { network = newnet
                 , epoch = epoch session + 1
                 , chart = (epoch session, cost, learningRate session):chart session
                 }

sgd :: Session -> IO Session
sgd session = do
  seed <- newStdGen

  let pairs = training session
      bsize = batchSize session
      alpha = learningRate session
      net = network session

  let n = length pairs
      iterations = n `div` bsize - 1

  shuffled <- shuffleM pairs

  let iter net i =
        let batch = take bsize . drop (i * bsize) $ shuffled
            batchInputs = map fst batch
            batchLabels = map snd batch
            batchPair = zip batchInputs batchLabels
        in foldl' (\n (input, label) -> train input n label alpha) net batchPair

  let newnet = foldl' iter net [0..iterations]
      cost = crossEntropy (session { network = newnet })

  let el = map (\(e, l, _) -> (e, l)) (chart session)
      ea = map (\(e, _, a) -> (e, a)) (chart session)

  when (drawChart session) $ do
    toFile Chart.def (chartName session) $ do
      Chart.layoutlr_title Chart..= "loss over time"
      Chart.plotLeft (Chart.line "loss" [el])
      Chart.plotRight (Chart.line "learningRate" [ea])

  return session { network = newnet
                 , epoch = epoch session + 1
                 , chart = (epoch session, cost, learningRate session):chart session
                 }


accuracy :: Session -> Double
accuracy session =
  let inputs = map fst (test session)
      labels = map snd (test session)

      results = map (`forward` session) inputs
      rounded = map (map round . toList) results

      equals = zipWith (==) rounded (map (map round . toList) labels)
  in genericLength (filter (== True) equals) / genericLength inputs

learningRateDecay :: (Double, Double) -> Session -> Session
learningRateDecay (step, m) session =
  session { learningRate = max m $ learningRate session / step }

ignoreBiases :: Session -> Session
ignoreBiases session =
  session { network = rmbias (network session) }
  where
    rmbias (O (Layer biases nodes a)) = O $ Layer (biases * 0) nodes a
    rmbias ((Layer biases nodes a) :- n) = Layer (biases * 0) nodes a :- rmbias n

run :: (Session -> IO Session)
    -> Session -> IO Session
-- run exactly 'epochs session' iterations ([0..n] would run n + 1)
run fn session = foldM (\s i -> fn s) session [1..epochs session]
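
-- Sketch of a full training loop with learning-rate decay (the session and
-- decay parameters are hypothetical):
--
--   trained <- run (gd . learningRateDecay (1.01, 0.05)) session
--   print (accuracy trained)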

factorial :: Int -> Int
factorial 0 = 1
factorial x = x * factorial (x - 1)

genSeed :: IO Seed
genSeed = do
  (seed, _) <- random <$> newStdGen :: IO (Int, StdGen)
  return seed

-- replace the element at 'index', e.g.
-- replaceVector (vector [0, 0, 0]) 1 9 == vector [0, 9, 0]
replaceVector :: Vector Double -> Int -> Double -> Vector Double
replaceVector vec index value =
  let list = toList vec
  in fromList $ take index list ++ value : drop (index + 1) list

clip :: Double -> (Double, Double) -> Double
clip x (l, u) = min u (max l x)

src/Numeric/Sibe/NLP.hs (new file, 122 lines)
@@ -0,0 +1,122 @@
module Numeric.Sibe.NLP
  (Class,
   Document(..),
   accuracy,
   recall,
   precision,
   fmeasure,
   cleanText,
   cleanDocuments,
   removeWords,
   removeStopwords,
   ngram,
   ngramText,
  )
  where

import Numeric.Sibe.Utils
import Data.List
import Debug.Trace
import Data.List.Split
import Data.Maybe
import Control.Arrow ((&&&))
import Text.Regex.PCRE
import Data.Char (isSpace, isNumber, toLower)
import NLP.Stemmer
import qualified Data.Set as Set

type Class = Int

data Document = Document { text :: String
                         , c :: Class
                         } deriving (Eq, Show, Read)


cleanText :: String -> String
cleanText string =
  let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?']) (trim string)
      spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r']
      stemmed = unwords $ map (stem Porter) (words spacify)
      nonumber = filter (not . isNumber) stemmed
      lower = map toLower nonumber
  in (unwords . words) lower -- remove unnecessary spaces
  where
    trim = f . f
      where
        f = reverse . dropWhile isSpace
    replace needle replacement =
      map (\c -> if c == needle then replacement else c)

cleanDocuments :: [Document] -> [Document]
cleanDocuments documents =
  let cleaned = map (\(Document text c) -> Document (cleanText text) c) documents
  in cleaned

removeWords :: [String] -> [Document] -> [Document]
removeWords ws documents =
  map (\(Document text c) -> Document (rm ws text) c) documents
  where
    rm list text =
      unwords $ filter (`notElem` list) (words text)

removeStopwords :: Int -> [Document] -> [Document]
removeStopwords i documents =
  let wc = wordCounts (concatDocs documents)
      wlist = sortBy (\(_, a) (_, b) -> b `compare` a) wc
      stopwords = map fst (take i wlist)
  in removeWords stopwords documents
  where
    vocabulary x = ordNub (words x)
    countWordInDoc d w = genericLength (filter (==w) d)
    wordCounts x =
      let voc = vocabulary x
      in zip voc $ map (countWordInDoc (words x)) voc

    concatDocs = concatMap (\(Document text _) -> text ++ " ")

accuracy :: [(Int, (Int, Double))] -> Double
accuracy results =
  let pairs = map (\(a, b) -> (a, fst b)) results
      correct = filter (uncurry (==)) pairs
  in genericLength correct / genericLength results

recall :: [(Int, (Int, Double))] -> Double
recall results =
  let classes = ordNub (map fst results)
      s = sum (map rec classes) / genericLength classes
  in s
  where
    rec a =
      let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
          y = genericLength $ filter (\(c, (r, _)) -> c == a) results
      in t / y

precision :: [(Int, (Int, Double))] -> Double
precision results =
  let classes = ordNub (map fst results)
      s = sum (map prec classes) / genericLength classes
  in s
  where
    prec a =
      let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
          y = genericLength $ filter (\(c, (r, _)) -> r == a) results
      in
        if y == 0
          then 0
          else t / y

fmeasure :: [(Int, (Int, Double))] -> Double
fmeasure results =
  let r = recall results
      p = precision results
  in (2 * p * r) / (p + r)

ngram :: Int -> [Document] -> [Document]
ngram n documents =
  map (\(Document text c) -> Document (ngramText n text) c) documents

ngramText :: Int -> String -> String
ngramText n text =
  let ws = words text
      pairs = zip [0..] ws
      grams = map (\(i, w) -> intercalate "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs
  in unwords ("<b>_":grams)
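
-- Sketch: bigrams of a short sentence (the output is shown as a comment):
--
--   ngramText 2 "the quick brown fox"
--   -- "<b>_ the_quick quick_brown brown_fox fox"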

src/Numeric/Sibe/NaiveBayes.hs (new file, 128 lines)
@@ -0,0 +1,128 @@
module Numeric.Sibe.NaiveBayes
  (Document(..),
   NB(..),
   initialize,
   run,
   session,
   accuracy,
   precision,
   recall,
   fmeasure,
   mean,
   stdev,
   cleanText,
   cleanDocuments,
   ngram,
   ngramText,
   removeWords,
   removeStopwords,
  )
  where

import Numeric.Sibe.Utils
import Numeric.Sibe.NLP
import Data.List
import Debug.Trace
import qualified Data.Set as Set
import Data.List.Split
import Data.Maybe
import Control.Arrow ((&&&))

data NB = NB { documents :: [Document]
             , classes :: [(Class, Double)]
             , vocabulary :: Int
             , megadoc :: String
             , cd :: [(Class, [Document])]
             , cw :: [(Class, [(String, Int)])]
             , cgram :: [(Class, [(String, Int)])]
             } deriving (Eq, Show, Read)

initialize :: [Document] -> [Class] -> NB
initialize documents classes =
  let megadoc = concatDocs documents
      vocabulary = genericLength ((ordNub . words) megadoc)
      -- (class, prior probability)
      cls = zip classes (map classPrior classes)

      -- (class, [document])
      cd = zip classes (map classDocs classes)

      -- (class, [(word, count)])
      cw = zip classes $ map classWordsCounts classes

      cgram = zip classes $ map classNGramCounts classes

  in NB { documents = documents
        , classes = cls
        , vocabulary = vocabulary
        , megadoc = megadoc
        , cd = cd
        , cw = cw
        , cgram = cgram
        }
  where
    concatDocs = concatMap (\(Document text _) -> text ++ " ")

    classDocs x = filter ((==x) . c) documents
    classMegadoc = concatMap (\(Document text _) -> text ++ " ") . classDocs
    classWords = words . classMegadoc
    classNGram = concatMap (\(Document text _) -> text ++ " ") . ngram 2 . classDocs
    classNGramWords = words . classNGram
    classVocabulary = ordNub . classWords
    classPrior x = genericLength (classDocs x) / genericLength documents
    countWordInDoc d w = genericLength (filter (==w) d)
    wordsCount ws voc =
      zip voc $ map (countWordInDoc ws) voc
    classWordsCounts x = wordsCount (classWords x) (classVocabulary x)
    classNGramCounts x = wordsCount (classNGramWords x) (ordNub $ classNGramWords x)

session :: [Document] -> NB -> [(Class, (Class, Double))]
session docs nb =
  let results = map (\(Document text c) -> (c, run text nb)) docs
  in results

run :: String -> NB -> (Class, Double)
run txt (NB documents classes vocabulary megadoc cd cw cgram) =
  let scores = map (score . fst) classes
      index = argmax scores
      m = maximum scores
  in (fst (classes !! index), m)
  where
    score c =
      let prior = snd (fromJust $ find ((==c) . fst) classes)

          -- Multinomial Naive Bayes would multiply by the prior:
          --   prior * product (map (prob c) (words txt))
          -- but on imbalanced training sets a down-weighted prior,
          -- alpha = 1 - prior * ALPHA (with ALPHA = 1 here), works better:
          -- it helps rare classes get higher scores.

          alpha = 1 - prior

      in alpha * product (map (prob c) (words txt))

    prob c w =
      let fcw = fromJust $ find ((==c) . fst) cw
          fcg = fromJust $ find ((==c) . fst) cgram
          tctM = find ((== w) . fst) (snd fcw)
          tct = if isJust tctM then (snd . fromJust) tctM else 0
          cvoc = sum $ map snd (snd fcw)
          voc = vocabulary
          gram = find ((==w) . last . splitOn "_" . fst) (snd fcg)
          pg = if isJust gram then (snd . fromJust) gram else 0
      -- in realToFrac (tct * pg + 1) / realToFrac (cvoc + voc) -- uncomment to enable ngrams
      in realToFrac (tct + 1) / realToFrac (cvoc + voc)
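
-- End-to-end sketch (hypothetical two-class corpus):
--
--   let docs = cleanDocuments [Document "free money now" 1, Document "meeting at noon" 0]
--       nb = initialize docs [0, 1]
--       results = session docs nb
--   print (accuracy results, fmeasure results)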

argmax :: (Ord a) => [a] -> Int
argmax x = fst $ maximumBy (\(_, a) (_, b) -> a `compare` b) (zip [0..] x)

mean :: [Double] -> Double
mean x = sum x / genericLength x

stdev :: [Double] -> Double
stdev x =
  let avg = mean x
      variance = sum (map ((^2) . subtract avg) x) / (genericLength x - 1)
  in sqrt variance

l :: (Show a) => a -> a
l a = trace (show a) a

src/Numeric/Sibe/Utils.hs (new file, 41 lines)
@@ -0,0 +1,41 @@
module Numeric.Sibe.Utils
  ( similarity
  , ordNub
  , onehot
  , average
  , pca
  ) where

import qualified Data.Vector.Storable as V
import qualified Data.Set as Set
import Numeric.LinearAlgebra

similarity :: Vector Double -> Vector Double -> Double
similarity a b = (V.sum $ a * b) / (magnitude a * magnitude b)
  where
    magnitude :: Vector Double -> Double
    magnitude v = sqrt $ V.sum (cmap (^2) v)

onehot :: Int -> Int -> Vector Double
onehot len i = vector $ replicate i 0 ++ [1] ++ replicate (len - i - 1) 0
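
-- Quick checks (exact for these inputs):
--
--   onehot 4 1                                  -- vector [0, 1, 0, 0]
--   similarity (vector [1, 0]) (vector [0, 1])  -- 0.0 (orthogonal)
--   similarity (vector [1, 2]) (vector [2, 4])  -- 1.0 (parallel)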

ordNub :: (Ord a) => [a] -> [a]
ordNub = go Set.empty
  where
    go _ [] = []
    go s (x:xs) = if x `Set.member` s then go s xs
                                      else x : go (Set.insert x s) xs

-- normalizes a vector so that its elements sum to one
average :: Vector Double -> Vector Double
average v = cmap (/ (V.sum v)) v

-- project onto the first d principal components; rows are data points and
-- each row is centered by its own mean
pca :: Matrix Double -> Int -> Matrix Double
pca m d =
  let rs = toRows m
      means = map (\v -> V.sum v / fromIntegral (V.length v)) rs
      meanReduced = map (\(a, b) -> V.map (+ (negate b)) a) $ zip rs means
      mat = fromRows meanReduced

      (u, s, v) = svd mat
      diagS = diagRect 0 s (rows mat) (cols mat)

  in u ?? (All, Take d) <> diagS ?? (Take d, Take d)

src/Numeric/Sibe/Word2Vec.hs (new file, 129 lines)
@@ -0,0 +1,129 @@
{-# LANGUAGE ScopedTypeVariables #-}

module Numeric.Sibe.Word2Vec
  ( word2vec
  , Word2Vec (..)
  , W2VMethod (..)
  ) where

import Numeric.Sibe
import Numeric.Sibe.Utils
import Debug.Trace
import Data.Char
import Data.Maybe
import Data.List
import Numeric.LinearAlgebra as H hiding (find)
import qualified Data.Vector.Storable as V
import Data.Default.Class
import Data.Function (on)
import Control.Monad
import System.Random

import Graphics.Rendering.Chart as Chart
import Graphics.Rendering.Chart.Backend.Cairo
import Control.Lens

data W2VMethod = SkipGram | CBOW

data Word2Vec = Word2Vec { docs :: [String]
                         , window :: Int
                         , dimensions :: Int
                         , method :: W2VMethod
                         , w2vChartName :: String
                         , w2vDrawChart :: Bool
                         }

instance Default Word2Vec where
  -- note: 'dimensions' and 'method' have no defaults and must be set by the caller
  def = Word2Vec { docs = []
                 , window = 2
                 , w2vChartName = "w2v.png"
                 , w2vDrawChart = False
                 }

-- the signature below is inferred from the body; both result lists pair
-- each vocabulary word with a vector
word2vec :: Word2Vec -> Session -> IO ([(String, Vector Double)], [(String, Vector Double)])
word2vec w2v session = do
  seed <- newStdGen

  let s = session { training = trainingData
                  , network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, crossEntropy'))
                  }

  when (debug s) $ do
    putStr "vocabulary size: "
    print v

    putStr "trainingData length: "
    print . length $ trainingData

  -- biases are not used in skipgram/cbow
  newses <- run (sgd . ignoreBiases) s

  -- export the hidden layer
  let (hidden@(Layer biases nodes _) :- _) = network newses

  -- run words through the hidden layer alone to get the word vector
  let computedVocVec = map (\(w, v) -> (w, runLayer' v hidden)) vocvec

  when (w2vDrawChart w2v) $ do
    let m = fromRows . map snd $ computedVocVec
        twoDimensions = pca m 2
        textData = zipWith (\s l -> (V.head l, V.last l, s)) (map fst computedVocVec) (toRows twoDimensions)

        chart = toRenderable layout
          where
            textP = plot_annotation_values .~ textData
                  $ def
            layout = layout_title .~ "word vectors"
                   $ layout_plots .~ [toPlot textP]
                   $ def

    renderableToFile def (w2vChartName w2v) chart
    return ()

  return (computedVocVec, vocvec)

  where
    -- clean documents
    ds = map cleanText (docs w2v)

    -- words of each document
    wd = map (words . (++ " ") . (map toLower)) ds

    -- all words together, used to generate the vocabulary
    ws = words (concatMap ((++ " ") . map toLower) ds)
    vocabulary = ordNub ws
    v = length vocabulary

    -- generate one-hot vectors for each word of vocabulary
    vocvec = zip vocabulary $ map (onehot v) [0..v - 1]

    -- training data: generate input and output pairs for each word and the words in its window
    trainingData = concatMap (\wds -> concatMap (iter wds) $ zip [0..] wds) wd
      where
        iter wds (i, w) =
          let v = snd . fromJust . find ((==w) . fst) $ vocvec
              -- up to 'window' words before the target, without wrapping past
              -- the start (a plain 'drop (i - window)' would let the target
              -- itself slip into its own context when i < window)
              before = drop (max 0 (i - window w2v)) . take i $ wds
              after = take (window w2v) . drop (i + 1) $ wds
              ns
                | i == 0 = after
                | i == length wds - 1 = before
                | otherwise = before ++ after
              vectorized = map (\w -> snd . fromJust $ find ((== w) . fst) vocvec) ns
              new = foldl1 (+) vectorized
          in
            if length wds <= 1
              then []
              else
                case method w2v of
                  SkipGram -> [(v, average new)]
                  CBOW -> [(average new, v)]
                  _ -> error "unsupported word2vec method"
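
-- Sketch: train 10-dimensional skip-gram vectors on a toy corpus (the data
-- and hyperparameters are illustrative; 'test = []' because no held-out set
-- is used here):
--
--   (wordvecs, _) <- word2vec
--     def { docs = ["the cat sat on the mat", "the dog sat on the log"]
--         , dimensions = 10
--         , method = SkipGram
--         }
--     def { learningRate = 0.1, epochs = 100, batchSize = 16, test = [] }
--   print (lookup "cat" wordvecs)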

cleanText :: String -> String
cleanText string =
  let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?', '\'']) (trim string)
      spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r']
      nonumber = filter (not . isNumber) spacify
      lower = map toLower nonumber
  in (unwords . words) lower -- remove unnecessary spaces
  where
    trim = f . f
      where
        f = reverse . dropWhile isSpace
    replace needle replacement =
      map (\c -> if c == needle then replacement else c)