diff --git a/.gitmodules b/.gitmodules
index 17f4304..4cf4c67 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "examples/notMNIST"]
 	path = examples/notMNIST
 	url = git@github.com:mdibaiee/notMNIST
+[submodule "examples/blogs-corpus"]
+	path = examples/blogs-corpus
+	url = git@github.com:mdibaiee/blogs-corpus
diff --git a/examples/blogs-corpus b/examples/blogs-corpus
new file mode 160000
index 0000000..f242d6e
--- /dev/null
+++ b/examples/blogs-corpus
@@ -0,0 +1 @@
+Subproject commit f242d6e602cacf41cd662bfca57501638bcf7c2a
diff --git a/examples/word2vec.hs b/examples/word2vec.hs
index 921fa88..1669c41 100644
--- a/examples/word2vec.hs
+++ b/examples/word2vec.hs
@@ -12,27 +12,45 @@ module Main where
   import Data.Function (on)
   import Numeric.LinearAlgebra
   import System.IO
+  import System.Directory
   import Data.List.Split
+  import Control.Exception (evaluate)
+  import Debug.Trace
+  import Data.Char
+  import System.Random
+
+  rf :: FilePath -> IO String
+  rf p = do
+    hs <- openFile p ReadMode
+    hSetEncoding hs latin1
+    content <- evaluate =<< hGetContents hs
+    length content `seq` hClose hs
+    return content
 
   main = do
+    setStdGen (mkStdGen 100)
     sws <- lines <$> readFile "examples/stopwords"
-    {-ds <- do
-        content <- readFile "examples/doc-classifier-data/data-reuters"
-        let splitted = splitOn (replicate 10 '-' ++ "\n") content
-            d = concatMap (tail . lines) (take 100 splitted)
-        return $ removeWords sws d-}
-    --let ds = ["I like deep learning", "I like NLP", "I enjoy flying"]
+
+    -- real data, takes a lot of time to train
+    {-ds <- do-}
+        {-files <- filter ((/= "xml") . take 3 . reverse) <$> listDirectory "examples/blogs-corpus/"-}
+        {-contents <- mapM (rf . ("examples/blogs-corpus/" ++)) files-}
+
+        {-let texts = map (unwords . splitOn "&nbsp;") contents-}
+        {-let tags = ["<Blog>", "</Blog>", "<date>", "</date>", "<post>", "</post>", "&nbsp;"]-}
+        {-return $ map cleanText $ removeWords (sws ++ tags) texts-}
+
     let ds = ["the king loves the queen", "the queen loves the king",
               "the dwarf hates the king", "the queen hates the dwarf",
               "the dwarf poisons the king", "the dwarf poisons the queen"]
 
-    let session = def { learningRate = 5e-2
+    let session = def { learningRate = 1e-1
                       , batchSize = 1
-                      , epochs = 100
+                      , epochs = 200
                       , debug = True
                       } :: Session
         w2v = def { docs = ds
-                  , dimensions = 50
+                  , dimensions = 25
                   , method = SkipGram
                   , window = 2
                   } :: Word2Vec
@@ -49,10 +67,25 @@ module Main where
 
     return ()
 
+  cleanText :: String -> String
+  cleanText string = 
+    let notag = unwords $ filter ((/= "<date>") . take 6) (words string)
+        ws = unwords $ filter (`notElem` ["urlLink"]) (words notag)
+        spacify = foldl (\acc x -> replace x ' ' acc) (trim ws) [',', '/', '-', '\n', '\r', '?', '.', '(', ')', '%', '$', '"', ';', ':', '!', '\'']
+        nonumber = filter (not . isNumber) spacify
+        lower = map toLower nonumber
+    in unwords . words $ lower
+    where
+      trim = f . f
+        where
+          f = reverse . dropWhile isSpace
+      replace needle replacement =
+        map (\c -> if c == needle then replacement else c)
+
   removeWords :: [String] -> [String] -> [String]
   removeWords ws documents =
-    map (rm ws) documents
+    map rm documents
     where
-        rm list text =
-          unwords $ filter (`notElem` list) (words text)
+        rm text = 
+          unwords $ filter (`notElem` ws) (words text)
 
diff --git a/sibe.cabal b/sibe.cabal
index de76920..5bde54d 100644
--- a/sibe.cabal
+++ b/sibe.cabal
@@ -16,6 +16,7 @@ cabal-version:       >=1.10
 library
   hs-source-dirs:      src
   exposed-modules:     Sibe, Sibe.NaiveBayes, Sibe.NLP, Sibe.Word2Vec, Sibe.Utils
+  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
   build-depends:       base >= 4.7 && < 5
                      , hmatrix
                      , random
@@ -53,6 +54,8 @@ executable example-word2vec
                      , data-default-class
                      , split
                      , vector
+                     , directory
+                     , random
   default-language:    Haskell2010
 
 executable example-424
diff --git a/src/Sibe.hs b/src/Sibe.hs
index eb97ca5..7460aab 100644
--- a/src/Sibe.hs
+++ b/src/Sibe.hs
@@ -26,6 +26,7 @@ module Sibe
      sigmoid',
      softmax,
      softmax',
+     sampledSoftmax,
      relu,
      relu',
      crossEntropy,
@@ -181,6 +182,12 @@ module Sibe
         where
           sig x = 1 / max (1 + exp (-x)) 1e-10
 
+      -- used for negative sampling
+      sampledSoftmax :: Int -> Vector Double -> Vector Double
+      sampledSoftmax n x = cmap (\a -> exp a / s) x
+        where
+          s = V.sum . exp $ V.take n x
+
       relu :: Vector Double -> Vector Double
       relu = cmap (max 0.1)
 
diff --git a/src/Sibe/Word2Vec.hs b/src/Sibe/Word2Vec.hs
index 94c3ea7..b18f8ae 100644
--- a/src/Sibe/Word2Vec.hs
+++ b/src/Sibe/Word2Vec.hs
@@ -14,6 +14,7 @@ module Sibe.Word2Vec
     import Data.Default.Class
     import Data.Function (on)
     import Control.Monad
+    import System.Random
 
     data W2VMethod = SkipGram | CBOW
     data Word2Vec = Word2Vec { docs :: [String]
@@ -27,8 +28,9 @@ module Sibe.Word2Vec
                      }
 
     word2vec w2v session = do
+      seed <- newStdGen
       let s = session { training = trainingData
-                      , network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, one))
+                      , network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, crossEntropy'))
                       }
 
       when (debug s) $ do