feat(blogs-corpus): new corpus for word2vec

This commit is contained in:
Mahdi Dibaiee
2016-09-19 16:00:45 +04:30
parent f16cc26798
commit d9d24f69a6
6 changed files with 62 additions and 13 deletions

View File

@ -26,6 +26,7 @@ module Sibe
sigmoid',
softmax,
softmax',
sampledSoftmax,
relu,
relu',
crossEntropy,
@ -181,6 +182,12 @@ module Sibe
where
sig x = 1 / max (1 + exp (-x)) 1e-10
-- | Softmax variant used for negative sampling.
--
-- Every entry of @x@ is exponentiated and divided by a normalizer that is
-- computed from only the first @n@ entries of @x@ (the sum of their
-- exponentials). Entries past index @n@ are still transformed, but do not
-- contribute to the normalizer.
sampledSoftmax :: Int -> Vector Double -> Vector Double
sampledSoftmax n x = cmap scale x
  where
    -- Normalizer: sum of exponentials over the leading n entries only.
    normalizer = V.sum (exp (V.take n x))
    -- Exponentiate a single entry and divide by the shared normalizer.
    scale a = exp a / normalizer
-- | Element-wise lower clamp: each entry of the input is replaced by the
-- larger of itself and 0.1.
--
-- NOTE(review): a textbook ReLU clamps at 0 rather than 0.1 — confirm the
-- 0.1 floor is intentional before changing it.
relu :: Vector Double -> Vector Double
relu v = cmap clampBelow v
  where
    -- Keep the entry when it exceeds the floor, otherwise use the floor.
    clampBelow = max 0.1

View File

@ -14,6 +14,7 @@ module Sibe.Word2Vec
import Data.Default.Class
import Data.Function (on)
import Control.Monad
import System.Random
data W2VMethod = SkipGram | CBOW
data Word2Vec = Word2Vec { docs :: [String]
@ -27,8 +28,9 @@ module Sibe.Word2Vec
}
word2vec w2v session = do
seed <- newStdGen
let s = session { training = trainingData
, network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, one))
, network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, crossEntropy'))
}
when (debug s) $ do