feat(blogs-corpus): new corpus for word2vec
This commit is contained in:
@ -26,6 +26,7 @@ module Sibe
|
||||
sigmoid',
|
||||
softmax,
|
||||
softmax',
|
||||
sampledSoftmax,
|
||||
relu,
|
||||
relu',
|
||||
crossEntropy,
|
||||
@ -181,6 +182,12 @@ module Sibe
|
||||
where
|
||||
sig x = 1 / max (1 + exp (-x)) 1e-10
|
||||
|
||||
-- | Softmax variant used for negative sampling.
--
-- Every entry @a@ of @x@ is mapped to @exp a / z@, where @z@ is the sum
-- of @exp@ over only the first @n@ entries of @x@. Because the
-- normaliser covers just that prefix, the result need not sum to 1.
sampledSoftmax :: Int -> Vector Double -> Vector Double
sampledSoftmax n x = cmap normalise x
  where
    -- scale a single exponentiated entry by the shared normaliser
    normalise a = exp a / z
    -- normaliser: exponentials of the first n entries, summed
    z = V.sum (exp (V.take n x))
|
||||
|
||||
-- | Rectified linear unit, applied element-wise: each entry @a@ becomes
-- @max 0 a@.
--
-- Negative entries are replaced by 0 and non-negative entries pass
-- through unchanged. The floor has to be exactly 0: any positive floor
-- would also overwrite small positive activations lying below it.
relu :: Vector Double -> Vector Double
relu = cmap (max 0)
|
||||
|
||||
|
@ -14,6 +14,7 @@ module Sibe.Word2Vec
|
||||
import Data.Default.Class
|
||||
import Data.Function (on)
|
||||
import Control.Monad
|
||||
import System.Random
|
||||
|
||||
-- | Word2vec training variants: skip-gram ('SkipGram') or continuous
-- bag-of-words ('CBOW').
data W2VMethod = SkipGram | CBOW
|
||||
data Word2Vec = Word2Vec { docs :: [String]
|
||||
@ -27,8 +28,9 @@ module Sibe.Word2Vec
|
||||
}
|
||||
|
||||
word2vec w2v session = do
|
||||
seed <- newStdGen
|
||||
let s = session { training = trainingData
|
||||
, network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, one))
|
||||
, network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, crossEntropy'))
|
||||
}
|
||||
|
||||
when (debug s) $ do
|
||||
|
Reference in New Issue
Block a user