fix(word2vec): simple example of word2vec
This commit is contained in:
parent
d4ac90bbd5
commit
0d43814448
@ -27,13 +27,14 @@ module Main where
|
||||
"the dwarf poisons the king", "the dwarf poisons the queen"]
|
||||
|
||||
let session = def { learningRate = 0.1
|
||||
, batchSize = 16
|
||||
, batchSize = 1
|
||||
, epochs = 100
|
||||
, debug = True
|
||||
} :: Session
|
||||
w2v = def { docs = ds
|
||||
, dimensions = 50
|
||||
, method = SkipGram
|
||||
, window = 3
|
||||
, window = 2
|
||||
} :: Word2Vec
|
||||
|
||||
|
||||
|
BIN
sgd.png
BIN
sgd.png
Binary file not shown.
Before — Size: 31 KiB | After — Size: 12 KiB (width and height not shown)
@ -86,6 +86,7 @@ module Sibe
|
||||
, batchSize :: Int
|
||||
, chart :: [(Int, Double, Double)]
|
||||
, momentum :: Double
|
||||
, debug :: Bool
|
||||
} deriving (Show)
|
||||
|
||||
-- | Network produced by calling 'randomNetwork' with all-zero numeric
-- arguments, an empty list, and the identity function for both members of
-- the final function pair.
-- NOTE(review): presumably the empty list is the hidden-layer specification
-- and the last tuple describes the output layer (compare the call in
-- "Sibe.Word2Vec") — confirm against the definition of 'randomNetwork'.
emptyNetwork = randomNetwork 0 (0, 0) 0 [] (0, (id, id))
|
||||
@ -99,6 +100,7 @@ module Sibe
|
||||
, batchSize = 0
|
||||
, chart = []
|
||||
, momentum = 0
|
||||
, debug = False
|
||||
}
|
||||
|
||||
saveNetwork :: Network -> String -> IO ()
|
||||
|
@ -13,6 +13,7 @@ module Sibe.Word2Vec
|
||||
import qualified Data.Vector.Storable as V
|
||||
import Data.Default.Class
|
||||
import Data.Function (on)
|
||||
import Control.Monad
|
||||
|
||||
data W2VMethod = SkipGram | CBOW
|
||||
data Word2Vec = Word2Vec { docs :: [String]
|
||||
@ -30,11 +31,12 @@ module Sibe.Word2Vec
|
||||
, network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, one))
|
||||
}
|
||||
|
||||
putStr "vocabulary size: "
|
||||
print v
|
||||
when (debug s) $ do
|
||||
putStr "vocabulary size: "
|
||||
print v
|
||||
|
||||
putStr "trainingData length: "
|
||||
print . length $ trainingData
|
||||
putStr "trainingData length: "
|
||||
print . length $ trainingData
|
||||
|
||||
-- biases are not used in skipgram/cbow
|
||||
newses <- run (sgd . ignoreBiases) s
|
||||
@ -73,7 +75,7 @@ module Sibe.Word2Vec
|
||||
| i == length vocvec - 1 = before
|
||||
| otherwise = before ++ after
|
||||
vectorized = map (\w -> snd . fromJust $ find ((== w) . fst) vocvec) ns
|
||||
new = foldl1 (+) vectorized
|
||||
new = cmap (max 1) $ foldl1 (+) vectorized
|
||||
in
|
||||
case method w2v of
|
||||
SkipGram -> zip (repeat v) vectorized
|
||||
|
Loading…
Reference in New Issue
Block a user