feat(w2v): draw text charts for words

This commit is contained in:
Mahdi Dibaiee
2016-10-01 12:24:36 +03:30
parent d9d24f69a6
commit 85971bc84d
6 changed files with 42 additions and 19 deletions

View File

@ -26,7 +26,6 @@ module Sibe
sigmoid',
softmax,
softmax',
sampledSoftmax,
relu,
relu',
crossEntropy,
@ -183,10 +182,10 @@ module Sibe
sig x = 1 / max (1 + exp (-x)) 1e-10
-- | Softmax variant used for negative sampling.
--
-- Every entry of @x@ is exponentiated and divided by a normaliser that is
-- built from the first @n@ entries of @x@ only (the sum of their
-- exponentials).
sampledSoftmax :: Int -> Vector Double -> Vector Double
sampledSoftmax n x = cmap scale x
  where
    -- sum of exp over the leading n entries of x
    denominator = V.sum (exp (V.take n x))
    scale a = exp a / denominator
{-sampledSoftmax :: Vector Double -> Vector Double-}
{-sampledSoftmax x = cmap (\a -> exp a / s) x-}
{-where-}
{-s = V.sum . exp $ x-}
-- | Element-wise rectifier: each entry is replaced by @max 0.1@ of itself,
-- so entries below 0.1 become 0.1 and all others pass through unchanged.
--
-- NOTE(review): the floor is 0.1 rather than the 0 of a textbook ReLU;
-- confirm this is intentional.
relu :: Vector Double -> Vector Double
relu xs = cmap clampBelow xs
  where
    clampBelow v = max 0.1 v

View File

@ -16,19 +16,28 @@ module Sibe.Word2Vec
import Control.Monad
import System.Random
import Graphics.Rendering.Chart as Chart
import Graphics.Rendering.Chart.Backend.Cairo
import Control.Lens
-- | The word2vec variants that a 'Word2Vec' configuration can name
-- through its 'method' field.
data W2VMethod
  = SkipGram
  | CBOW
-- | Configuration record consumed by 'word2vec'.
--
-- Fields are deliberately left lazy: the 'Default' instance does not
-- populate every field, which only works with non-strict fields.
data Word2Vec = Word2Vec
  { docs :: [String]
    -- ^ input documents
  , window :: Int
    -- ^ presumably the context window size; TODO confirm against the
    -- training-data construction
  , dimensions :: Int
    -- ^ used by 'word2vec' in the first layer spec it hands to
    -- @randomNetwork@
  , method :: W2VMethod
    -- ^ which word2vec variant is requested
  , w2vChartName :: String
    -- ^ file name passed to @renderableToFile@ when the chart is drawn
  , w2vDrawChart :: Bool
    -- ^ when 'True', 'word2vec' renders the word-vector chart
  }
-- | Default configuration: no documents, a window of 2, chart drawing
-- switched off, and @"w2v.png"@ as the chart file name.
--
-- No default is defined for 'dimensions' or 'method'; callers are expected
-- to supply them with a record update
-- (@def { dimensions = ..., method = ... }@). They are bound to explicit
-- errors here instead of being left out of the record: omitting them
-- compiles with only a @-Wmissing-fields@ warning and later fails with a
-- generic "Missing field in record construction" exception, whereas these
-- messages tell the caller what to set.
instance Default Word2Vec where
  def = Word2Vec
    { docs = []
    , window = 2
    , dimensions = error "Word2Vec: 'dimensions' has no default; set it on the config"
    , method = error "Word2Vec: 'method' has no default; set it on the config"
    , w2vChartName = "w2v.png"
    , w2vDrawChart = False
    }
word2vec w2v session = do
seed <- newStdGen
let s = session { training = trainingData
, network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, crossEntropy'))
}
@ -49,6 +58,26 @@ module Sibe.Word2Vec
-- run words through the hidden layer alone to get the word vector
let computedVocVec = map (\(w, v) -> (w, runLayer' v hidden)) vocvec
when (w2vDrawChart w2v) $ do
let mat = fromColumns . map snd $ computedVocVec
(u, s, v) = svd mat
cut = subMatrix (0, 0) (2, cols mat)
diagS = diagRect 0 (V.take 2 s) (rows mat) (cols mat)
twoDimensions = cut $ u <> diagS <> tr v
textData = zipWith (\s l -> (V.head l, V.last l, s)) (map fst computedVocVec) (toColumns twoDimensions)
chart = toRenderable layout
where
textP = plot_annotation_values .~ textData
$ def
layout = layout_title .~ "word vectors"
$ layout_plots .~ [toPlot textP]
$ def
renderableToFile def (w2vChartName w2v) chart
return ()
return (computedVocVec, vocvec)
where
-- clean documents