rm(sin): remove sin example

fix(ignoreBiases): was ignoring nodes, lol
fix(w2v): better logging and implementation
Mahdi Dibaiee 2016-09-16 13:31:23 +04:30
parent c0083f5c05
commit d4ac90bbd5
6 changed files with 1119 additions and 146 deletions

examples/sin.hs

@@ -1,35 +0,0 @@
-module Main where
-  import Sibe
-  import Numeric.LinearAlgebra
-  import Data.List
-  import Debug.Trace
-
-  main = do
-    let alpha = 0.5
-        epochs = 1000
-        a = (sigmoid, sigmoid')
-        lo = (sigmoid, (\_ -> 1)) -- cross entropy
-        -- a = (relu, relu')
-
-        rnetwork = randomNetwork 0 (-1, 1) 1 [(50, a)] (1, lo)
-
-        inputs = map (\a -> vector [a]) (reverse [0, 30, 45, 60, 90])
-        labels = map (\deg -> vector $ [sin $ deg * pi/180]) (reverse [0, 30, 45, 60, 90])
-
-        initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels
-
-    network <- run session inputs rnetwork labels alpha epochs
-
-    let results = map (`forward` network) inputs
-        rounded = map (map round . toList) results
-
-        cost = zipWith crossEntropy (map (`forward` network) inputs) labels
-
-    putStrLn "parameters: "
-    putStrLn $ "- inputs: " ++ show inputs
-    putStrLn $ "- labels: " ++ show labels
-    putStrLn $ "- learning rate: " ++ show alpha
-    putStrLn $ "- epochs: " ++ show epochs
-    {-putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost-}
-    putStrLn "results: "
-    putStrLn $ "- actual result: " ++ show results
-    {-putStrLn $ "- cost (cross-entropy): " ++ show cost-}

examples/word2vec.hs

@@ -15,20 +15,43 @@ module Main where
 import Data.List.Split

 main = do
-  {-ds <- do
-    content <- readFile "examples/doc-classifier-data/data-reuters"
-    let splitted = splitOn (replicate 10 '-' ++ "\n") content
-        d = concatMap (tail . lines) (take 100 splitted)
-    return d-}
-  let ds = ["I like deep learning", "I like NLP", "I enjoy flying"]
+  sws <- lines <$> readFile "examples/stopwords"
+  {-ds <- do-}
+    {-content <- readFile "examples/doc-classifier-data/data-reuters"-}
+    {-let splitted = splitOn (replicate 10 '-' ++ "\n") content-}
+        {-d = concatMap (tail . lines) (take 100 splitted)-}
+    {-return $ removeWords sws d-}
+  --let ds = ["I like deep learning", "I like NLP", "I enjoy flying"]
+  let ds = ["the king loves the queen", "the queen loves the king",
+            "the dwarf hates the king", "the queen hates the dwarf",
+            "the dwarf poisons the king", "the dwarf poisons the queen"]

-  let session = def { learningRate = 0.8
-                    , batchSize = 10
-                    , epochs = 1000
+  let session = def { learningRate = 0.1
+                    , batchSize = 16
+                    , epochs = 100
                     } :: Session
-      w2v = def { docs = ds }:: Word2Vec
+      w2v = def { docs = ds
+                , dimensions = 50
+                , method = SkipGram
+                , window = 3
+                } :: Word2Vec

-  r <- word2vec w2v session
-  {-print r-}
+  (computed, vocvec) <- word2vec w2v session
+
+  mapM_ (\(w, v) -> do
+          putStr $ w ++ ": "
+          let similarities = map (similarity v . snd) computed
+          let sorted = sortBy (compare `on` similarity v . snd) computed
+          print . take 2 . drop 1 . reverse $ map fst sorted
+        ) computed

   return ()
+
+removeWords :: [String] -> [String] -> [String]
+removeWords ws documents =
+  map (rm ws) documents
+  where
+    rm list text =
+      unwords $ filter (`notElem` list) (words text)
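
A note for readers following the example above: `similarity` ranks every word vector against every other, and `drop 1 . reverse` skips the word itself before printing its two nearest neighbours. A minimal sketch of what a cosine-style similarity over hmatrix vectors could look like (an assumption for illustration; the actual definition lives in the library's utility modules, not in this diff):

  import Numeric.LinearAlgebra (Vector, dot, norm_2)

  -- cosine similarity: 1 for parallel vectors, 0 for orthogonal ones
  cosineSimilarity :: Vector Double -> Vector Double -> Double
  cosineSimilarity a b = (a `dot` b) / (norm_2 a * norm_2 b)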

log (new file, 1012 lines added): file diff suppressed because it is too large

sibe.cabal

@@ -33,15 +33,6 @@ library
                      , Chart-cairo
   default-language:    Haskell2010

---executable sibe-exe
---  hs-source-dirs:      app
---  main-is:             Main.hs
---  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
---  build-depends:       base
---                     , sibe
---                     , hmatrix
---  default-language:    Haskell2010
-
 executable example-xor
   hs-source-dirs:      examples
   main-is:             xor.hs
@@ -64,15 +55,6 @@ executable example-word2vec
                      , vector
   default-language:    Haskell2010

---executable example-sin
---  hs-source-dirs:      examples
---  main-is:             sin.hs
---  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
---  build-depends:       base
---                     , sibe
---                     , hmatrix
---  default-language:    Haskell2010
-
 executable example-424
   hs-source-dirs:      examples
   main-is:             424encoder.hs

src/Sibe.hs

@@ -10,7 +10,9 @@ module Sibe
     Output,
     Activation,
     forward,
+    forward',
     runLayer,
+    runLayer',
     randomLayer,
     randomNetwork,
     buildNetwork,
@@ -84,7 +86,6 @@ module Sibe
             , batchSize :: Int
             , chart :: [(Int, Double, Double)]
             , momentum :: Double
-            , biases :: Bool
             } deriving (Show)

   emptyNetwork = randomNetwork 0 (0, 0) 0 [] (0, (id, id))
@@ -98,7 +99,6 @@ module Sibe
             , batchSize = 0
             , chart = []
             , momentum = 0
-            , biases = True
             }

   saveNetwork :: Network -> String -> IO ()
@@ -133,6 +133,12 @@ module Sibe
       compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l
       compute input (l@(Layer _ _ (fn, _)) :- n) = compute ((fst . activation $ l) $ runLayer input l) n

+  forward' :: Input -> Session -> Output
+  forward' input session = compute input (network session)
+    where
+      compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer' input l
+      compute input (l@(Layer _ _ (fn, _)) :- n) = compute ((fst . activation $ l) $ runLayer' input l) n
+
   randomLayer :: Seed -> (Int, Int) -> (Double, Double) -> Activation -> Layer
   randomLayer seed (wr, wc) (l, u) =
     let weights = uniformSample seed wr $ replicate wc (l, u)
@@ -209,14 +215,12 @@ module Sibe
               o = fn y
               delta = o - target
               de = delta * fn' y
-              -- de = delta / fromIntegral (V.length o) -- cross entropy cost

               biases' = biases - scale alpha de
               weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
               layer = Layer biases' weights' (fn, fn') -- updated layer

               pass = weights #> de
-              -- pass = weights #> de
           in (O layer, pass)

         run input (l@(Layer biases weights (fn, fn')) :- n) =
@@ -226,12 +230,11 @@ module Sibe
               de = delta * fn' y

-              biases' = biases - cmap (*alpha) de
-              weights' = weights - cmap (*alpha) (input `outer` de)
+              biases' = biases - scale alpha de
+              weights' = weights - scale alpha (input `outer` de)
               layer = Layer biases' weights' (fn, fn')

               pass = weights #> de
-              -- pass = weights #> de
           in (layer :- n', pass)

     gd :: Session -> IO Session
@@ -280,8 +283,6 @@ module Sibe
           let el = map (\(e, l, _) -> (e, l)) (chart session)
               ea = map (\(e, _, a) -> (e, a)) (chart session)

-          putStrLn $ (show $ epoch session) ++ " => " ++ (show cost) ++ " @ " ++ (show $ learningRate session)
-
           toFile Chart.def "sgd.png" $ do
             Chart.layoutlr_title Chart..= "loss over time"
             Chart.plotLeft (Chart.line "loss" [el])
@@ -312,8 +313,8 @@ module Sibe
     ignoreBiases session =
       session { network = rmbias (network session) }
       where
-        rmbias (O (Layer nodes biases a)) = O $ Layer nodes (biases * 0) a
-        rmbias ((Layer nodes biases a) :- n) = Layer nodes (biases * 0) a :- rmbias n
+        rmbias (O (Layer biases nodes a)) = O $ Layer (biases * 0) nodes a
+        rmbias ((Layer biases nodes a) :- n) = Layer (biases * 0) nodes a :- rmbias n

     run :: (Session -> IO Session)
         -> Session -> IO Session
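
Two things happen in this file: runLayer' and forward' are introduced as bias-free variants of the forward pass (word2vec uses runLayer' to read word vectors out of the hidden layer), and ignoreBiases is fixed. The old pattern `Layer nodes biases a` named the constructor fields in the wrong order, so `biases * 0` was actually zeroing the weights, hence "was ignoring nodes" in the commit message. A hypothetical sketch of the two layer runners, assuming a layer holds a bias vector plus a weight matrix and that runLayer' simply drops the bias term (the names below are illustrative, not the library's):

  import Numeric.LinearAlgebra (Matrix, Vector, (<#))

  data DemoLayer = DemoLayer (Vector Double) (Matrix Double) -- biases, weights

  -- full forward step: apply weights, then add biases
  runLayerDemo :: Vector Double -> DemoLayer -> Vector Double
  runLayerDemo input (DemoLayer biases weights) = (input <# weights) + biases

  -- bias-free step, matching what a network trained with ignoreBiases needs
  runLayerDemo' :: Vector Double -> DemoLayer -> Vector Double
  runLayerDemo' input (DemoLayer _ weights) = input <# weights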

src/Sibe/Word2Vec.hs

@@ -1,9 +1,9 @@
 module Sibe.Word2Vec
-  (word2vec,
-   Word2Vec (..)
+  ( word2vec
+  , Word2Vec (..)
+  , W2VMethod (..)
   ) where
   import Sibe
-  import Sibe.NLP
   import Sibe.Utils
   import Debug.Trace
   import Data.Char
@@ -14,8 +14,11 @@ module Sibe.Word2Vec
   import Data.Default.Class
   import Data.Function (on)

+  data W2VMethod = SkipGram | CBOW
+
   data Word2Vec = Word2Vec { docs :: [String]
                            , window :: Int
+                           , dimensions :: Int
+                           , method :: W2VMethod
                            }

   instance Default Word2Vec where
     def = Word2Vec { docs = []
@@ -23,83 +26,70 @@ module Sibe.Word2Vec
                    }

   word2vec w2v session = do
-    return trainingData
     let s = session { training = trainingData
-                    , network = buildNetwork 0 (-1, 1) v [(v, 25, (id, one))] (20, v, (softmax, crossEntropy'))
-                    , biases = False
+                    , network = randomNetwork 0 (-1, 1) v [(dimensions w2v, (id, one))] (v, (softmax, one))
                     }
-    print trainingData
-    newses <- run (gd . learningRateDecay (1.1, 0.1)) s
+
+    putStr "vocabulary size: "
+    print v
+
+    putStr "trainingData length: "
+    print . length $ trainingData
+
+    -- biases are not used in skipgram/cbow
+    newses <- run (sgd . ignoreBiases) s
+
+    -- export the hidden layer
     let (hidden@(Layer biases nodes _) :- _) = network newses
-    {-let computedVocVec = map (\(w, v) -> (w, forward v newses)) vocvec-}
-    print biases
-    let computedVocVec = map (\(w, v) -> (w, v <# nodes)) vocvec
-    {-print computedVocVec-}
-    {-mapM_ (\(w, v) -> do
-            putStr $ w ++ ": "
-            let similarities = map (similarity v . snd) computedVocVec
-            let sorted = sortBy (compare `on` similarity v . snd) computedVocVec
-            {-print $ zip (map fst sorted) similarities-}
-            print . take 2 . drop 1 . reverse $ map fst sorted
-          ) computedVocVec-}
-    return newses
+
+    -- run words through the hidden layer alone to get the word vector
+    let computedVocVec = map (\(w, v) -> (w, runLayer' v hidden)) vocvec
+
+    return (computedVocVec, vocvec)
     where
-      ws = words (concatMap ((++ " <start> ") . map toLower) (docs w2v))
+      -- clean documents
+      ds = map cleanText (docs w2v)
+
+      -- words of each document
+      wd = map (words . (++ " ") . (map toLower)) ds
+
+      -- all words together, used to generate the vocabulary
+      ws = words (concatMap ((++ " ") . map toLower) ds)
       vocabulary = ordNub ws
       v = length vocabulary
-      cooccurence = foldl' iter [] (zip [0..] ws)
-        where
-          iter acc (i, w) =
-            let a = findIndex ((== w) . fst) acc
-                before = take (window w2v) . drop (i - window w2v) $ ws
-                after = take (window w2v) . drop (i + 1) $ ws
-                ns = if i == 0 then after else before ++ after
-            in
-              if isJust a then
-                let idx = fromJust a
-                    new = foldl (\acc n -> add acc n) (snd $ acc !! idx) ns
-                in take idx acc ++ [(w, new)] ++ drop (idx + 1) acc
-              else
-                acc ++ [(w, map (\n -> (n, 1)) ns)]
-          add [] n = [(n, 1)]
-          add ((hw, hc):hs) n
-            | n == hw = (hw, hc + 1):hs
-            | otherwise = (hw, hc):add hs n
+
+      -- generate one-hot vectors for each word of vocabulary
       vocvec = zip vocabulary $ map (onehot v) [0..v - 1]
-      {-trainingData = map iter cooccurence
-        where
-          iter (w, targets) =
-            let ts = map (\(w, c) -> c * (snd . fromJust $ find ((== w) . fst) vocvec)) targets
-                folded = foldl (+) (vector $ replicate v 0) ts
-                input = snd . fromJust $ find ((== w) . fst) vocvec
-            in (input, folded)-}
-      trainingData = map iter $ zip [window w2v..length vocvec - window w2v] vocvec
+
+      -- training data: generate input and output pairs for each word and the words in its window
+      trainingData = concatMap (\wds -> concatMap (iter wds) $ zip [0..] wds) wd
         where
-          iter (i, (w, v)) =
-            let before = take (window w2v) . drop (i - window w2v) $ vocvec
-                after = take (window w2v) . drop (i + 1) $ vocvec
-                ns = map snd $ before ++ after
-                new = foldl1 (+) ns
-            in (v, new)
-      add [] n = [(n, 1)]
-      add ((hw, hc):hs) n
-        | n == hw = (hw, hc + 1):hs
-        | otherwise = (hw, hc):add hs n
-      wordfrequency = foldl' iter [] ws
-        where
-          iter acc w =
-            let i = findIndex ((== w) . fst) acc
-            in
-              if isJust i then
-                let idx = fromJust i
-                in take idx acc ++ [(w, snd (acc !! idx) + 1)] ++ drop (idx + 1) acc
-              else
-                acc ++ [(w, 1)]
+          iter wds (i, w) =
+            let v = snd . fromJust . find ((==w) . fst) $ vocvec
+                before = take (window w2v) . drop (i - window w2v) $ wds
+                after = take (window w2v) . drop (i + 1) $ wds
+                ns
+                  | i == 0 = after
+                  | i == length vocvec - 1 = before
+                  | otherwise = before ++ after
+                vectorized = map (\w -> snd . fromJust $ find ((== w) . fst) vocvec) ns
+                new = foldl1 (+) vectorized
+            in
+              case method w2v of
+                SkipGram -> zip (repeat v) vectorized
+                CBOW -> zip vectorized (repeat v)
+                _ -> error "unsupported word2vec method"
+
+  cleanText :: String -> String
+  cleanText string =
+    let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?', '\'']) (trim string)
+        spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r']
+        nonumber = filter (not . isNumber) spacify
+        lower = map toLower nonumber
+    in (unwords . words) lower -- remove unnecessary spaces
+    where
+      trim = f . f
+        where
+          f = reverse . dropWhile isSpace
+      replace needle replacement =
+        map (\c -> if c == needle then replacement else c)
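
To make the trainingData generation above concrete: every word is paired with each word inside its window, and the method only decides the orientation of each pair: SkipGram predicts context words from the centre word, while CBOW maps each context word back to the centre. A self-contained illustration using plain words instead of one-hot vectors (a hypothetical helper, not part of the library):

  -- with window = 1, pairs 1 (words "the king loves the queen") gives, for
  -- the centre word "loves": [("loves","king"), ("loves","the")]
  pairs :: Int -> [String] -> [(String, String)]
  pairs window wds = concatMap gen (zip [0..] wds)
    where
      gen (i, w) =
        let before = take window . drop (i - window) $ wds
            after = take window . drop (i + 1) $ wds
            ctx | i == 0 = after
                | i == length wds - 1 = before
                | otherwise = before ++ after
        in zip (repeat w) ctx -- SkipGram orientation; swap the pair for CBOW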