feat(rnn): recurrent neural networks, experimental
WIP: runs out of memory quickly
This commit is contained in:
parent
44f2ae372a
commit
728df02fbd
65
examples/recurrent.hs
Normal file
65
examples/recurrent.hs
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
|
||||||
|
module Main where
|
||||||
|
import Numeric.LinearAlgebra
|
||||||
|
import Numeric.Sibe.Recurrent
|
||||||
|
import Numeric.Sibe.Utils
|
||||||
|
import System.IO
|
||||||
|
import Data.Default.Class
|
||||||
|
import Data.List (genericLength)
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as BSL
|
||||||
|
|
||||||
|
main = do
|
||||||
|
texts <- lines <$> readFile "examples/reddit.csv"
|
||||||
|
let (vocabulary, indexes) = processData texts
|
||||||
|
|
||||||
|
let settings = def { wordD = length vocabulary }
|
||||||
|
r = randomRecurrent 0 settings
|
||||||
|
|
||||||
|
let x0 = reverse . drop 1 . reverse $ indexes !! 0
|
||||||
|
y0 = drop 1 $ indexes !! 0
|
||||||
|
|
||||||
|
print $ x0
|
||||||
|
print $ y0
|
||||||
|
|
||||||
|
let xs = map (reverse . drop 1 . reverse) indexes
|
||||||
|
ys = map (drop 1) indexes
|
||||||
|
|
||||||
|
let tov = fromList . map fromIntegral
|
||||||
|
let vys = map tov ys
|
||||||
|
|
||||||
|
let newr = sgd r (take 1 xs) (take 1 vys) 0.005 1
|
||||||
|
|
||||||
|
saveRecurrent "recurrent.trained" (show newr) 512
|
||||||
|
--writeFile "recurrent.trained" (show newr)
|
||||||
|
|
||||||
|
let newpredicted = predict newr x0
|
||||||
|
print $ y0
|
||||||
|
print $ newpredicted
|
||||||
|
|
||||||
|
print $ loss (tov y0) (tov newpredicted)
|
||||||
|
|
||||||
|
{-let (dU, dV, dW) = backprop r x0 (fromList $ map fromIntegral y0)-}
|
||||||
|
{-print $ seq u "u"-}
|
||||||
|
{-print $ seq v "v"-}
|
||||||
|
{-print $ seq w "w"-}
|
||||||
|
|
||||||
|
--print $ dW
|
||||||
|
print "done"
|
||||||
|
|
||||||
|
saveRecurrent :: FilePath -> String -> Int -> IO ()
|
||||||
|
saveRecurrent path str chunkSize = do
|
||||||
|
handle <- openFile path AppendMode
|
||||||
|
hSetBuffering handle NoBuffering
|
||||||
|
loop handle str
|
||||||
|
hClose handle
|
||||||
|
where
|
||||||
|
loop _ [] = return ()
|
||||||
|
loop handle s = do
|
||||||
|
hPutStr handle $ take chunkSize s
|
||||||
|
hFlush handle
|
||||||
|
putStr $ take chunkSize s
|
||||||
|
loop handle $ drop chunkSize s
|
||||||
|
|
20087
examples/reddit.csv
Normal file
20087
examples/reddit.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -31,7 +31,7 @@ module Main where
|
|||||||
setStdGen (mkStdGen 100)
|
setStdGen (mkStdGen 100)
|
||||||
sws <- lines <$> readFile "examples/stopwords"
|
sws <- lines <$> readFile "examples/stopwords"
|
||||||
|
|
||||||
-- real data, takes a lot of time to train
|
-- real data, currently faces a memory problem
|
||||||
{-ds <- do-}
|
{-ds <- do-}
|
||||||
{-files <- filter ((/= "xml") . take 1 . reverse) <$> listDirectory "examples/blogs-corpus/"-}
|
{-files <- filter ((/= "xml") . take 1 . reverse) <$> listDirectory "examples/blogs-corpus/"-}
|
||||||
{-contents <- mapM (rf . ("examples/blogs-corpus/" ++)) files-}
|
{-contents <- mapM (rf . ("examples/blogs-corpus/" ++)) files-}
|
||||||
|
29
sibe.cabal
29
sibe.cabal
@ -1,5 +1,5 @@
|
|||||||
name: sibe
|
name: sibe
|
||||||
version: 0.2.0.0
|
version: 0.2.0.1
|
||||||
synopsis: Machine Learning algorithms
|
synopsis: Machine Learning algorithms
|
||||||
description: Haskell Machine Learning
|
description: Haskell Machine Learning
|
||||||
homepage: https://github.com/mdibaiee/sibe
|
homepage: https://github.com/mdibaiee/sibe
|
||||||
@ -15,7 +15,13 @@ cabal-version: >=1.10
|
|||||||
|
|
||||||
library
|
library
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
exposed-modules: Numeric.Sibe, Numeric.Sibe.NaiveBayes, Numeric.Sibe.NLP, Numeric.Sibe.Word2Vec, Numeric.Sibe.Utils
|
exposed-modules: Numeric.Sibe,
|
||||||
|
Numeric.Sibe.NaiveBayes,
|
||||||
|
Numeric.Sibe.NLP,
|
||||||
|
Numeric.Sibe.Word2Vec,
|
||||||
|
Numeric.Sibe.Utils,
|
||||||
|
Numeric.Sibe.Recurrent
|
||||||
|
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, hmatrix
|
, hmatrix
|
||||||
, random
|
, random
|
||||||
@ -29,8 +35,8 @@ library
|
|||||||
, vector
|
, vector
|
||||||
, random-shuffle
|
, random-shuffle
|
||||||
, data-default-class
|
, data-default-class
|
||||||
, Chart
|
, Chart >= 1.8 && < 2
|
||||||
, Chart-cairo
|
, Chart-cairo >= 1.8 && < 2
|
||||||
, lens
|
, lens
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
@ -58,6 +64,21 @@ executable example-word2vec
|
|||||||
, random
|
, random
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
executable example-recurrent
|
||||||
|
hs-source-dirs: examples
|
||||||
|
main-is: recurrent.hs
|
||||||
|
ghc-options: -threaded -rtsopts -with-rtsopts=-N -O2
|
||||||
|
build-depends: base
|
||||||
|
, sibe
|
||||||
|
, hmatrix
|
||||||
|
, data-default-class
|
||||||
|
, split
|
||||||
|
, vector
|
||||||
|
, directory
|
||||||
|
, random
|
||||||
|
, bytestring
|
||||||
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable example-424
|
executable example-424
|
||||||
hs-source-dirs: examples
|
hs-source-dirs: examples
|
||||||
main-is: 424encoder.hs
|
main-is: 424encoder.hs
|
||||||
|
@ -112,8 +112,8 @@ module Numeric.Sibe.NaiveBayes
|
|||||||
-- in realToFrac (tct * pg + 1) / realToFrac (cvoc + voc) -- uncomment to enable ngrams
|
-- in realToFrac (tct * pg + 1) / realToFrac (cvoc + voc) -- uncomment to enable ngrams
|
||||||
in realToFrac (tct + 1) / realToFrac (cvoc + voc)
|
in realToFrac (tct + 1) / realToFrac (cvoc + voc)
|
||||||
|
|
||||||
argmax :: (Ord a) => [a] -> Int
|
{-argmax :: (Ord a) => [a] -> Int-}
|
||||||
argmax x = fst $ maximumBy (\(_, a) (_, b) -> a `compare` b) (zip [0..] x)
|
{-argmax x = fst $ maximumBy (\(_, a) (_, b) -> a `compare` b) (zip [0..] x)-}
|
||||||
|
|
||||||
mean :: [Double] -> Double
|
mean :: [Double] -> Double
|
||||||
mean x = sum x / genericLength x
|
mean x = sum x / genericLength x
|
||||||
|
144
src/Numeric/Sibe/Recurrent.hs
Normal file
144
src/Numeric/Sibe/Recurrent.hs
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
{-# LANGUAGE GADTs #-}
|
||||||
|
{-# LANGUAGE BangPatterns #-}
|
||||||
|
{-# LANGUAGE DataKinds #-}
|
||||||
|
{-# LANGUAGE TypeOperators #-}
|
||||||
|
|
||||||
|
module Numeric.Sibe.Recurrent
|
||||||
|
( Recurrent (..)
|
||||||
|
, randomRecurrent
|
||||||
|
, processData
|
||||||
|
, forward
|
||||||
|
, predict
|
||||||
|
, loss
|
||||||
|
, backprop
|
||||||
|
, sgd
|
||||||
|
) where
|
||||||
|
import Numeric.LinearAlgebra
|
||||||
|
import System.Random
|
||||||
|
import System.Random.Shuffle
|
||||||
|
import Debug.Trace
|
||||||
|
import qualified Data.List as L
|
||||||
|
import Data.Maybe
|
||||||
|
import System.IO
|
||||||
|
import Control.DeepSeq
|
||||||
|
import Control.Monad
|
||||||
|
import qualified Data.Vector.Storable as V
|
||||||
|
import Data.Default.Class
|
||||||
|
|
||||||
|
import qualified Graphics.Rendering.Chart.Easy as Chart
|
||||||
|
import Graphics.Rendering.Chart.Backend.Cairo
|
||||||
|
import Numeric.Sibe.Utils
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
|
processData :: [String] -> ([(Int, String)], [[Int]])
|
||||||
|
processData x =
|
||||||
|
let setokens = map (\a -> " <start> " ++ a ++ " <end> ") x
|
||||||
|
tokenized = map tokenize setokens
|
||||||
|
vocabulary = zip [0..] (unique . concat $ tokenized)
|
||||||
|
indexes = map (\a -> fst . fromJust $ L.find ((==a) . snd) vocabulary)
|
||||||
|
in (vocabulary, map indexes tokenized)
|
||||||
|
|
||||||
|
data Recurrent = Recurrent { bpttThreshold :: Int
|
||||||
|
, wordD :: Int
|
||||||
|
, hiddenD :: Int
|
||||||
|
, u :: Matrix Double
|
||||||
|
, v :: Matrix Double
|
||||||
|
, w :: Matrix Double
|
||||||
|
} deriving (Show, Read)
|
||||||
|
instance Default Recurrent where
|
||||||
|
def = Recurrent { bpttThreshold = 3
|
||||||
|
, hiddenD = 100
|
||||||
|
}
|
||||||
|
|
||||||
|
randomRecurrent :: Seed -> Recurrent -> Recurrent
|
||||||
|
randomRecurrent seed r = r { u = randomMatrix (wordD r, hiddenD r) (bounds $ wordD r)
|
||||||
|
, v = randomMatrix (hiddenD r, wordD r) (bounds $ hiddenD r)
|
||||||
|
, w = randomMatrix (hiddenD r, hiddenD r) (bounds $ hiddenD r)
|
||||||
|
}
|
||||||
|
where
|
||||||
|
randomMatrix (wr, wc) (l, u) = uniformSample (seed + wr + wc) wr $ replicate wc (l, u)
|
||||||
|
bounds x = (negate . sqrt $ 1 / fromIntegral x, sqrt $ 1 / fromIntegral x)
|
||||||
|
|
||||||
|
|
||||||
|
forward :: Recurrent -> [Int] -> (Matrix Double, Matrix Double)
|
||||||
|
forward r input =
|
||||||
|
let (h, o) = helper [vector (replicate (hiddenD r) 0)] [] input
|
||||||
|
in (fromRows h, fromRows o)
|
||||||
|
where
|
||||||
|
helper hs os [] = (hs, os)
|
||||||
|
helper (h:hs) os (i:is) =
|
||||||
|
let k = w r #> h
|
||||||
|
newh = V.map tanh $ (u r ! i) + k
|
||||||
|
o = softmax $ newh <# v r
|
||||||
|
in helper (newh:h:hs) (o:os) is
|
||||||
|
|
||||||
|
predict :: Recurrent -> [Int] -> [Int]
|
||||||
|
predict r i =
|
||||||
|
let (_, o) = forward r i
|
||||||
|
in map argmax (toLists o)
|
||||||
|
|
||||||
|
backprop :: Recurrent -> [Int] -> Vector Double -> (Matrix Double, Matrix Double, Matrix Double)
|
||||||
|
backprop r input y =
|
||||||
|
let dU = zero (u r)
|
||||||
|
dV = zero (v r)
|
||||||
|
dW = zero (w r)
|
||||||
|
in bp dU dV dW (zip [0..] input)
|
||||||
|
where
|
||||||
|
(hs, os) = forward r input
|
||||||
|
-- delta
|
||||||
|
dO = fromColumns $ zipWith (\i o -> if i `V.elem` y then o - 1 else o) [0..] (toColumns os)
|
||||||
|
|
||||||
|
bp dU dV dW [] = (dU, dV, dW)
|
||||||
|
bp dU dV dW ((i,x):xs) =
|
||||||
|
let ndV = dV + (hs ! i) `outer` (dO ! i)
|
||||||
|
dT = (v r) #> (dO ! i) -- * (1 - (hs ! i)^2)
|
||||||
|
threshold = bpttThreshold r
|
||||||
|
(ndU, ndW) = tt dU dW dT [max 0 (i-threshold)..i]
|
||||||
|
in bp ndU ndV ndW xs
|
||||||
|
where
|
||||||
|
tt dU dW dT [] = (dU, dW)
|
||||||
|
tt dU dW dT (c:cs) =
|
||||||
|
let ndW = dW + (dT `outer` (hs ! (max 0 $ c - 1)))
|
||||||
|
zdT = vector $ replicate (V.length dT) 0
|
||||||
|
mdT = fromRows $ replicate (max 0 $ c - 1) zdT ++ [dT] ++ replicate (min (rows dU - 1) $ rows dU - c) zdT
|
||||||
|
ndU = dU + mdT
|
||||||
|
ndT = (w r) #> dT
|
||||||
|
in tt ndU ndW ndT cs
|
||||||
|
|
||||||
|
zero m = ((rows m)><(cols m)) $ repeat 0
|
||||||
|
|
||||||
|
{-gradientCheck :: Recurrent -> [Int] -> Vector Double -> Double-}
|
||||||
|
|
||||||
|
sgdStep :: Recurrent -> [Int] -> Vector Double -> Double -> Recurrent
|
||||||
|
sgdStep r input y learningRate =
|
||||||
|
let (dU, dV, dW) = backprop r input y
|
||||||
|
in r { u = (u r) - scale learningRate dU
|
||||||
|
, v = (v r) - scale learningRate dV
|
||||||
|
, w = (w r) - scale learningRate dW
|
||||||
|
}
|
||||||
|
|
||||||
|
sgd :: Recurrent -> [[Int]] -> [Vector Double] -> Double -> Int -> Recurrent
|
||||||
|
sgd r input y learningRate epochs = run [0..epochs] r
|
||||||
|
where
|
||||||
|
run [] r = r
|
||||||
|
run (i:is) r = run is $ train (zip input y) r
|
||||||
|
|
||||||
|
train [] r = r
|
||||||
|
train ((x, y):xs) r = train xs $ sgdStep r x y learningRate
|
||||||
|
|
||||||
|
softmax :: Vector Double -> Vector Double
|
||||||
|
softmax x = cmap (\a -> exp a / s) x
|
||||||
|
where
|
||||||
|
s = V.sum $ exp x
|
||||||
|
|
||||||
|
softmax' :: Vector Double -> Vector Double
|
||||||
|
softmax' = cmap (\a -> sig a * (1 - sig a))
|
||||||
|
where
|
||||||
|
sig x = 1 / max (1 + exp (-x)) 1e-10
|
||||||
|
|
||||||
|
-- cross-entropy
|
||||||
|
loss :: Vector Double -> Vector Double -> Double
|
||||||
|
loss ys os = (-1 / fromIntegral (V.length os)) * V.sum (V.zipWith f os ys)
|
||||||
|
where
|
||||||
|
f a y = y * log (max 1e-10 a)
|
||||||
|
|
@ -4,10 +4,19 @@ module Numeric.Sibe.Utils
|
|||||||
, onehot
|
, onehot
|
||||||
, average
|
, average
|
||||||
, pca
|
, pca
|
||||||
|
, tokenize
|
||||||
|
, frequency
|
||||||
|
, unique
|
||||||
|
, argmax
|
||||||
|
, shape
|
||||||
) where
|
) where
|
||||||
import qualified Data.Vector.Storable as V
|
import qualified Data.Vector.Storable as V
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
import Numeric.LinearAlgebra
|
import Numeric.LinearAlgebra
|
||||||
|
import Data.List.Split
|
||||||
|
import Data.Char (isSpace, isNumber, toLower)
|
||||||
|
import Control.Arrow ((&&&))
|
||||||
|
import Data.List
|
||||||
|
|
||||||
similarity :: Vector Double -> Vector Double -> Double
|
similarity :: Vector Double -> Vector Double -> Double
|
||||||
similarity a b = (V.sum $ a * b) / (magnitude a * magnitude b)
|
similarity a b = (V.sum $ a * b) / (magnitude a * magnitude b)
|
||||||
@ -24,6 +33,8 @@ module Numeric.Sibe.Utils
|
|||||||
go _ [] = []
|
go _ [] = []
|
||||||
go s (x:xs) = if x `Set.member` s then go s xs
|
go s (x:xs) = if x `Set.member` s then go s xs
|
||||||
else x : go (Set.insert x s) xs
|
else x : go (Set.insert x s) xs
|
||||||
|
unique :: (Ord a) => [a] -> [a]
|
||||||
|
unique = ordNub
|
||||||
|
|
||||||
average :: Vector Double -> Vector Double
|
average :: Vector Double -> Vector Double
|
||||||
average v = cmap (/ (V.sum v)) v
|
average v = cmap (/ (V.sum v)) v
|
||||||
@ -39,3 +50,27 @@ module Numeric.Sibe.Utils
|
|||||||
diagS = diagRect 0 s (rows mat) (cols mat)
|
diagS = diagRect 0 s (rows mat) (cols mat)
|
||||||
|
|
||||||
in u ?? (All, Take d) <> diagS ?? (Take d, Take d)
|
in u ?? (All, Take d) <> diagS ?? (Take d, Take d)
|
||||||
|
|
||||||
|
tokenize :: String -> [String]
|
||||||
|
tokenize str =
|
||||||
|
let spaced = spacify str
|
||||||
|
ws = words spaced
|
||||||
|
in ws
|
||||||
|
where
|
||||||
|
puncs = ['!', '"', '#', '$', '%', '(', ')', '.', '?', ',', '\'', '/', '-']
|
||||||
|
replace needle replacement =
|
||||||
|
concatMap (\c -> if c == needle then replacement else c)
|
||||||
|
spacify = foldl (\acc c -> if c `elem` puncs then acc ++ [' ', c, ' '] else acc ++ [c]) ""
|
||||||
|
|
||||||
|
frequency :: (Ord a) => [a] -> [(a, Int)]
|
||||||
|
frequency = map (head &&& length) . group . sort
|
||||||
|
|
||||||
|
argmax :: (Foldable t, Num a, Fractional a, Ord a) => t a -> Int
|
||||||
|
argmax v = snd $ foldl mx ((-1/0), 0) v
|
||||||
|
where
|
||||||
|
mx (a, i) b
|
||||||
|
| b > a = (b, i + 1)
|
||||||
|
| otherwise = (a, i)
|
||||||
|
|
||||||
|
shape :: Matrix a -> (Int, Int)
|
||||||
|
shape x = (rows x, cols x)
|
||||||
|
Loading…
Reference in New Issue
Block a user