feat(notmnist): notmnist example using SGD + learning rate decay

This commit is contained in:
Mahdi Dibaiee
2016-09-10 00:36:15 +04:30
parent ace0a18653
commit b26347e19f
21 changed files with 619 additions and 320 deletions

49
examples/424encoder.hs Normal file
View File

@ -0,0 +1,49 @@
module Main where
import Sibe
import Numeric.LinearAlgebra
import Data.List
import Debug.Trace
import Data.Default.Class
main = do
let alpha = 0.5
epochs = 1000
a = (sigmoid, sigmoid')
rnetwork = randomNetwork 0 (-0.1, 0.1) 4 [(2, a)] (4, a)
inputs = [vector [1, 0, 0, 0],
vector [0, 1, 0, 0],
vector [0, 0, 1, 0],
vector [0, 0, 0, 1]]
labels = [vector [1, 0, 0, 0],
vector [0, 1, 0, 0],
vector [0, 0, 1, 0],
vector [0, 0, 0, 1]]
session = def { network = rnetwork
, learningRate = 0.5
, epochs = 1000
, training = zip inputs labels
, test = zip inputs labels
} :: Session
let initialCost = crossEntropy session
newsession <- run gd session
let results = map (`forward` newsession) inputs
rounded = map (map round . toList) results
cost = crossEntropy newsession
putStrLn "parameters: "
putStrLn $ "- inputs: " ++ show inputs
putStrLn $ "- labels: " ++ show labels
putStrLn $ "- learning rate: " ++ show alpha
putStrLn $ "- epochs: " ++ show epochs
putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost
putStrLn "results: "
putStrLn $ "- actual result: " ++ show results
putStrLn $ "- rounded result: " ++ show rounded
putStrLn $ "- cost (cross-entropy): " ++ show cost

View File

@ -1,6 +1,7 @@
module Main
where
-- import Sibe
import Sibe.NLP
import Sibe.NaiveBayes
import Text.Printf
import Data.List
@ -28,14 +29,14 @@ module Main
documents = cleanDocuments . removeWords sws $ createDocuments classes dataset
testDocuments = cleanDocuments $ createDocuments classes test
nb = train documents intClasses
nb = initialize documents intClasses
-- top-ten
topClasses = take 10 . reverse $ sortBy (compare `on` (length . snd)) (cd nb)
filtered = map (\(c, ds) -> (c, take 100 ds)) topClasses
filteredClasses = map fst filtered
ttDocs = concatMap snd filtered
ttNB = train ttDocs filteredClasses
ttNB = initialize ttDocs filteredClasses
ttTestDocuments = filter ((`elem` filteredClasses) . c) . cleanDocuments $ createDocuments classes test

View File

@ -1,54 +0,0 @@
{-# LANGUAGE BangPatterns #-}
module Main
where
-- import Sibe
import Sibe.NaiveBayes
import Text.Printf
import Data.List
import Data.Maybe
import Debug.Trace
import Data.List.Split
import System.Directory
import Control.DeepSeq
import System.IO
main = do
putStr "Reading documents... "
neg_documents <- createDocuments "examples/sentiment-analysis-data/train/neg/"
pos_documents <- createDocuments "examples/sentiment-analysis-data/train/pos/"
test_neg_documents <- createDocuments "examples/sentiment-analysis-data/test/neg/"
test_pos_documents <- createDocuments "examples/sentiment-analysis-data/test/pos/"
putStrLn "done"
let classes = [0..9] -- rating, from 0 to 9 (1 to 10)
documents = neg_documents ++ pos_documents
nb = train documents classes
testDocuments = neg_documents ++ pos_documents
results = map (\(Document text c) -> (c, run text nb)) testDocuments
-- results = map (\(Document text c) -> (c, determine text nb intClasses documents)) devTestDocuments
print results
-- let showResults (c, r) = putStrLn (show (classes !! c) ++ " ~ " ++ show (classes !! r))
-- mapM_ showResults results
--
-- putStrLn $ "Recall: " ++ show (recall results)
-- putStrLn $ "Precision: " ++ show (precision results)
-- putStrLn $ "F Measure: " ++ show (fmeasure results)
-- putStrLn $ "Accuracy: " ++ show (accuracy results)
createDocuments :: FilePath -> IO [Document]
createDocuments path = do
files <- drop 2 <$> getDirectoryContents path
let ratings = map (subtract 1 . read . take 1 . last . splitOn "_") files :: [Int]
contents <- mapM (forceReadFile . (path ++)) files
return $ zipWith Document contents ratings
forceReadFile :: FilePath -> IO String
forceReadFile file = do
handle <- openFile file ReadMode
content <- hGetContents handle
content `deepseq` hClose handle
return content

1
examples/notMNIST Submodule

Submodule examples/notMNIST added at 0dbdfd43ff

111
examples/notmnist.hs Normal file
View File

@ -0,0 +1,111 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Main where
import Sibe
import Numeric.LinearAlgebra
import Data.List
import Debug.Trace
import System.IO
import System.Directory
import Codec.Picture
import Codec.Picture.Types
import qualified Data.Vector.Storable as V
import Data.Either
import System.Random
import System.Random.Shuffle
import Data.Default.Class
import qualified Graphics.Rendering.Chart.Easy as Chart
import Graphics.Rendering.Chart.Backend.Cairo
main = do
setStdGen (mkStdGen 100)
let a = (sigmoid, sigmoid')
o = (softmax, one)
rnetwork = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, a)
(inputs, labels) <- dataset
let trp = length inputs * 70 `div` 100
tep = length inputs * 30 `div` 100
-- training data
trinputs = take trp inputs
trlabels = take trp labels
-- test data
teinputs = take tep . drop trp $ inputs
telabels = take tep . drop trp $ labels
let session = def { learningRate = 0.5
, batchSize = 32
, epochs = 35
, network = rnetwork
, training = zip trinputs trlabels
, test = zip teinputs telabels
} :: Session
let initialCost = crossEntropy session
newsession <- run (sgd . learningRateDecay (1.1, 5e-2)) session
let el = map (\(e, l, _) -> (e, l)) (chart newsession)
ea = map (\(e, _, a) -> (e, a)) (chart newsession)
toFile Chart.def "notmnist.png" $ do
Chart.layoutlr_title Chart..= "loss over time"
Chart.plotLeft (Chart.line "loss" [el])
Chart.plotRight (Chart.line "learningRate" [ea])
let cost = crossEntropy newsession
putStrLn "parameters: "
putStrLn $ "- batch size: " ++ show (batchSize session)
putStrLn $ "- learning rate: " ++ show (learningRate session)
putStrLn $ "- epochs: " ++ show (epochs session)
putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost
putStrLn "results: "
putStrLn $ "- accuracy: " ++ show (accuracy newsession)
putStrLn $ "- cost (cross-entropy): " ++ show cost
dataset :: IO ([Vector Double], [Vector Double])
dataset = do
let dir = "examples/notMNIST/"
groups <- filter ((/= '.') . head) <$> listDirectory dir
inputFiles <- mapM (listDirectory . (dir ++)) groups
let n = 512 {-- minimum (map length inputFiles) --}
numbers = map (`div` n) [0..n * length groups - 1]
inputFilesFull = map (\(i, g) -> map ((dir ++ i ++ "/") ++) g) (zip groups inputFiles)
inputImages <- mapM (mapM readImage . take n) inputFilesFull
let names = map (take n) inputFilesFull
let (l, r) = partitionEithers $ concat inputImages
inputs = map (fromPixels . convertRGB8) r
labels = map (\i -> V.replicate i 0 `V.snoc` 1 V.++ V.replicate (9 - i) 0) numbers
pairs = zip inputs labels
shuffled <- shuffleM pairs
return (map fst shuffled, map snd shuffled)
where
fromPixels :: Image PixelRGB8 -> Vector Double
fromPixels img@Image { .. } =
let pairs = [(x, y) | x <- [0..imageWidth - 1], y <- [0..imageHeight - 1]]
in V.fromList $ map iter pairs
where
iter (x, y) =
let (PixelRGB8 r g b) = convertPixel $ pixelAt img x y
in
if r == 0 && g == 0 && b == 0 then 0 else 1

View File

View File

@ -1,5 +0,0 @@
XSym
0040
3666c4cacaf995ebd11ef25aab70de99
../../sibe-repos/sentiment-analysis-data

35
examples/sin.hs Normal file
View File

@ -0,0 +1,35 @@
module Main where
import Sibe
import Numeric.LinearAlgebra
import Data.List
import Debug.Trace
main = do
let alpha = 0.5
epochs = 1000
a = (sigmoid, sigmoid')
lo = (sigmoid, (\_ -> 1)) -- cross entropy
-- a = (relu, relu')
rnetwork = randomNetwork 0 (-1, 1) 1 [(50, a)] (1, lo)
inputs = map (\a -> vector [a]) (reverse [0, 30, 45, 60, 90])
labels = map (\deg -> vector $ [sin $ deg * pi/180]) (reverse [0, 30, 45, 60, 90])
initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels
network <- run session inputs rnetwork labels alpha epochs
let results = map (`forward` network) inputs
rounded = map (map round . toList) results
cost = zipWith crossEntropy (map (`forward` network) inputs) labels
putStrLn "parameters: "
putStrLn $ "- inputs: " ++ show inputs
putStrLn $ "- labels: " ++ show labels
putStrLn $ "- learning rate: " ++ show alpha
putStrLn $ "- epochs: " ++ show epochs
{-putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost-}
putStrLn "results: "
putStrLn $ "- actual result: " ++ show results
{-putStrLn $ "- cost (cross-entropy): " ++ show cost-}

View File

@ -3,30 +3,37 @@ module Main where
import Numeric.LinearAlgebra
import Data.List
import Debug.Trace
import Data.Default.Class
main = do
let learning_rate = 0.5
(iterations, epochs) = (2, 1000)
a = (sigmoid, sigmoid')
rnetwork = randomNetwork 0 2 [(8, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output
let a = (sigmoid, sigmoid')
rnetwork = randomNetwork 0 (-1, 1) 2 [(2, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output
inputs = [vector [0, 1], vector [1, 0], vector [1, 1], vector [0, 0]]
labels = [vector [1], vector [1], vector [0], vector [0]]
initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels
session = def { network = rnetwork
, learningRate = 0.5
, epochs = 1000
, training = zip inputs labels
, test = zip inputs labels
} :: Session
network = session inputs rnetwork labels learning_rate (iterations, epochs)
results = map (`forward` network) inputs
initialCost = crossEntropy session
newsession <- run gd session
let results = map (`forward` newsession) inputs
rounded = map (map round . toList) results
cost = zipWith crossEntropy (map (`forward` network) inputs) labels
cost = crossEntropy newsession
putStrLn "parameters: "
putStrLn $ "- inputs: " ++ show inputs
putStrLn $ "- labels: " ++ show labels
putStrLn $ "- learning rate: " ++ show learning_rate
putStrLn $ "- iterations/epochs: " ++ show (iterations, epochs)
putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost
putStrLn $ "- learning rate: " ++ show (learningRate session)
putStrLn $ "- epochs: " ++ show (epochs session)
putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost
putStrLn "results: "
putStrLn $ "- actual result: " ++ show results
putStrLn $ "- rounded result: " ++ show rounded