feat(notmnist): notmnist example using SGD + learning rate decay
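In short: the commit adds a notMNIST classification example (examples/notmnist.hs, with the data pulled in as a git submodule), a 4-2-4 encoder example, and PNG charts of the training loss; src/Sibe.hs gains a Session record with a Default instance, mini-batch SGD (sgd) next to plain gradient descent (gd), an accuracy measure, softmax, and a learningRateDecay combinator; a new Sibe.NLP module takes over the text-cleaning helpers and the old naive Bayes sentiment-analysis example is dropped. A minimal sketch of the training entry point the new example uses (mirroring examples/notmnist.hs in this diff, nothing beyond what the commit introduces):

module TrainSketch where
  import Sibe

  -- divide the learning rate by 1.1 each epoch (floored at 5e-2),
  -- then run one epoch of mini-batch SGD; `run` folds this over all epochs
  trainWithDecay :: Session -> IO Session
  trainWithDecay = run (sgd . learningRateDecay (1.1, 5e-2))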

.gitmodules (vendored) | 3
@@ -1,3 +1,6 @@
 [submodule "examples/doc-classifier-data"]
 	path = examples/doc-classifier-data
 	url = git@github.com:mdibaiee/doc-classifier-data
+[submodule "examples/notMNIST"]
+	path = examples/notMNIST
+	url = git@github.com:mdibaiee/notMNIST

examples/424encoder.hs (new file) | 49
@@ -0,0 +1,49 @@
+module Main where
+  import Sibe
+  import Numeric.LinearAlgebra
+  import Data.List
+  import Debug.Trace
+  import Data.Default.Class
+
+  main = do
+    let alpha = 0.5
+        epochs = 1000
+        a = (sigmoid, sigmoid')
+        rnetwork = randomNetwork 0 (-0.1, 0.1) 4 [(2, a)] (4, a)
+
+        inputs = [vector [1, 0, 0, 0],
+                  vector [0, 1, 0, 0],
+                  vector [0, 0, 1, 0],
+                  vector [0, 0, 0, 1]]
+
+        labels = [vector [1, 0, 0, 0],
+                  vector [0, 1, 0, 0],
+                  vector [0, 0, 1, 0],
+                  vector [0, 0, 0, 1]]
+
+        session = def { network = rnetwork
+                      , learningRate = 0.5
+                      , epochs = 1000
+                      , training = zip inputs labels
+                      , test = zip inputs labels
+                      } :: Session
+
+    let initialCost = crossEntropy session
+
+    newsession <- run gd session
+
+    let results = map (`forward` newsession) inputs
+        rounded = map (map round . toList) results
+
+        cost = crossEntropy newsession
+
+    putStrLn "parameters: "
+    putStrLn $ "- inputs: " ++ show inputs
+    putStrLn $ "- labels: " ++ show labels
+    putStrLn $ "- learning rate: " ++ show alpha
+    putStrLn $ "- epochs: " ++ show epochs
+    putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost
+    putStrLn "results: "
+    putStrLn $ "- actual result: " ++ show results
+    putStrLn $ "- rounded result: " ++ show rounded
+    putStrLn $ "- cost (cross-entropy): " ++ show cost
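The 4-2-4 encoder above trains the network to reproduce each one-hot input through a two-unit hidden layer. A small sketch (not part of the commit, helper name hypothetical) of how the reconstruction could be checked with the same API:

module EncoderCheck where
  import Sibe
  import Numeric.LinearAlgebra

  -- True when every rounded network output equals the corresponding one-hot input
  reconstructs :: Session -> [Vector Double] -> Bool
  reconstructs sess = all (\v -> map round (toList (forward v sess)) == map round (toList v))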

@@ -1,6 +1,7 @@
 module Main
   where
     -- import Sibe
+    import Sibe.NLP
     import Sibe.NaiveBayes
     import Text.Printf
     import Data.List
@@ -28,14 +29,14 @@ module Main
           documents = cleanDocuments . removeWords sws $ createDocuments classes dataset
           testDocuments = cleanDocuments $ createDocuments classes test
 
-          nb = train documents intClasses
+          nb = initialize documents intClasses
 
           -- top-ten
           topClasses = take 10 . reverse $ sortBy (compare `on` (length . snd)) (cd nb)
           filtered = map (\(c, ds) -> (c, take 100 ds)) topClasses
           filteredClasses = map fst filtered
           ttDocs = concatMap snd filtered
-          ttNB = train ttDocs filteredClasses
+          ttNB = initialize ttDocs filteredClasses
 
           ttTestDocuments = filter ((`elem` filteredClasses) . c) . cleanDocuments $ createDocuments classes test
 
@@ -1,54 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-module Main
-  where
-    -- import Sibe
-    import Sibe.NaiveBayes
-    import Text.Printf
-    import Data.List
-    import Data.Maybe
-    import Debug.Trace
-    import Data.List.Split
-    import System.Directory
-    import Control.DeepSeq
-    import System.IO
-
-    main = do
-      putStr "Reading documents... "
-      neg_documents <- createDocuments "examples/sentiment-analysis-data/train/neg/"
-      pos_documents <- createDocuments "examples/sentiment-analysis-data/train/pos/"
-
-      test_neg_documents  <- createDocuments "examples/sentiment-analysis-data/test/neg/"
-      test_pos_documents <- createDocuments "examples/sentiment-analysis-data/test/pos/"
-      putStrLn "done"
-
-      let classes = [0..9] -- rating, from 0 to 9 (1 to 10)
-          documents = neg_documents ++ pos_documents
-          nb = train documents classes
-
-          testDocuments = neg_documents ++ pos_documents
-
-          results = map (\(Document text c) -> (c, run text nb)) testDocuments
-          -- results = map (\(Document text c) -> (c, determine text nb intClasses documents)) devTestDocuments
-      print results
-
-      -- let showResults (c, r) = putStrLn (show (classes !! c) ++ " ~ " ++ show (classes !! r))
-      -- mapM_ showResults results
-      --
-      -- putStrLn $ "Recall: " ++ show (recall results)
-      -- putStrLn $ "Precision: " ++ show (precision results)
-      -- putStrLn $ "F Measure: " ++ show (fmeasure results)
-      -- putStrLn $ "Accuracy: " ++ show (accuracy results)
-
-    createDocuments :: FilePath -> IO [Document]
-    createDocuments path = do
-      files <- drop 2 <$> getDirectoryContents path
-      let ratings = map (subtract 1 . read . take 1 . last . splitOn "_") files :: [Int]
-      contents <- mapM (forceReadFile . (path ++)) files
-      return $ zipWith Document contents ratings
-
-    forceReadFile :: FilePath -> IO String
-    forceReadFile file = do
-      handle <- openFile file ReadMode
-      content <- hGetContents handle
-      content `deepseq` hClose handle
-      return content

examples/notMNIST (new submodule) | 1
Submodule examples/notMNIST added at 0dbdfd43ff

examples/notmnist.hs (new file) | 111
@@ -0,0 +1,111 @@
							@@ -0,0 +1,111 @@
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Main where
+  import Sibe
+  import Numeric.LinearAlgebra
+  import Data.List
+  import Debug.Trace
+  import System.IO
+  import System.Directory
+  import Codec.Picture
+  import Codec.Picture.Types
+  import qualified Data.Vector.Storable as V
+  import Data.Either
+  import System.Random
+  import System.Random.Shuffle
+  import Data.Default.Class
+
+  import qualified Graphics.Rendering.Chart.Easy as Chart
+  import Graphics.Rendering.Chart.Backend.Cairo
+
+  main = do
+    setStdGen (mkStdGen 100)
+
+    let a         = (sigmoid, sigmoid')
+        o         = (softmax, one)
+        rnetwork  = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, a)
+
+    (inputs, labels) <- dataset
+
+    let trp      = length inputs * 70 `div` 100
+        tep      = length inputs * 30 `div` 100
+
+        -- training data
+        trinputs = take trp inputs
+        trlabels = take trp labels
+
+        -- test data
+        teinputs = take tep . drop trp $ inputs
+        telabels = take tep . drop trp $ labels
+
+    let session = def { learningRate = 0.5
+                      , batchSize = 32
+                      , epochs = 35
+                      , network = rnetwork
+                      , training = zip trinputs trlabels
+                      , test = zip teinputs telabels
+                      } :: Session
+
+    let initialCost = crossEntropy session
+
+    newsession <- run (sgd . learningRateDecay (1.1, 5e-2)) session
+
+    let el = map (\(e, l, _) -> (e, l)) (chart newsession)
+        ea = map (\(e, _, a) -> (e, a)) (chart newsession)
+    toFile Chart.def "notmnist.png" $ do
+      Chart.layoutlr_title Chart..= "loss over time"
+      Chart.plotLeft (Chart.line "loss" [el])
+      Chart.plotRight (Chart.line "learningRate" [ea])
+
+
+    let cost = crossEntropy newsession
+
+    putStrLn "parameters: "
+    putStrLn $ "- batch size: " ++ show (batchSize session)
+    putStrLn $ "- learning rate: " ++ show (learningRate session)
+    putStrLn $ "- epochs: " ++ show (epochs session)
+    putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost
+    putStrLn "results: "
+    putStrLn $ "- accuracy: " ++ show (accuracy newsession)
+    putStrLn $ "- cost (cross-entropy): " ++ show cost
+
+  dataset :: IO ([Vector Double], [Vector Double])
+  dataset = do
+    let dir = "examples/notMNIST/"
+
+    groups <- filter ((/= '.') . head) <$> listDirectory dir
+
+    inputFiles <- mapM (listDirectory . (dir ++)) groups
+
+    let n = 512 {-- minimum (map length inputFiles) --}
+        numbers = map (`div` n) [0..n * length groups - 1]
+        inputFilesFull = map (\(i, g) -> map ((dir ++ i ++ "/") ++) g) (zip groups inputFiles)
+
+
+    inputImages <- mapM (mapM readImage . take n) inputFilesFull
+
+    let names = map (take n) inputFilesFull
+
+    let (l, r) = partitionEithers $ concat inputImages
+        inputs = map (fromPixels . convertRGB8) r
+        labels = map (\i -> V.replicate i 0 `V.snoc` 1 V.++ V.replicate (9 - i) 0) numbers
+
+        pairs  = zip inputs labels
+
+    shuffled <- shuffleM pairs
+    return (map fst shuffled, map snd shuffled)
+
+    where
+      fromPixels :: Image PixelRGB8 -> Vector Double
+      fromPixels img@Image { .. } =
+        let pairs = [(x, y) | x <- [0..imageWidth - 1], y <- [0..imageHeight - 1]]
+        in V.fromList $ map iter pairs
+        where
+          iter (x, y) =
+            let (PixelRGB8 r g b) = convertPixel $ pixelAt img x y
+            in
+              if r == 0 && g == 0 && b == 0 then 0 else 1
+
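The labels built in dataset above are one-hot vectors over the ten notMNIST classes: for class index i, V.replicate i 0 `V.snoc` 1 V.++ V.replicate (9 - i) 0 places the single 1 at position i. A tiny worked example (not part of the commit):

module OneHotSketch where
  import qualified Data.Vector.Storable as V

  -- class 2 of 10  ==>  [0,0,1,0,0,0,0,0,0,0]
  oneHotTwo :: V.Vector Double
  oneHotTwo = V.replicate 2 0 `V.snoc` 1 V.++ V.replicate 7 0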
							
								
								
									
examples/recurrent-doc-classifier.hs (normal file) | 0
@@ -1,5 +0,0 @@
-XSym
-0040
-3666c4cacaf995ebd11ef25aab70de99
-../../sibe-repos/sentiment-analysis-data
-
							
								
								
									
examples/sin.hs (new file) | 35
@@ -0,0 +1,35 @@
+module Main where
+  import Sibe
+  import Numeric.LinearAlgebra
+  import Data.List
+  import Debug.Trace
+
+  main = do
+    let alpha = 0.5
+        epochs = 1000
+        a = (sigmoid, sigmoid')
+        lo = (sigmoid, (\_ -> 1)) -- cross entropy
+        -- a = (relu, relu')
+        rnetwork = randomNetwork 0 (-1, 1) 1 [(50, a)] (1, lo)
+
+        inputs = map (\a -> vector [a]) (reverse [0, 30, 45, 60, 90])
+        labels = map (\deg -> vector $ [sin $ deg * pi/180]) (reverse [0, 30, 45, 60, 90])
+
+        initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels
+
+    network <- run session inputs rnetwork labels alpha epochs
+
+    let results = map (`forward` network) inputs
+        rounded = map (map round . toList) results
+
+        cost = zipWith crossEntropy (map (`forward` network) inputs) labels
+
+    putStrLn "parameters: "
+    putStrLn $ "- inputs: " ++ show inputs
+    putStrLn $ "- labels: " ++ show labels
+    putStrLn $ "- learning rate: " ++ show alpha
+    putStrLn $ "- epochs: " ++ show epochs
+    {-putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost-}
+    putStrLn "results: "
+    putStrLn $ "- actual result: " ++ show results
+    {-putStrLn $ "- cost (cross-entropy): " ++ show cost-}
@@ -3,30 +3,37 @@ module Main where
   import Numeric.LinearAlgebra
   import Data.List
   import Debug.Trace
+  import Data.Default.Class
+
   main = do
-    let learning_rate = 0.5
-        (iterations, epochs) = (2, 1000)
-        a = (sigmoid, sigmoid')
-        rnetwork = randomNetwork 0 2 [(8, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output
+    let a = (sigmoid, sigmoid')
+        rnetwork = randomNetwork 0 (-1, 1) 2 [(2, a)] (1, a) -- two inputs, 8 nodes in a single hidden layer, 1 output
 
         inputs = [vector [0, 1], vector [1, 0], vector [1, 1], vector [0, 0]]
         labels = [vector [1], vector [1], vector [0], vector [0]]
 
-        initial_cost = zipWith crossEntropy (map (`forward` rnetwork) inputs) labels
-        network = session inputs rnetwork labels learning_rate (iterations, epochs)
-        results = map (`forward` network) inputs
+        session = def { network = rnetwork
+                      , learningRate = 0.5
+                      , epochs = 1000
+                      , training = zip inputs labels
+                      , test = zip inputs labels
+                      } :: Session
+
+        initialCost = crossEntropy session
+
+    newsession <- run gd session
+
+    let results = map (`forward` newsession) inputs
         rounded = map (map round . toList) results
 
-        cost = zipWith crossEntropy (map (`forward` network) inputs) labels
+        cost = crossEntropy newsession
+
     putStrLn "parameters: "
     putStrLn $ "- inputs: " ++ show inputs
     putStrLn $ "- labels: " ++ show labels
-    putStrLn $ "- learning rate: " ++ show learning_rate
-    putStrLn $ "- iterations/epochs: " ++ show (iterations, epochs)
-    putStrLn $ "- initial cost (cross-entropy): " ++ show initial_cost
+    putStrLn $ "- learning rate: " ++ show (learningRate session)
+    putStrLn $ "- epochs: " ++ show (epochs session)
+    putStrLn $ "- initial cost (cross-entropy): " ++ show initialCost
     putStrLn "results: "
     putStrLn $ "- actual result: " ++ show results
     putStrLn $ "- rounded result: " ++ show rounded
 
							
								
								
									
										
notmnist-0.png (new binary file, 33 KiB)
notmnist-1.png (new binary file, 26 KiB)
notmnist-2.png (new binary file, 25 KiB)
notmnist.png (new binary file, 24 KiB)
							
								
								
									
sibe.cabal | 73
@@ -15,7 +15,7 @@ cabal-version:       >=1.10
 
 library
   hs-source-dirs:      src
-  exposed-modules:     Sibe, Sibe.NaiveBayes
+  exposed-modules:     Sibe, Sibe.NaiveBayes, Sibe.NLP
   build-depends:       base >= 4.7 && < 5
                      , hmatrix
                      , random
@@ -26,16 +26,21 @@ library
                      , regex-pcre
                      , text
                      , stemmer
+                     , vector
+                     , random-shuffle
+                     , data-default-class
+                     , Chart
+                     , Chart-cairo
   default-language:    Haskell2010
 
-executable sibe-exe
-  hs-source-dirs:      app
-  main-is:             Main.hs
-  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
-  build-depends:       base
-                     , sibe
-                     , hmatrix
-  default-language:    Haskell2010
+--executable sibe-exe
+  --hs-source-dirs:      app
+  --main-is:             Main.hs
+  --ghc-options:         -threaded -rtsopts -with-rtsopts=-N
+  --build-depends:       base
+                     --, sibe
+                     --, hmatrix
+  --default-language:    Haskell2010
 
 executable example-xor
   hs-source-dirs:      examples
@@ -44,6 +49,43 @@ executable example-xor
   build-depends:       base
                      , sibe
                      , hmatrix
+                     , data-default-class
+  default-language:    Haskell2010
+
+--executable example-sin
+  --hs-source-dirs:      examples
+  --main-is:             sin.hs
+  --ghc-options:         -threaded -rtsopts -with-rtsopts=-N
+  --build-depends:       base
+                     --, sibe
+                     --, hmatrix
+  --default-language:    Haskell2010
+
+executable example-424
+  hs-source-dirs:      examples
+  main-is:             424encoder.hs
+  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
+  build-depends:       base
+                     , sibe
+                     , hmatrix
+                     , data-default-class
+  default-language:    Haskell2010
+
+executable example-notmnist
+  hs-source-dirs:      examples
+  main-is:             notmnist.hs
+  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
+  build-depends:       base
+                     , sibe
+                     , hmatrix
+                     , directory >= 1.2.5.0
+                     , JuicyPixels == 3.2.7.2
+                     , vector == 0.11.0.0
+                     , random
+                     , random-shuffle
+                     , data-default-class
+                     , Chart
+                     , Chart-cairo
   default-language:    Haskell2010
 
 executable example-naivebayes-doc-classifier
@@ -57,19 +99,6 @@ executable example-naivebayes-doc-classifier
                      , split
   default-language:    Haskell2010
 
-executable example-naivebayes-sentiment-analysis
-  hs-source-dirs:      examples
-  main-is:             naivebayes-sentiment-analysis.hs
-  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
-  build-depends:       base
-                     , sibe
-                     , hmatrix
-                     , containers
-                     , split
-                     , directory
-                     , deepseq
-  default-language:    Haskell2010
-
 test-suite sibe-test
   type:                exitcode-stdio-1.0
   hs-source-dirs:      test
 
							
								
								
									
src/Sibe.hs | 258
@@ -15,22 +15,37 @@ module Sibe
      saveNetwork,
      loadNetwork,
      train,
-     session,
-     shuffle,
+     gd,
+     sgd,
+     run,
      sigmoid,
      sigmoid',
+     softmax,
+     softmax',
+     one,
      relu,
      relu',
      crossEntropy,
      genSeed,
-     replaceVector
+     replaceVector,
+     Session(..),
+     accuracy,
+     learningRateDecay
     ) where
       import Numeric.LinearAlgebra
       import System.Random
+      import System.Random.Shuffle
       import Debug.Trace
-      import Data.List (foldl', sortBy)
+      import Data.List (foldl', sortBy, genericLength, permutations)
       import System.IO
       import Control.DeepSeq
+      import Control.Monad
+      import qualified Data.Vector.Storable as V
+      import Data.Default.Class
+      import System.Exit
+
+      import qualified Graphics.Rendering.Chart.Easy as Chart
+      import Graphics.Rendering.Chart.Backend.Cairo
+
       type LearningRate = Double
       type Input = Vector Double
@@ -48,8 +63,33 @@ module Sibe
       data Network = O Layer
                    | Layer :- Network
                    deriving (Show)
+
       infixr 5 :-
 
+      data Session = Session  { network      :: Network
+                              , training     :: [(Vector Double, Vector Double)]
+                              , test         :: [(Vector Double, Vector Double)]
+                              , learningRate :: Double
+                              , epochs       :: Int
+                              , epoch        :: Int
+                              , batchSize    :: Int
+                              , chart        :: [(Int, Double, Double)]
+                              , momentum     :: Double
+                              }
+
+      emptyNetwork = randomNetwork 0 (0, 0) 0 [] (0, (id, id))
+      instance Default Session where
+        def = Session { network      = seq (die "You have not specified a network parameter") emptyNetwork
+                      , training     = seq (die "You have not specified training data") []
+                      , test         = seq (die "You have not specified test data") []
+                      , learningRate = 0.5
+                      , epochs       = 35
+                      , epoch        = 0
+                      , batchSize    = 0
+                      , chart        = []
+                      , momentum     = 0
+                      }
+
       saveNetwork :: Network -> String -> IO ()
       saveNetwork network file =
         writeFile file ((show . reverse) (gen network []))
@@ -73,22 +113,24 @@ module Sibe
       runLayer :: Input -> Layer -> Output
       runLayer input (Layer !biases !weights _) = input <# weights + biases
 
-      forward :: Input -> Network -> Output
-      forward input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l
-      forward input (l@(Layer _ _ (fn, _)) :- n) = forward ((fst . activation $ l) $ runLayer input l) n
+      forward :: Input -> Session -> Output
+      forward input session = compute input (network session)
+        where
+          compute input (O l@(Layer _ _ (fn, _))) = fn $ runLayer input l
+          compute input (l@(Layer _ _ (fn, _)) :- n) = compute ((fst . activation $ l) $ runLayer input l) n
 
-      randomLayer :: Seed -> (Int, Int) -> Activation -> Layer
-      randomLayer seed (wr, wc) =
-        let weights = uniformSample seed wr $ replicate wc (-1, 1)
-            biases  = randomVector seed Uniform wc * 2 - 1
+      randomLayer :: Seed -> (Int, Int) -> (Double, Double) -> Activation -> Layer
+      randomLayer seed (wr, wc) (l, u) =
+        let weights = uniformSample seed wr $ replicate wc (l, u)
+            biases  = randomVector seed Uniform wc * realToFrac u - realToFrac l
         in Layer biases weights
 
-      randomNetwork :: Seed -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
-      randomNetwork seed input [] (output, a) =
-        O $ randomLayer seed (input, output) a
-      randomNetwork seed input ((h, a):hs) output =
-        randomLayer seed (input, h) a :-
-        randomNetwork (seed + 1) h hs output
+      randomNetwork :: Seed -> (Double, Double) -> Int -> [(Int, Activation)] -> (Int, Activation) -> Network
+      randomNetwork seed bound input [] (output, a) =
+        O $ randomLayer seed (input, output) bound a
+      randomNetwork seed bound input ((h, a):hs) output =
+        randomLayer seed (input, h) bound a :-
+        randomNetwork (seed + 1) bound h hs output
 
       sigmoid :: Vector Double -> Vector Double
       sigmoid x = 1 / max (1 + exp (-x)) 1e-10
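The extra (Double, Double) argument threaded through randomLayer and randomNetwork above is the uniform range used to initialise weights, passed right after the seed. A minimal sketch (binding names hypothetical, call sites copied from the examples in this diff):

module InitBoundsSketch where
  import Sibe
  import Numeric.LinearAlgebra

  -- examples/424encoder.hs initialises in (-0.1, 0.1); examples/notmnist.hs in (-1, 1)
  tight = randomNetwork 0 (-0.1, 0.1) 4 [(2, a)] (4, a)       where a = (sigmoid, sigmoid')
  wide  = randomNetwork 0 (-1, 1) (28*28) [(100, a)] (10, a)  where a = (sigmoid, sigmoid')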
@@ -96,18 +138,37 @@ module Sibe
       sigmoid' :: Vector Double -> Vector Double
       sigmoid' x = sigmoid x * (1 - sigmoid x)
 
+      softmax :: Vector Double -> Vector Double
+      softmax x = cmap (\a -> exp a / s) x
+        where
+          s = V.sum $ exp x
+
+      one :: a -> Double
+      one x = 1
+
+      softmax' :: Vector Double -> Vector Double
+      softmax' x = softmax x * (1 - softmax x)
+
       relu :: Vector Double -> Vector Double
-      relu x = log (max (1 + exp x) 1e-10)
+      relu = cmap (max 0.1)
 
       relu' :: Vector Double -> Vector Double
-      relu' = sigmoid
+      relu' = cmap dev
+        where dev x
+                | x < 0 = 0
+                | otherwise = 1
 
-      crossEntropy :: Output -> Output -> Double
-      crossEntropy output target =
-        let pairs = zip (toList output) (toList target)
-            n = fromIntegral (length pairs)
-        in (-1 / n) * sum (map f pairs)
+      crossEntropy :: Session -> Double
+      crossEntropy session =
+        let inputs = map fst (test session)
+            labels = map (toList . snd) (test session)
+            outputs = map (toList . (`forward` session)) inputs
+            pairs = zip outputs labels
+            n = genericLength pairs
+
+        in sum (map set pairs) / n
         where
+          set (os, ls) = (-1 / genericLength os) * sum (zipWith (curry f) os ls)
           f (a, y) = y * log (max 1e-10 a) + (1 - y) * log (max (1 - a) 1e-10)
 
       train :: Input
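crossEntropy now scores a whole Session rather than a single output/target pair: it runs forward over every test input and averages the per-example binary cross-entropy. As a formula (a paraphrase of the code above, with a the network output, y the label, N test pairs and K output units):

C = \frac{1}{N} \sum_{i=1}^{N} \left( -\frac{1}{K} \sum_{k=1}^{K} \left[ y_{ik}\,\ln\max(a_{ik}, 10^{-10}) + (1 - y_{ik})\,\ln\max(1 - a_{ik}, 10^{-10}) \right] \right)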
@@ -138,35 +199,137 @@ module Sibe
                 o = fn y
                 (n', delta) = run o n
 
-                de = delta * fn' y -- quadratic cost
+                de = delta * fn' y
 
-                biases'  = biases  - scale alpha de
-                weights' = weights - scale alpha (input `outer` de)
+                biases'  = biases  - cmap (*alpha) de
+                weights' = weights - cmap (*alpha) (input `outer` de)
                 layer = Layer biases' weights' (fn, fn')
 
                 pass = weights #> de
                 -- pass = weights #> de
             in (layer :- n', pass)
 
-      session :: [Input] -> Network -> [Output] -> Double -> (Int, Int) -> Network
-      session inputs network labels alpha (iterations, epochs) =
-        let n = length inputs
-            indexes = shuffle n (map (`mod` n) [0..n * epochs])
-        in foldl' iter network indexes
+      {-trainMomentum :: Input
+                    -> Network
+                    -> Output -- target
+                    -> Double -- learning rate
+                    -> (Double, Double) -- momentum
+                    -> Network -- network's output
+      trainMomentum input network target alpha (m, v) = fst $ run input network
         where
-          iter net i =
-            let n = length inputs
-                index = i `mod` n
-                input = inputs !! index
-                label = labels !! index
-            in foldl' (\net _ -> train input net label alpha) net [0..iterations]
-
-      shuffle :: Seed -> [a] -> [a]
-      shuffle seed list =
-        let ords = map ord $ take (length list) (randomRs (0, 1) (mkStdGen seed) :: [Int])
-        in map snd $ sortBy (\x y -> fst x) (zip ords list)
-        where ord x | x == 0 = LT
-                    | x == 1 = GT
+          run :: Input -> Network -> (Network, Vector Double)
+          run input (O l@(Layer biases weights (fn, fn'))) =
+            let y = runLayer input l
+                o = fn y
+                delta = o - target
+                de = delta * fn' y
+                v = 
+                -- de = delta -- cross entropy cost
+
+                biases'  = biases  - scale alpha de
+                weights' = weights - scale alpha (input `outer` de) -- small inputs learn slowly
+                layer    = Layer biases' weights' (fn, fn') -- updated layer
+
+                pass = weights #> de
+                -- pass = weights #> de
+
+            in (O layer, pass)
+          run input (l@(Layer biases weights (fn, fn')) :- n) =
+            let y = runLayer input l
+                o = fn y
+                (n', delta) = run o n
+
+                de = delta * fn' y
+
+                biases'  = biases  - cmap (*alpha) de
+                weights' = weights - cmap (*alpha) (input `outer` de)
+                layer = Layer biases' weights' (fn, fn')
+
+                pass = weights #> de
+                -- pass = weights #> de
+            in (layer :- n', pass)-}
+
+      gd :: Session -> IO Session
+      gd session = do
+        seed <- newStdGen
+
+        let pairs = training session
+            alpha = learningRate session
+            net = network session
+
+        let n = length pairs
+
+        shuffled <- shuffleM pairs
+
+        let newnet = foldl' (\n (input, label) -> train input n label alpha) net pairs
+
+        return session { network = newnet
+                       , epoch = epoch session + 1
+                       }
+
+      sgd :: Session -> IO Session
+      sgd session = do
+        seed <- newStdGen
+
+        let pairs = training session
+            bsize = batchSize session
+            alpha = learningRate session
+            net = network session
+
+        let n = length pairs
+            iterations = n `div` bsize - 1
+
+        shuffled <- shuffleM pairs
+
+        let iter net i =
+              let n = length pairs
+                  batch = take bsize . drop (i * bsize) $ shuffled
+                  batchInputs = map fst batch
+                  batchLabels = map snd batch
+                  batchPair = zip batchInputs batchLabels
+              in foldl' (\n (input, label) -> train input n label alpha) net batchPair
+
+        let newnet = foldl' iter net [0..iterations]
+            cost = crossEntropy (session { network = newnet })
+
+        let el = map (\(e, l, _) -> (e, l)) (chart session)
+            ea = map (\(e, _, a) -> (e, a)) (chart session)
+
+        putStrLn $ (show $ epoch session) ++ " => " ++ (show cost) ++ " @ " ++ (show $ learningRate session)
+
+        toFile Chart.def "sgd.png" $ do
+          Chart.layoutlr_title Chart..= "loss over time"
+          Chart.plotLeft (Chart.line "loss" [el])
+          Chart.plotRight (Chart.line "learningRate" [ea])
+
+        return session { network = newnet
+                       , epoch = epoch session + 1
+                       , chart = (epoch session, cost, learningRate session):chart session
+                       }
+
+
+      accuracy :: Session -> Double
+      accuracy session =
+        let inputs = map fst (test session)
+            labels = map snd (test session)
+
+            results = map (`forward` session) inputs
+            rounded = map (map round . toList) results
+
+            equals = zipWith (==) rounded (map (map round . toList) labels)
+        in genericLength (filter (== True) equals) / genericLength inputs
+
+      learningRateDecay :: (Double, Double) -> Session -> Session
+      learningRateDecay (step, m) session =
+        session { learningRate = max m $ learningRate session / step }
+
+      run :: (Session -> IO Session)
+          ->  Session -> IO Session
+      run fn session = foldM (\s i -> fn s) session [0..epochs session]
+
+      factorial :: Int -> Int
+      factorial 0 = 1
+      factorial x = x * factorial (x - 1)
+
       genSeed :: IO Seed
       genSeed = do
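learningRateDecay above divides the current rate by step on every call and clamps it at the floor m; composed with sgd inside run (as examples/notmnist.hs does), that happens once per epoch. A rough sketch of the resulting schedule (not part of the commit):

module DecaySketch where
  -- with step 1.1 and floor 5e-2, a rate starting at 0.5 reaches the floor
  -- after roughly 24 epochs, since 0.5 / 1.1^24 ≈ 0.051 and 0.5 / 1.1^25 ≈ 0.046
  schedule :: Int -> [Double]
  schedule epochs = take epochs (iterate (\lr -> max 5e-2 (lr / 1.1)) 0.5)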
@@ -176,12 +339,7 @@ module Sibe
 | 
				
			|||||||
      replaceVector :: Vector Double -> Int -> Double -> Vector Double
 | 
					      replaceVector :: Vector Double -> Int -> Double -> Vector Double
 | 
				
			||||||
      replaceVector vec index value =
 | 
					      replaceVector vec index value =
 | 
				
			||||||
        let list = toList vec
 | 
					        let list = toList vec
 | 
				
			||||||
        in fromList $ rrow index list
 | 
					        in fromList $ take index list ++ value : drop (index + 1) list
 | 
				
			||||||
        where
 | 
					 | 
				
			||||||
          rrow index [] = []
 | 
					 | 
				
			||||||
          rrow index (x:xs)
 | 
					 | 
				
			||||||
            | index == index = value:xs
 | 
					 | 
				
			||||||
            | otherwise = x : rrow (index + 1) xs
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      clip :: Double -> (Double, Double) -> Double
 | 
					      clip :: Double -> (Double, Double) -> Double
 | 
				
			||||||
      clip x (l, u) = min u (max l x)
 | 
					      clip x (l, u) = min u (max l x)
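
The decay helper added above divides the learning rate by step on every call and never lets it fall below the floor m. A self-contained sketch of that schedule follows; the concrete values 0.5, 1.5 and 0.05 are illustrative and not taken from the commit:

module Main where

-- Mirrors the learningRateDecay rule from the diff:
-- divide by `step`, but never drop below the floor `m`.
decay :: (Double, Double) -> Double -> Double
decay (step, m) lr = max m (lr / step)

main :: IO ()
main = do
  -- Apply the rule once per epoch, starting from an initial rate of 0.5.
  let schedule = take 10 (iterate (decay (1.5, 0.05)) 0.5)
  -- Prints 0.5, 0.33.., 0.22.., 0.14.., ... until it is clamped at 0.05.
  mapM_ print schedule
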

src/Sibe/LogisticRegression.hs (new file, 0 lines)

src/Sibe/NLP.hs (new file, 129 lines)
@@ -0,0 +1,129 @@
+module Sibe.NLP
+  (Class,
+   Document(..),
+   ordNub,
+   accuracy,
+   recall,
+   precision,
+   fmeasure,
+   cleanText,
+   cleanDocuments,
+   removeWords,
+   removeStopwords,
+   ngram,
+   ngramText,
+  )
+  where
+    import Data.List
+    import Debug.Trace
+    import qualified Data.Set as Set
+    import Data.List.Split
+    import Data.Maybe
+    import Control.Arrow ((&&&))
+    import Text.Regex.PCRE
+    import Data.Char (isSpace, isNumber, toLower)
+    import NLP.Stemmer
+
+    type Class = Int;
+
+    data Document = Document { text :: String
+                             , c    :: Class
+                             } deriving (Eq, Show, Read)
+
+
+    cleanText :: String -> String
+    cleanText string =
+      let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?']) (trim string)
+          spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r']
+          stemmed = unwords $ map (stem Porter) (words spacify)
+          nonumber = filter (not . isNumber) stemmed
+          lower = map toLower nonumber
+      in (unwords . words) lower -- remove unnecessary spaces
+      where
+        trim = f . f
+          where
+            f = reverse . dropWhile isSpace
+        replace needle replacement =
+          map (\c -> if c == needle then replacement else c)
+
+    cleanDocuments :: [Document] -> [Document]
+    cleanDocuments documents =
+      let cleaned = map (\(Document text c) -> Document (cleanText text) c) documents
+      in cleaned
+
+    removeWords :: [String] -> [Document] -> [Document]
+    removeWords ws documents =
+      map (\(Document text c) -> Document (rm ws text) c) documents
+      where
+          rm list text =
+            unwords $ filter (`notElem` list) (words text)
+
+    removeStopwords :: Int -> [Document] -> [Document]
+    removeStopwords i documents =
+      let wc = wordCounts (concatDocs documents)
+          wlist = sortBy (\(_, a) (_, b) -> b `compare` a) wc
+          stopwords = map fst (take i wlist)
+      in removeWords stopwords documents
+      where
+        vocabulary x = ordNub (words x)
+        countWordInDoc d w = genericLength (filter (==w) d)
+        wordCounts x =
+          let voc = vocabulary x
+          in zip voc $ map (countWordInDoc (words x)) voc
+
+        concatDocs = concatMap (\(Document text _) -> text ++ " ")
+
+    ordNub :: (Ord a) => [a] -> [a]
+    ordNub = go Set.empty
+      where
+        go _ [] = []
+        go s (x:xs) = if x `Set.member` s then go s xs
+                                          else x : go (Set.insert x s) xs
+
+    accuracy :: [(Int, (Int, Double))] -> Double
+    accuracy results =
+      let pairs = map (\(a, b) -> (a, fst b)) results
+          correct = filter (uncurry (==)) pairs
+      in genericLength correct / genericLength results
+
+    recall :: [(Int, (Int, Double))] -> Double
+    recall results =
+      let classes = ordNub (map fst results)
+          s = sum (map rec classes) / genericLength classes
+      in s
+      where
+        rec a =
+          let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
+              y = genericLength $ filter (\(c, (r, _)) -> c == a) results
+          in t / y
+
+    precision :: [(Int, (Int, Double))] -> Double
+    precision results =
+      let classes = ordNub (map fst results)
+          s = sum (map prec classes) / genericLength classes
+      in s
+      where
+        prec a =
+          let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
+              y = genericLength $ filter (\(c, (r, _)) -> r == a) results
+          in
+            if y == 0
+              then 0
+              else t / y
+
+    fmeasure :: [(Int, (Int, Double))] -> Double
+    fmeasure results =
+      let r = recall results
+          p = precision results
+      in (2 * p * r) / (p + r)
+
+    ngram :: Int -> [Document] -> [Document]
+    ngram n documents =
+      map (\(Document text c) -> Document (ngramText n text) c) documents
+
+    ngramText :: Int -> String -> String
+    ngramText n text =
+      let ws = words text
+          pairs = zip [0..] ws
+          grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs
+      in unwords ("<b>_":grams)
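
For reference, a small usage sketch of the new module, assuming Sibe.NLP builds with its stemmer and PCRE dependencies; the sample strings are made up and not part of the commit:

module Main where

import Sibe.NLP

main :: IO ()
main = do
  let docs = [ Document "Hello, World! This is document one."   0
             , Document "Another document, written in Haskell." 1
             ]
      cleaned = cleanDocuments docs   -- trim, strip punctuation and digits, stem, lowercase
      bigrams = ngram 2 cleaned       -- join each word with its right neighbour using '_'
  mapM_ (putStrLn . text) bigrams
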

src/Sibe/NaiveBayes.hs
@@ -1,7 +1,7 @@
 module Sibe.NaiveBayes
   (Document(..),
    NB(..),
-   train,
+   initialize,
    run,
    session,
    ordNub,
@@ -19,21 +19,13 @@ module Sibe.NaiveBayes
    removeStopwords,
   )
   where
+    import Sibe.NLP
     import Data.List
     import Debug.Trace
     import qualified Data.Set as Set
     import Data.List.Split
     import Data.Maybe
     import Control.Arrow ((&&&))
-    import Text.Regex.PCRE
-    import Data.Char (isSpace, isNumber, toLower)
-    import NLP.Stemmer
-
-    type Class = Int;
-
-    data Document = Document { text :: String
-                             , c    :: Class
-                             } deriving (Eq, Show, Read)

     data NB = NB { documents  :: [Document]
                  , classes    :: [(Class, Double)]
@@ -44,8 +36,8 @@ module Sibe.NaiveBayes
                  , cgram      :: [(Class, [(String, Int)])]
                  } deriving (Eq, Show, Read)

-    train :: [Document] -> [Class] -> NB
-    train documents classes =
+    initialize :: [Document] -> [Class] -> NB
+    initialize documents classes =
       let megadoc = concatDocs documents
           vocabulary = genericLength ((ordNub . words) megadoc)
           -- (class, prior probability)
@@ -83,17 +75,6 @@ module Sibe.NaiveBayes
         classWordsCounts x = wordsCount (classWords x) (classVocabulary x)
         classNGramCounts x = wordsCount (classNGramWords x) (ordNub $ classNGramWords x)

-    ngram :: Int -> [Document] -> [Document]
-    ngram n documents =
-      map (\(Document text c) -> Document (ngramText n text) c) documents
-
-    ngramText :: Int -> String -> String
-    ngramText n text =
-      let ws = words text
-          pairs = zip [0..] ws
-          grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs
-      in unwords ("<b>_":grams)
-
     session :: [Document] -> NB -> [(Class, (Class, Double))]
     session docs nb =
       let results = map (\(Document text c) -> (c, run text nb)) docs
@@ -143,91 +124,5 @@ module Sibe.NaiveBayes
           variance = sum (map ((^2) . subtract avg) x) / (genericLength x - 1)
       in sqrt variance

-    cleanText :: String -> String
-    cleanText string =
-      let puncs = filter (`notElem` ['!', '"', '#', '$', '%', '(', ')', '.', '?']) (trim string)
-          spacify = foldl (\acc x -> replace x ' ' acc) puncs [',', '/', '-', '\n', '\r']
-          stemmed = unwords $ map (stem Porter) (words spacify)
-          nonumber = filter (not . isNumber) stemmed
-          lower = map toLower nonumber
-      in (unwords . words) lower -- remove unnecessary spaces
-      where
-        trim = f . f
-          where
-            f = reverse . dropWhile isSpace
-        replace needle replacement =
-          map (\c -> if c == needle then replacement else c)
-
-    cleanDocuments :: [Document] -> [Document]
-    cleanDocuments documents =
-      let cleaned = map (\(Document text c) -> Document (cleanText text) c) documents
-      in cleaned
-
-    removeWords :: [String] -> [Document] -> [Document]
-    removeWords ws documents =
-      map (\(Document text c) -> Document (rm ws text) c) documents
-      where
-          rm list text =
-            unwords $ filter (`notElem` list) (words text)
-
-    removeStopwords :: Int -> [Document] -> [Document]
-    removeStopwords i documents =
-      let wc = wordCounts (concatDocs documents)
-          wlist = sortBy (\(_, a) (_, b) -> b `compare` a) wc
-          stopwords = map fst (take i wlist)
-      in removeWords stopwords documents
-      where
-        vocabulary x = ordNub (words x)
-        countWordInDoc d w = genericLength (filter (==w) d)
-        wordCounts x =
-          let voc = vocabulary x
-          in zip voc $ map (countWordInDoc (words x)) voc
-
-        concatDocs = concatMap (\(Document text _) -> text ++ " ")
-
     l :: (Show a) => a -> a
     l a = trace (show a) a
-
-    ordNub :: (Ord a) => [a] -> [a]
-    ordNub = go Set.empty
-      where
-        go _ [] = []
-        go s (x:xs) = if x `Set.member` s then go s xs
-                                          else x : go (Set.insert x s) xs
-
-    accuracy :: [(Int, (Int, Double))] -> Double
-    accuracy results =
-      let pairs = map (\(a, b) -> (a, fst b)) results
-          correct = filter (uncurry (==)) pairs
-      in genericLength correct / genericLength results
-
-    recall :: [(Int, (Int, Double))] -> Double
-    recall results =
-      let classes = ordNub (map fst results)
-          s = sum (map rec classes) / genericLength classes
-      in s
-      where
-        rec a =
-          let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
-              y = genericLength $ filter (\(c, (r, _)) -> c == a) results
-          in t / y
-
-    precision :: [(Int, (Int, Double))] -> Double
-    precision results =
-      let classes = ordNub (map fst results)
-          s = sum (map prec classes) / genericLength classes
-      in s
-      where
-        prec a =
-          let t = genericLength $ filter (\(c, (r, _)) -> c == r && c == a) results
-              y = genericLength $ filter (\(c, (r, _)) -> r == a) results
-          in
-            if y == 0
-              then 0
-              else t / y
-
-    fmeasure :: [(Int, (Int, Double))] -> Double
-    fmeasure results =
-      let r = recall results
-          p = precision results
-      in (2 * p * r) / (p + r)
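
With the text helpers now living in Sibe.NLP and train renamed to initialize, a classifier run might look roughly like the sketch below. The example data is hypothetical and the wiring only assumes the exports visible in the two modules above:

module Main where

import Sibe.NLP        (Document(..), cleanDocuments, accuracy)
import Sibe.NaiveBayes (initialize, session)

main :: IO ()
main = do
  let trainDocs = cleanDocuments [ Document "free money, claim your prize" 1
                                 , Document "meeting moved to monday"      0
                                 ]
      testDocs  = cleanDocuments [ Document "claim a free prize" 1 ]
      nb        = initialize trainDocs [0, 1]   -- was `train` before this commit
      results   = session testDocs nb           -- [(label, (predicted, score))]
  print (accuracy results)
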
 
stack.yaml (74 lines changed)
@@ -1,40 +1,5 @@
-# This file was automatically generated by 'stack init'
-#
-# Some commonly used options have been documented as comments in this file.
-# For advanced use and comprehensive documentation of the format, please see:
-# http://docs.haskellstack.org/en/stable/yaml_configuration/
-
-# Resolver to choose a 'specific' stackage snapshot or a compiler version.
-# A snapshot resolver dictates the compiler version and the set of packages
-# to be used for project dependencies. For example:
-#
-# resolver: lts-3.5
-# resolver: nightly-2015-09-21
-# resolver: ghc-7.10.2
-# resolver: ghcjs-0.1.0_ghc-7.10.2
-# resolver:
-#  name: custom-snapshot
-#  location: "./custom-snapshot.yaml"
 resolver: lts-6.7

-# User packages to be built.
-# Various formats can be used as shown in the example below.
-#
-# packages:
-# - some-directory
-# - https://example.com/foo/bar/baz-0.0.2.tar.gz
-# - location:
-#    git: https://github.com/commercialhaskell/stack.git
-#    commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
-# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
-#   extra-dep: true
-#  subdirs:
-#  - auto-update
-#  - wai
-#
-# A package marked 'extra-dep: true' will only be built if demanded by a
-# non-dependency (i.e. a user package), and its test suites and benchmarks
-# will not be run. This is useful for tweaking upstream packages.
 packages:
 - location:
     git: git@github.com:albertoruiz/hmatrix.git
@@ -42,36 +7,11 @@ packages:
   subdirs:
     - packages/base
 - .
-- http://hackage.haskell.org/package/containers-0.5.7.1/containers-0.5.7.1.tar.gz
-- http://hackage.haskell.org/package/text-1.2.2.1/text-1.2.2.1.tar.gz
-- http://hackage.haskell.org/package/stemmer-0.5.2/stemmer-0.5.2.tar.gz

-# Dependency packages to be pulled from upstream that are not in the resolver
-# (e.g., acme-missiles-0.3)
-extra-deps: []
-
-# Override default flag values for local packages and extra-deps
-flags: {}
-
-# Extra package databases containing global packages
-extra-package-dbs: []
-
-# Control whether we use the GHC we find on the path
-# system-ghc: true
-#
-# Require a specific version of stack, using version ranges
-# require-stack-version: -any # Default
-# require-stack-version: ">=1.1"
-#
-# Override the architecture used by stack, especially useful on Windows
-# arch: i386
-# arch: x86_64
-#
-# Extra directories used by stack for building
-# extra-include-dirs: [/path/to/dir]
-# extra-lib-dirs: [/path/to/dir]
-#
-# Allow a newer minor version of GHC than the snapshot specifies
-# compiler-check: newer-minor
-
-system-ghc: false
+extra-deps:
+  - directory-1.2.7.0
+  - text-1.2.2.1
+  - stemmer-0.5.2
+  - containers-0.5.7.1
+  - Chart-1.8
+  - Chart-cairo-1.8