diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ff2a935 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "examples/doc-classifier-data"] + path = examples/doc-classifier-data + url = git@github.com:mdibaiee/doc-classifier-data diff --git a/README.md b/README.md index 02c4bfe..e87d5d1 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ module Main where network = session inputs rnetwork labels learning_rate (iterations, epochs) -- run inputs through the trained network + -- note: here we are using the examples in the training dataset to test the network, + -- this is here just to demonstrate the way the library works, you should not do this results = map (`forward` network) inputs -- compute the new cost diff --git a/examples/doc-classifier-data b/examples/doc-classifier-data new file mode 160000 index 0000000..5b069a5 --- /dev/null +++ b/examples/doc-classifier-data @@ -0,0 +1 @@ +Subproject commit 5b069a54a6a68efee0ef4bb15c1aa56414f12c28 diff --git a/examples/naivebayes-doc-classifier.hs b/examples/naivebayes-doc-classifier.hs index d9026fd..85098a4 100644 --- a/examples/naivebayes-doc-classifier.hs +++ b/examples/naivebayes-doc-classifier.hs @@ -6,24 +6,24 @@ module Main import Data.List import Data.Maybe import Debug.Trace + import Data.List.Split main = do - dataset <- readFile "examples/naivebayes-doc-classifier/data-reuters" - test <- readFile "examples/naivebayes-doc-classifier/data-reuters-test" + dataset <- readFile "examples/doc-classifier-data/data-reuters" + test <- readFile "examples/doc-classifier-data/data-reuters-test" - classes <- map (filter (/= ' ')) . lines <$> readFile "examples/naivebayes-doc-classifier/data-classes" + classes <- map (filter (/= ' ')) . lines <$> readFile "examples/doc-classifier-data/data-classes" let intClasses = [0..length classes - 1] documents = createDocuments classes dataset testDocuments = createDocuments classes test nb = initialize documents - let testResults (Document text c) = - let r = determine text nb intClasses documents - in trace (classes !! c ++ " ~ " ++ classes !! r) c == r + results = map (\(Document text c) -> (c, determine text nb intClasses documents)) testDocuments - let results = map testResults testDocuments + let showResults (c, r) = putStrLn (classes !! c ++ " ~ " ++ classes !! r) + mapM_ showResults results - putStr "Accuracy: " - putStr . show . round $ (genericLength (filter (==True) results) / genericLength results) * 100 - putStrLn "%" + putStrLn $ "Recall: " ++ show (recall results) ++ "%" + putStrLn $ "Precision: " ++ show (precision results) ++ "%" + putStrLn $ "Accuracy: " ++ show (accuracy results) ++ "%" diff --git a/sibe.cabal b/sibe.cabal index 2528066..4a74e99 100644 --- a/sibe.cabal +++ b/sibe.cabal @@ -50,6 +50,7 @@ executable example-naivebayes-doc-classifier , sibe , hmatrix , containers + , split default-language: Haskell2010 test-suite sibe-test diff --git a/src/Sibe/NaiveBayes.hs b/src/Sibe/NaiveBayes.hs index 7c840f8..54348cb 100644 --- a/src/Sibe/NaiveBayes.hs +++ b/src/Sibe/NaiveBayes.hs @@ -4,7 +4,11 @@ module Sibe.NaiveBayes createDocuments, initialize, calculate, - determine + determine, + ordNub, + accuracy, + precision, + recall, ) where import Data.List @@ -63,3 +67,30 @@ module Sibe.NaiveBayes go _ [] = [] go s (x:xs) = if x `Set.member` s then go s xs else x : go (Set.insert x s) xs + + accuracy :: [(Int, Int)] -> Int + accuracy results = + let correct = filter (uncurry (==)) results + in round $ genericLength correct / genericLength results * 100 + + recall :: [(Int, Int)] -> Int + recall results = + let classes = ordNub (map fst results) + s = sum (map rec classes) / genericLength results + in round $ s * 100 + where + rec a = + let t = genericLength $ filter (\(c, r) -> c == r && c == a) results + y = genericLength $ filter (\(c, r) -> c == a) results + in t / y + + precision :: [(Int, Int)] -> Int + precision results = + let classes = ordNub (map fst results) + s = sum (map prec classes) / genericLength results + in round $ s * 100 + where + prec a = + let t = genericLength $ filter (\(c, r) -> c == r && c == a) results + y = genericLength $ filter (\(c, r) -> r == a) results + in t / y