module Main where -- import Sibe import Sibe.NaiveBayes import Text.Printf import Data.List import Data.Maybe import Debug.Trace import Data.List.Split import Control.Arrow ((&&&)) main = do dataset <- readFile "examples/doc-classifier-data/data-reuters" test <- readFile "examples/doc-classifier-data/data-reuters-test" classes <- map (filter (/= ' ')) . lines <$> readFile "examples/doc-classifier-data/data-classes" let intClasses = [0..length classes - 1] -- let intClasses = [0, 1] documents = cleanDocuments $ createDocuments classes dataset -- documents = [Document "Chinese Beijing Chinese" 0, -- Document "Chinese Chinese Shanghai" 0, -- Document "Chinese Macao" 0, -- Document "Japan Tokyo Chinese" 1] -- testDocuments = [Document "Chinese Chinese Chinese Japan Tokyo" 0] testDocuments = cleanDocuments $ createDocuments classes test devTestDocuments = take 30 testDocuments -- devTestDocuments = [Document "Chinese Chinese Chinese Tokyo Japan" 0] nb = train documents intClasses results = map (\(Document text c) -> (c, run text nb)) testDocuments -- results = map (\(Document text c) -> (c, run text nb)) devTestDocuments -- print (text $ head documents) let showResults (c, r) = putStrLn (classes !! c ++ " ~ " ++ classes !! r) mapM_ showResults results putStrLn $ "Recall: " ++ show (recall results) putStrLn $ "Precision: " ++ show (precision results) putStrLn $ "F Measure: " ++ show (fmeasure results) putStrLn $ "Accuracy: " ++ show (accuracy results) createDocuments classes content = let splitted = splitOn (replicate 10 '-' ++ "\n") content pairs = map ((head . lines) &&& (unwords . tail . lines)) splitted documents = map (\(topic, text) -> Document text (fromJust $ elemIndex topic classes)) pairs in documents