feat(stopwords): removeWords and removeStopwords functions as pre-processors

feat(confidence, WIP): calculate confidence of each classification
This commit is contained in:
Mahdi Dibaiee
2016-08-08 10:02:26 +04:30
parent ea1f05f001
commit 099c25e166
3 changed files with 742 additions and 52 deletions

View File

@@ -14,26 +14,20 @@ module Main
test <- readFile "examples/doc-classifier-data/data-reuters-test"
classes <- map (filter (/= ' ')) . lines <$> readFile "examples/doc-classifier-data/data-classes"
sws <- lines <$> readFile "examples/stopwords"
let intClasses = [0..length classes - 1]
-- let intClasses = [0, 1]
documents = cleanDocuments $ createDocuments classes dataset
-- documents = [Document "Chinese Beijing Chinese" 0,
-- Document "Chinese Chinese Shanghai" 0,
-- Document "Chinese Macao" 0,
-- Document "Japan Tokyo Chinese" 1]
-- testDocuments = [Document "Chinese Chinese Chinese Japan Tokyo" 0]
documents = cleanDocuments $ removeWords sws $ createDocuments classes dataset
testDocuments = cleanDocuments $ createDocuments classes test
devTestDocuments = take 30 testDocuments
-- devTestDocuments = [Document "Chinese Chinese Chinese Tokyo Japan" 0]
nb = train documents intClasses
results = map (\(Document text c) -> (c, run text nb)) testDocuments
-- results = map (\(Document text c) -> (c, run text nb)) devTestDocuments
-- print (text $ head documents)
print (text $ head documents)
let showResults (c, r) = putStrLn (classes !! c ++ " ~ " ++ classes !! r)
let showResults (c, (r, confidence)) = putStrLn (classes !! c ++ " ~ " ++ classes !! r)
mapM_ showResults results
putStrLn $ "Recall: " ++ show (recall results)