diff --git a/README.md b/README.md
index fa44744..8a4f5d6 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,11 @@ stack exec example-xor
 
 # Naive Bayes document classifier, using Reuters dataset, achieves ~62% accuracy
 # using Porter stemming, stopword elimination and a few custom techniques.
-# the dataset is imbalanced which causes the classifier to be biased towards some classes (earn, acq, ...)
+# The dataset is imbalanced, which causes the classifier to be biased towards some classes (earn, acq, ...)
+# To work around the imbalanced dataset, there is a --top-ten option which classifies only the top 10 most popular
+# classes, with evenly split datasets (100 documents each)
# N-Grams don't seem to help us much here (or maybe my implementation is wrong!), using bigrams increases
 # accuracy, while decreasing F-Measure slightly.
 stack exec example-naivebayes-doc-classifier -- --verbose
+stack exec example-naivebayes-doc-classifier -- --verbose --top-ten
 ```
diff --git a/examples/doc-classifier-data b/examples/doc-classifier-data
deleted file mode 160000
index 5b069a5..0000000
--- a/examples/doc-classifier-data
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 5b069a54a6a68efee0ef4bb15c1aa56414f12c28
diff --git a/examples/naivebayes-doc-classifier.hs b/examples/naivebayes-doc-classifier.hs
index a16071e..f0d4985 100644
--- a/examples/naivebayes-doc-classifier.hs
+++ b/examples/naivebayes-doc-classifier.hs
@@ -8,7 +8,8 @@ module Main
   import Debug.Trace
   import Data.List.Split
   import Control.Arrow ((&&&))
-  import Control.Monad (when)
+  import Control.Monad (when, unless)
+  import Data.Function (on)
   import System.Environment
 
   main = do
@@ -19,32 +20,47 @@ module Main
     classes <- map (filter (/= ' ')) . lines <$> readFile "examples/doc-classifier-data/data-classes"
     sws <- lines <$> readFile "examples/stopwords"
 
-    let verbose = or [elem "-v" args, elem "--verbose" args]
-    when (not verbose) $ putStrLn "use --verbose to print more information"
+    let verbose = elem "-v" args || elem "--verbose" args
+        topten = elem "-10" args || elem "--top-ten" args
+    unless verbose $ putStrLn "use --verbose to print more information"
 
     let intClasses = [0..length classes - 1]
-        documents = cleanDocuments $ removeWords sws $ createDocuments classes dataset
+        documents = cleanDocuments . removeWords sws $ createDocuments classes dataset
         testDocuments = cleanDocuments $ createDocuments classes test
-        devTestDocuments = take 30 testDocuments
+
         nb = train documents intClasses
 
-        results = session testDocuments nb
+        -- top-ten: balance the data by keeping the 10 largest classes, 100 documents each
+        topClasses = take 10 . reverse $ sortBy (compare `on` (length . snd)) (cd nb)
+        filtered = map (\(c, ds) -> (c, take 100 ds)) topClasses
+        filteredClasses = map fst filtered
+        ttDocs = concatMap snd filtered
+        ttNB = train ttDocs filteredClasses
+
+        ttTestDocuments = filter ((`elem` filteredClasses) . c) . cleanDocuments $ createDocuments classes test
+
+        ttResults = session ttTestDocuments ttNB
+        normalResults = session testDocuments nb
+        results = if topten then ttResults else normalResults
+
+        iClasses = if topten then filteredClasses else intClasses
         -- results = session devTestDocuments nb
 
-    when verbose $ print (text $ head documents)
+    when verbose . putStrLn $ "# Example of cleaned document:\n" ++ (show . text $ head documents)
 
     let showResults (c, (r, confidence)) = putStrLn (classes !! c ++ " ~ " ++ classes !! r)
     when verbose $ mapM_ showResults results
 
-    when verbose $
+    when (verbose && not topten) .
putStrLn $ "The training data is imbalanced which causes the classifier to be biased towards\n" ++ "some classes, `earn` is an example, the class alone has around 90% accuracy while\n" ++ "the rest of classes have a much lower accuracy and it's commonly seen that most inputs\n" ++ "are incorrectly classified as `earn`.\n" + ++ "Try running with --top-ten to classify top 10 classes by using evenly split documents\n" let accuracies = - let as = zip intClasses $ map (\c -> filter ((==c) . fst) results) intClasses + let as = zip iClasses $ map (\c -> filter ((==c) . fst) results) iClasses av = filter (not . null . snd) as calculated = map (fst &&& accuracy . snd) av in sortBy (\(_, a) (_, b) -> b `compare` a) calculated diff --git a/examples/sentiment-analysis-data b/examples/sentiment-analysis-data deleted file mode 120000 index dbd75b3..0000000 --- a/examples/sentiment-analysis-data +++ /dev/null @@ -1 +0,0 @@ -../../sibe-repos/sentiment-analysis-data \ No newline at end of file diff --git a/examples/sentiment-analysis-data b/examples/sentiment-analysis-data new file mode 100755 index 0000000..f0a3f50 --- /dev/null +++ b/examples/sentiment-analysis-data @@ -0,0 +1,5 @@ +XSym +0040 +3666c4cacaf995ebd11ef25aab70de99 +../../sibe-repos/sentiment-analysis-data + \ No newline at end of file diff --git a/src/Sibe/NaiveBayes.hs b/src/Sibe/NaiveBayes.hs index 4d72968..7afe1f6 100644 --- a/src/Sibe/NaiveBayes.hs +++ b/src/Sibe/NaiveBayes.hs @@ -14,6 +14,7 @@ module Sibe.NaiveBayes cleanText, cleanDocuments, ngram, + ngramText, removeWords, removeStopwords, ) @@ -84,13 +85,14 @@ module Sibe.NaiveBayes ngram :: Int -> [Document] -> [Document] ngram n documents = - map (\(Document text c) -> Document (helper text) c) documents - where - helper text = - let ws = words text - pairs = zip [0..] ws - grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs - in unwords ("":grams) + map (\(Document text c) -> Document (ngramText n text) c) documents + + ngramText :: Int -> String -> String + ngramText n text = + let ws = words text + pairs = zip [0..] ws + grams = map (\(i, w) -> concat . intersperse "_" $ w:((take (n - 1) . drop (i+1)) ws)) pairs + in unwords ("_":grams) session :: [Document] -> NB -> [(Class, (Class, Double))] session docs nb = @@ -102,10 +104,10 @@ module Sibe.NaiveBayes let scores = map (score . fst) classes index = argmax scores m = maximum scores - in (index, m) + in (fst (classes !! index), m) where score c = - let prior = snd (classes !! c) + let prior = snd (fromJust $ find ((==c) . fst) classes) -- below is the formula according to Multinominal Naive Bayes, but it seems -- using a uniform prior probability seems to work better when working with imbalanced @@ -118,11 +120,13 @@ module Sibe.NaiveBayes in alpha * product (map (prob c) (words txt)) prob c w = - let tctM = find ((== w) . fst) (snd (cw !! c)) + let fcw = fromJust $ find ((==c) . fst) cw + fcg = fromJust $ find ((==c) . fst) cgram + tctM = find ((== w) . fst) (snd fcw) tct = if isJust tctM then (snd . fromJust) tctM else 0 - cvoc = sum $ map snd (snd (cw !! c)) + cvoc = sum $ map snd (snd fcw) voc = vocabulary - gram = find ((==w) . last . splitOn "_" . fst) (snd (cgram !! c)) + gram = find ((==w) . last . splitOn "_" . fst) (snd fcg) pg = if isJust gram then (snd . fromJust) gram else 0 -- in realToFrac (tct * pg + 1) / realToFrac (cvoc + voc) -- uncomment to enable ngrams in realToFrac (tct + 1) / realToFrac (cvoc + voc)