hs-huffman/index.hs
2016-03-18 13:37:20 +03:30

82 lines
3.3 KiB
Haskell

module Huffman ( tree, charSequence, decode )
where
import Data.List
import Data.Function (on)
import Node
import qualified Data.Map as Map
import Data.Maybe
import Debug.Trace
import Data.Char (chr)
pEOF = chr 999
-- Create initial nodes (forest of trees) for each symbol
createNodes :: String -> [Node]
createNodes s = map (\ch -> createLeaf ch $ charWeight s ch) $ nub s
-- Coding table generation step (recursive)
step :: [Node] -> Node
step nodes = let cut = (tail . tail) sorted
in
if length nodes > 1 then
step (merge:cut)
else
head nodes
where
sorted = sortBy (compare `on` weight) nodes
merge = Node { symbol = Nothing, weight = sumWeights, left = Just first, right = Just second }
first = head sorted
second = (head . tail) sorted
sumWeights = weight first + weight second
tree :: String -> Node
tree = step . createNodes
-- Root-to-leaf search, find a character's sequence string
charSequence :: Node -> Char -> String
charSequence node ch =
fromJust $ helper node ch ""
where
helper (Node { symbol = s, left = l, right = r }) ch sequ
| (isJust s) && (fromJust s == ch) = Just sequ
| isNothing l && isNothing r = Nothing
| otherwise = let leftPath = helper (fromJust l) ch (sequ ++ "0")
rightPath = helper (fromJust r) ch (sequ ++ "1")
in if isJust leftPath then leftPath else rightPath
-- Root-to-leaf search, find a character based on sequence string
findChar :: Node -> String -> Maybe Char
findChar n@(Node { symbol = s, left = l, right = r}) sequ
| length sequ > 0 = let path = if head sequ == '0' then l else r
in if isJust path then
findChar (fromJust path) (tail sequ)
else
Nothing
| otherwise = if isJust s then s else Nothing
-- Encode an string into huffman coding
encode :: String -> String
encode input = let t = tree input
table = charTable t input
in concat $ map (\a -> fromJust $ Map.lookup a table) input
-- Character table, a Map representing each character's bit sequence
charTable :: Node -> String -> Map.Map Char String
charTable t input = Map.fromList $ map (\a -> (a, charSequence t a)) (nub input)
-- Each character's weight in string
charWeight :: String -> Char -> Float
charWeight s x = genericLength $ filter (==x) s
-- Decode a string, given the tree representing it
decode :: Node -> String -> String
decode t input = let (valid, next) = span (isNothing . findChar t) $ inits input
sequ = (head next)
ninput = (tails input) !! (length valid)
ch = fromJust $ findChar t sequ
in if length ninput > 0 then
ch:(decode t ninput)
else
[ch]