hs-huffman/index.hs

82 lines
3.2 KiB
Haskell
Raw Normal View History

2016-03-18 10:07:20 +00:00
module Huffman ( tree, charSequence, decode )
where
import Data.List
import Data.Function (on)
import Node
import qualified Data.Map as Map
import Data.Maybe
import Debug.Trace
import Data.Char (chr)
pEOF = chr 999
-- Create initial nodes (forest of trees) for each symbol
createNodes :: String -> [Node]
createNodes s = map (\ch -> createLeaf ch $ charWeight s ch) $ nub s
-- Coding table generation step (recursive)
step :: [Node] -> Node
step nodes = let cut = (tail . tail) sorted
in
if length nodes > 1 then
step (merge:cut)
else
head nodes
where
sorted = sortBy (compare `on` weight) nodes
merge = Node { symbol = Nothing, weight = sumWeights, left = Just first, right = Just second }
first = head sorted
second = (head . tail) sorted
sumWeights = weight first + weight second
tree :: String -> Node
tree = step . createNodes
-- Root-to-leaf search, find a character's sequence string
charSequence :: Node -> Char -> String
charSequence node ch =
fromJust $ helper node ch ""
where
2016-03-18 14:21:36 +00:00
helper Node { symbol = s, left = l, right = r } ch sequ
| s == Just ch = Just sequ
2016-03-18 10:07:20 +00:00
| isNothing l && isNothing r = Nothing
| otherwise = let leftPath = helper (fromJust l) ch (sequ ++ "0")
rightPath = helper (fromJust r) ch (sequ ++ "1")
in if isJust leftPath then leftPath else rightPath
-- Root-to-leaf search, find a character based on sequence string
findChar :: Node -> String -> Maybe Char
2016-03-18 14:21:36 +00:00
findChar n@Node { symbol = s, left = l, right = r} sequ
| not $ null sequ = let path = if head sequ == '0' then l else r
2016-03-18 10:07:20 +00:00
in if isJust path then
findChar (fromJust path) (tail sequ)
else
Nothing
| otherwise = if isJust s then s else Nothing
-- Encode an string into huffman coding
encode :: String -> String
encode input = let t = tree input
table = charTable t input
2016-03-18 14:21:36 +00:00
in concatMap (\a -> fromJust $ Map.lookup a table) input
2016-03-18 10:07:20 +00:00
-- Character table, a Map representing each character's bit sequence
charTable :: Node -> String -> Map.Map Char String
charTable t input = Map.fromList $ map (\a -> (a, charSequence t a)) (nub input)
-- Each character's weight in string
charWeight :: String -> Char -> Float
charWeight s x = genericLength $ filter (==x) s
-- Decode a string, given the tree representing it
decode :: Node -> String -> String
decode t input = let (valid, next) = span (isNothing . findChar t) $ inits input
2016-03-18 14:21:36 +00:00
sequ = head next
ninput = tails input !! length valid
2016-03-18 10:07:20 +00:00
ch = fromJust $ findChar t sequ
2016-03-18 14:21:36 +00:00
in if not $ null ninput then
ch : decode t ninput
2016-03-18 10:07:20 +00:00
else
[ch]