82 lines
3.3 KiB
Haskell
82 lines
3.3 KiB
Haskell
module Huffman ( tree, charSequence, decode )
|
|
where
|
|
import Data.List
|
|
import Data.Function (on)
|
|
import Node
|
|
import qualified Data.Map as Map
|
|
import Data.Maybe
|
|
import Debug.Trace
|
|
import Data.Char (chr)
|
|
|
|
pEOF = chr 999
|
|
|
|
-- Create initial nodes (forest of trees) for each symbol
|
|
createNodes :: String -> [Node]
|
|
createNodes s = map (\ch -> createLeaf ch $ charWeight s ch) $ nub s
|
|
|
|
-- Coding table generation step (recursive)
|
|
step :: [Node] -> Node
|
|
step nodes = let cut = (tail . tail) sorted
|
|
in
|
|
if length nodes > 1 then
|
|
step (merge:cut)
|
|
else
|
|
head nodes
|
|
where
|
|
sorted = sortBy (compare `on` weight) nodes
|
|
merge = Node { symbol = Nothing, weight = sumWeights, left = Just first, right = Just second }
|
|
first = head sorted
|
|
second = (head . tail) sorted
|
|
sumWeights = weight first + weight second
|
|
tree :: String -> Node
|
|
tree = step . createNodes
|
|
|
|
-- Root-to-leaf search, find a character's sequence string
|
|
charSequence :: Node -> Char -> String
|
|
charSequence node ch =
|
|
fromJust $ helper node ch ""
|
|
where
|
|
helper (Node { symbol = s, left = l, right = r }) ch sequ
|
|
| (isJust s) && (fromJust s == ch) = Just sequ
|
|
| isNothing l && isNothing r = Nothing
|
|
| otherwise = let leftPath = helper (fromJust l) ch (sequ ++ "0")
|
|
rightPath = helper (fromJust r) ch (sequ ++ "1")
|
|
in if isJust leftPath then leftPath else rightPath
|
|
|
|
-- Root-to-leaf search, find a character based on sequence string
|
|
findChar :: Node -> String -> Maybe Char
|
|
findChar n@(Node { symbol = s, left = l, right = r}) sequ
|
|
| length sequ > 0 = let path = if head sequ == '0' then l else r
|
|
in if isJust path then
|
|
findChar (fromJust path) (tail sequ)
|
|
else
|
|
Nothing
|
|
|
|
| otherwise = if isJust s then s else Nothing
|
|
|
|
|
|
-- Encode an string into huffman coding
|
|
encode :: String -> String
|
|
encode input = let t = tree input
|
|
table = charTable t input
|
|
in concat $ map (\a -> fromJust $ Map.lookup a table) input
|
|
|
|
-- Character table, a Map representing each character's bit sequence
|
|
charTable :: Node -> String -> Map.Map Char String
|
|
charTable t input = Map.fromList $ map (\a -> (a, charSequence t a)) (nub input)
|
|
|
|
-- Each character's weight in string
|
|
charWeight :: String -> Char -> Float
|
|
charWeight s x = genericLength $ filter (==x) s
|
|
|
|
-- Decode a string, given the tree representing it
|
|
decode :: Node -> String -> String
|
|
decode t input = let (valid, next) = span (isNothing . findChar t) $ inits input
|
|
sequ = (head next)
|
|
ninput = (tails input) !! (length valid)
|
|
ch = fromJust $ findChar t sequ
|
|
in if length ninput > 0 then
|
|
ch:(decode t ninput)
|
|
else
|
|
[ch]
|