commit 69c54f4e564e5a101a1b962c79bd92359f3626e1 Author: Mahdi Dibaiee Date: Fri Mar 18 13:37:20 2016 +0330 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fe3b63e --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +#### joe made this: https://goel.io/joe + +#####=== Haskell ===##### + +dist +cabal-dev +*.o +*.hi +*.chi +*.chs.h +*.dyn_o +*.dyn_hi +.virtualenv +.hpc +.hsenv +.cabal-sandbox/ +cabal.sandbox.config +cabal.config +*.prof +*.aux +*.hp + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e287e4b --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2016 Mahdi Dibaiee + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Node.hs b/Node.hs new file mode 100644 index 0000000..6395b85 --- /dev/null +++ b/Node.hs @@ -0,0 +1,27 @@ +module Node +( createLeaf, Node(..), prettyPrint ) where + import qualified Data.Map as Map + import Data.Maybe + + data Node = Node { symbol :: Maybe Char + , weight :: Float + , left :: Maybe Node + , right :: Maybe Node + } deriving (Show) + + createLeaf :: Char -> Float -> Node + createLeaf c a = Node { symbol = Just c + , weight = a + , left = Nothing + , right = Nothing } + + prettyPrint :: Node -> Int -> String + prettyPrint Node { weight = w, symbol = s, left = l, right = r } x = + let spaces = replicate (x * 2) ' ' + indent = "\n" ++ spaces + x' = x + 1 + ws = "Weight: " ++ show w + ss = if isJust s then "Symbol: " ++ (show . fromJust) s else "" + ls = if isJust l then indent ++ "Left: " ++ prettyPrint (fromJust l) x' else "" + rs = if isJust r then indent ++ "Right: " ++ prettyPrint (fromJust r) x' else "" + in ws ++ " " ++ ss ++ ls ++ rs diff --git a/README.md b/README.md new file mode 100644 index 0000000..77a4d29 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +hs-huffman +========== + +[Huffman Coding](https://en.wikipedia.org/wiki/Huffman_coding) implementation in Haskell. + +I'm just learning Haskell, so you might not want to use this in any product. If you think I'm doing it wrong, I would be glad to know! diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/hs-huffman.cabal b/hs-huffman.cabal new file mode 100644 index 0000000..5774191 --- /dev/null +++ b/hs-huffman.cabal @@ -0,0 +1,24 @@ +-- Initial hs-huffman.cabal generated by cabal init. For further +-- documentation, see http://haskell.org/cabal/users-guide/ + +name: hs-huffman +version: 0.1.0.0 +-- synopsis: +-- description: +license: MIT +license-file: LICENSE +author: Mahdi Dibaiee +maintainer: mdibaiee@aol.com +-- copyright: +category: Data +build-type: Simple +-- extra-source-files: +cabal-version: >=1.10 + +library + -- exposed-modules: + -- other-modules: + -- other-extensions: + build-depends: base >=4.8 && <4.9 + -- hs-source-dirs: + default-language: Haskell2010 \ No newline at end of file diff --git a/index.hs b/index.hs new file mode 100644 index 0000000..a6063be --- /dev/null +++ b/index.hs @@ -0,0 +1,81 @@ +module Huffman ( tree, charSequence, decode ) + where + import Data.List + import Data.Function (on) + import Node + import qualified Data.Map as Map + import Data.Maybe + import Debug.Trace + import Data.Char (chr) + + pEOF = chr 999 + + -- Create initial nodes (forest of trees) for each symbol + createNodes :: String -> [Node] + createNodes s = map (\ch -> createLeaf ch $ charWeight s ch) $ nub s + + -- Coding table generation step (recursive) + step :: [Node] -> Node + step nodes = let cut = (tail . tail) sorted + in + if length nodes > 1 then + step (merge:cut) + else + head nodes + where + sorted = sortBy (compare `on` weight) nodes + merge = Node { symbol = Nothing, weight = sumWeights, left = Just first, right = Just second } + first = head sorted + second = (head . tail) sorted + sumWeights = weight first + weight second + tree :: String -> Node + tree = step . createNodes + + -- Root-to-leaf search, find a character's sequence string + charSequence :: Node -> Char -> String + charSequence node ch = + fromJust $ helper node ch "" + where + helper (Node { symbol = s, left = l, right = r }) ch sequ + | (isJust s) && (fromJust s == ch) = Just sequ + | isNothing l && isNothing r = Nothing + | otherwise = let leftPath = helper (fromJust l) ch (sequ ++ "0") + rightPath = helper (fromJust r) ch (sequ ++ "1") + in if isJust leftPath then leftPath else rightPath + + -- Root-to-leaf search, find a character based on sequence string + findChar :: Node -> String -> Maybe Char + findChar n@(Node { symbol = s, left = l, right = r}) sequ + | length sequ > 0 = let path = if head sequ == '0' then l else r + in if isJust path then + findChar (fromJust path) (tail sequ) + else + Nothing + + | otherwise = if isJust s then s else Nothing + + + -- Encode an string into huffman coding + encode :: String -> String + encode input = let t = tree input + table = charTable t input + in concat $ map (\a -> fromJust $ Map.lookup a table) input + + -- Character table, a Map representing each character's bit sequence + charTable :: Node -> String -> Map.Map Char String + charTable t input = Map.fromList $ map (\a -> (a, charSequence t a)) (nub input) + + -- Each character's weight in string + charWeight :: String -> Char -> Float + charWeight s x = genericLength $ filter (==x) s + + -- Decode a string, given the tree representing it + decode :: Node -> String -> String + decode t input = let (valid, next) = span (isNothing . findChar t) $ inits input + sequ = (head next) + ninput = (tails input) !! (length valid) + ch = fromJust $ findChar t sequ + in if length ninput > 0 then + ch:(decode t ninput) + else + [ch]