Haskell で副作用のない LZSS を実装してみようと考えてやってみた.
検索には二分木を用いるとして,有限サイズの窓では二分木の古いノードを消さなければならない.ということで,キューでも使って追加したノードを覚えとけばいいのだけど,Haskell でO(1)操作のキューを作るのはただ事でないので... とりあえずヒープを使って O(log n) のキューを作りましょうと.(まあ,Chris Okasaki: Simple and efficient purely functional queues and deques. JFP 5(4):583-592, October 1995. のキューが使えるんだけどよく理解してないし)
import Char import List import Monad import System.Random import System.IO import System import IO import CPUTime -- 検索およびHeap用 BinaryTree data BTree a = Node a (BTree a) (BTree a) | Nil -- 表示 instance Show a => Show (BTree a) where showsPrec n x = xshowsB x xshowsB (Node a l r) = ("Node ("++) . (shows a) . (") ("++). (shows l) . (") ("++). (shows r) . (")"++) xshowsB (Nil) = ("Nil"++) -- 表示2 printBTree b = mapM_ putStr (map (++"\n") (bstr b)) bstr (Node a l r) = let ba = shows a "" sp = map (\x -> ' ') [1..(length ba)] sps = ba:map (\x -> sp) [1..] bstrr = bstr r bstrl = (if isNil l then [""] else bstr l) leftes = (if isNil r then leftedges2 else leftedges) bl = map (\(p,e,s)->p++e++s) (zip3 sps leftes bstrl) br = map (\(e,s) -> sp++e++s) (zip rightedges bstrr) in if (isNil l) && (isNil r) then [ba] else bl++br where leftedges = "-+-":leftedges' leftedges' = " | ":leftedges' leftedges2 = "-+-":leftedges2' leftedges2' = " ":leftedges2' rightedges = " +-":rightedges' rightedges' = " ":rightedges' bstr Nil = [] -- ノードのテストと置換 isNil Nil = True isNil (Node a l r) = False getVal (Node a l r) = a getLeft (Node a l r) = l getRight (Node a l r) = r insRight (Node a l r) r' = Node a l r' insLeft (Node a l r) l' = Node a l' r insRoot (Node a l r) a' = Node a' l r newNode x = Node x Nil Nil -- ヒープはサイズと二分木のペア type HeapTree a = (Int, BTree a) -- heap の印刷用 printHeap (n, t) = printBTree t -- ヒープに値を入れる(key, val) insertHeap::Ord a => HeapTree a -> a -> HeapTree a insertHeap heap item = let news = fst heap + 1 in (news, ins news (snd heap)) where ins n t = if n==1 then Node item Nil Nil else (if mod n 2 == 0 then (let tl = getLeft t tl' = ins (div n 2) tl in swapHeapNodeLeft t tl' ) else (let tr = getRight t tr' = ins (div n 2) tr in swapHeapNodeRight t tr' ) ) -- 左右の子と適宜入れ替える swapHeapNodeLeft (p@((Node ki1 l1 r1))) (c@((Node ki2 l2 r2))) = if ki1 <= ki2 then Node ki1 c r1 else Node ki2 (Node ki1 l2 r2) r1 swapHeapNodeLeft (Node ki1 l1 r1) Nil = Node ki1 Nil r1 swapHeapNodeRight (p@((Node ki1 l1 r1))) (c@((Node ki2 l2 r2))) = if ki1 <= ki2 then Node ki1 l1 c else Node ki2 l1 (Node ki1 l2 r2) swapHeapNodeRight (Node ki1 l1 r1) Nil = Node ki1 l1 Nil -- ヒープの最小値を覗く peepMinimumHeap heap = getVal (snd heap) -- 空? isEmpty (_, Nil) = True isEmpty (_, Node _ _ _) = False -- 最小値と更新されたヒープを返す minimumHeap::Ord a => HeapTree a -> (a, HeapTree a) minimumHeap heap = let size = fst heap t = snd heap in if size == 1 then (getVal t, (0, Nil)) else let (h, l, r) = moveLastToHead t size in (getVal t, (size-1, reHeap h l r)) where reHeap ki Nil Nil = Node ki Nil Nil reHeap ki Nil (r@(Node kir lr rr)) = if ki <= kir then Node ki Nil r else recR kir Nil ki lr rr reHeap ki (l@(Node kil ll rl)) Nil = if ki <= kil then Node ki l Nil else recL kil Nil ki ll rl reHeap ki (l@(Node kil ll rl)) (r@(Node kir lr rr)) = if ki <= kir && ki <= kil then Node ki l r else (if kir <= kil then recR kir l ki lr rr else recL kil r ki ll rl ) recR kir l ki lr rr = Node kir l (reHeap ki lr rr) recL kil r ki ll rl = Node kil (reHeap ki ll rl) r moveLastToHead t s = let (h, t') = mov t s in (h, getLeft t', getRight t') mov t n = if n==1 then (getVal t, Nil) else (if mod n 2 == 0 then (let tl = getLeft t (h, tl') = mov tl (div n 2) in (h, swapHeapNodeLeft t tl') ) else (let tr = getRight t (h, tr') = mov tr (div n 2) in (h, swapHeapNodeRight t tr') ) ) -- ヒープソート heapsort xs = heapsortrec (foldl (\t x -> insertHeap t x) (0, Nil) xs) where heapsortrec heap = case heap of (0, _) -> [] (_,_) -> (let (ki, h') = minimumHeap heap in ki:heapsortrec h') -- ヒープのテスト -- 挿入過程 heaptest2 xs = foldl (\mt x -> mt>>= (\t -> let t' = insertHeap t x in printBTree (snd t')>> putStr "\n" >> return t')) (return (0, Nil)) xs -- 取り出し過程 heaptest3 heap = case heap of (0, _) -> print "" (_,_) -> (let (ki, h') = minimumHeap heap in print ki >> printBTree (snd h') >> heaptest3 h') -- 挿入して取り出す heaptest4 xs = heaptest2 xs >>= (\h -> heaptest3 h)
んで,検索木とキューの用意ができたので本体をば.展開はかなりきれいにかけるが圧縮部分が汚い... ループ使わずに全部再帰だし.ちなみに符号は Direct c で圧縮なしのキャラクタ c, Compress a l で自分より a 前から l + threlength キャラクタで圧縮と.
-- LZSS 符号 data LZCode = Direct Char | Compress Int Int instance Show LZCode where showsPrec n x = xshowsLZ x xshowsLZ (Direct x) = ("D "++) . (shows x) xshowsLZ (Compress a l) = ("C "++) . (shows (a, l)) instance Eq LZCode where (Direct x) == (Direct y) = x==y (Compress a l) == (Compress b m) = a==b && l == m -- 最大一致長 maxlength = 18 -- これ未満の一致は圧縮しない threlength = 3
- - 窓のサイズ
んで,さらにこれを8符号ごとにまとめてバイトストリームに変換するのとそれをファイルに読み書きする部分を足し合わせると.最後にコマンドラインからファイル名とかを取るようにして完成.
-- コードを8ずつまとめてフラグビットを作るのと -- アドレスとかをエンコードする -- Compress a l で, 0<=a<4096, 0<=l<16 で使用のこと encode xs = enc xs 128 0 [] [] where enc [] mask flags cs codes = List.reverse (cs ++ (Char.chr(flags):codes)) enc (x:xs) mask flags cs codes = case mask of 0 -> (enc (x:xs) 128 0 [] (cs ++ (Char.chr(flags):codes))) _ -> (case x of Direct c -> enc xs (div mask 2) flags (c:cs) codes Compress a l -> enc xs (div mask 2) (flags+mask) (Char.chr((div a 256)+l*16):Char.chr(mod a 256):cs) codes ) decode xs = dec xs 0 0 [] where dec [] mask flags codes = List.reverse codes dec (x:xs) mask flags codes = case mask of 0 -> (dec xs 128 (Char.ord x) codes) _ -> (if (flags - mask < 0) then dec xs (div mask 2) flags (Direct x:codes) else case xs of (x':xs') -> dec xs' (div mask 2) (flags-mask) (Compress ((Char.ord x) + (mod (Char.ord x') 16)*256) (div (Char.ord x') 16):codes) ) -- pico second から秒数表示 showSec psec = let msec = (div psec 1000000000) in (show (div msec 1000) ++"."++ shown 3 '0' (mod msec 1000)++" sec\n") -- n 文字で c をパディングにして a を文字列化 shown n c a = let str = show a in sh (n - length str) str where sh k str = if k<= 0 then str else sh (k-1) (c:str) -- バイナリでオープンしないとならんので... readBinaryFile path = openBinaryFile path ReadMode >>= hGetContents writeBinaryFile path str = bracket (openBinaryFile path WriteMode) hClose (`hPutStr` str) -- ファイルを圧縮, 展開 compfile path = do putStr (("output: "++path++".cmp\n")) st <- getCPUTime str <- readBinaryFile path c <- return (compress2 str) ec <- return (encode c) writeBinaryFile (path++".cmp") ec ed <- getCPUTime putStr (showSec (ed-st)) return c decompfile path = do putStr (("output: "++path++".dec\n")) st <- getCPUTime ec <- readBinaryFile path c <- return (decode ec) str <- return (decompress c) writeBinaryFile (path++".dec") str ed <- getCPUTime putStr (showSec (ed-st)) return str usage = ["usage: [compress] > lzss file", " [decompress] > lzss -d compressedfile"] printUsage = mapM_ (\x->putStr (x++"\n")) usage main = do args <- getArgs if args ==[] then printUsage else if head args == "-d" then (decompfile (head (tail args)) >> putStr "done.") else (compfile (head args) >> putStr "\n")
720k のファイルで圧縮 88sec, 展開 ... 終わらん.130k で圧縮 15sec, 展開 7sec.ふむ,実用には絶対に耐えられん.意外に速かったけど.でも展開に変に時間がかかってるなぁ,どこが問題なのやら.ま,いいや.
- Newer: AWK - はじめ