aboutsummaryrefslogtreecommitdiff
path: root/bible/huffman.clj
diff options
context:
space:
mode:
author Patrick Kingston <patrick@pkingston.xyz> 2026-01-28 19:23:13 -0500
committer Patrick Kingston <patrick@pkingston.xyz> 2026-01-28 19:23:13 -0500
commit fc8569a88200ec88c3773c338fdcf16b16ea51d5 (patch)
tree 32b7616811d5248ef53dc8d98bd996652f25b967 /bible/huffman.clj
parent 0e17cdb8d895589dbf1470f5b06a49b1b94e2e4d (diff)
Get compressor working
Diffstat (limited to 'bible/huffman.clj')
-rw-r--r-- bible/huffman.clj 10
1 files changed, 2 insertions, 8 deletions
diff --git a/bible/huffman.clj b/bible/huffman.clj
index c219b7e..fccdae1 100644
--- a/bible/huffman.clj
+++ b/bible/huffman.clj
@@ -149,14 +149,8 @@
(if (= basecount len)
base
(str (apply str (take (- len basecount) (repeat "0"))) base))))
-
;; --- little class for Huffman Codewords
-(def sorted-huffman-tree-codewords
- (->> huffman-tree-syms
- (sort-by (juxt (comp count val) key))
- (map #(->HuffmanCodeword (first %1) (Long/parseUnsignedLong (second %1) 2) (int (count (second %1)))))))
-
(defn build-canonical-encodings
"Build canonical huffman encodings from a huffman tree
takes [symbols] a list of huffman codes derived from a code-tree"
@@ -197,7 +191,7 @@
(build-canonical-encodings huffman-tree))
-(assert
+#_(assert
(= (map #(.length %1) sorted-huffman-tree-codewords)
(map #(.length %1) sorted-huffman-tree-codewords))
"Some of the codes changed length when canonicalizing")
@@ -228,7 +222,7 @@
{:bits totalbits
:bytes totalbytes
:kb totalkb
- :mb totalmb}))
+ :mb totalmb}))
(def stage1-optimized-token-encoding-length
(get-encoded-length canonical-encodings tokens)))