From 58e1e0a1eb05b729c47baf3a75aed1024005b58c Mon Sep 17 00:00:00 2001 From: Patrick Kingston Date: Fri, 23 Jan 2026 02:05:41 -0500 Subject: Generate canonical huffman encodings --- bible/encode.clj | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/bible/encode.clj b/bible/encode.clj index 5d91bd4..084db0a 100644 --- a/bible/encode.clj +++ b/bible/encode.clj @@ -135,19 +135,35 @@ next-sym (.sym current-codeword) next-base-code (unchecked-inc (.code prev-codeword)) prev-len (.length prev-codeword) - next-codeword (if (= (.length current-codeword) - (.length prev-codeword)) - (->HuffmanCodeword next-sym - next-base-code - prev-len) - (->HuffmanCodeword next-sym - (bit-shift-left next-base-code (inc prev-len)) - (inc prev-len)))] + next-codeword + (if (= (.length current-codeword) + (.length prev-codeword)) + (->HuffmanCodeword + next-sym + next-base-code + prev-len) + (->HuffmanCodeword + next-sym + (bit-shift-left + next-base-code + (- prev-len + (- 63 (Long/numberOfLeadingZeros next-base-code)))) + (inc prev-len)))] (recur (conj codes next-codeword) (rest symbols))) codes))) +;(0001 next 00011) +;(0000 next 1<<<<) +; (def canonical-encodings (build-canonical-encodings sorted-huffman-tree-codewords)) -(map #(Long/toBinaryString (.code %1)) (take 100 canonical-encodings)) ;; The results of this *seem* wrong. +(assert + (= (map #(.length %1) sorted-huffman-tree-codewords) + (map #(.length %1) sorted-huffman-tree-codewords))) + +(take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms)) +(take 10 sorted-huffman-tree-codewords) +(take 10 canonical-encodings) +(take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)) ;; The results of this *seem* wrong. -- cgit v1.2.3