diff options
| author | Patrick Kingston <patrick@pkingston.xyz> | 2026-01-23 02:05:41 -0500 |
|---|---|---|
| committer | Patrick Kingston <patrick@pkingston.xyz> | 2026-01-23 02:05:41 -0500 |
| commit | 58e1e0a1eb05b729c47baf3a75aed1024005b58c (patch) | |
| tree | 028c9fc83323cd8e887fcdc52fd799af93e5b70d | |
| parent | aed836605a48bd09294e459169599ef72eecd126 (diff) | |
Generate canonical huffman encodings
| -rw-r--r-- | bible/encode.clj | 34 |
1 files changed, 25 insertions, 9 deletions
diff --git a/bible/encode.clj b/bible/encode.clj index 5d91bd4..084db0a 100644 --- a/bible/encode.clj +++ b/bible/encode.clj @@ -135,19 +135,35 @@ next-sym (.sym current-codeword) next-base-code (unchecked-inc (.code prev-codeword)) prev-len (.length prev-codeword) - next-codeword (if (= (.length current-codeword) - (.length prev-codeword)) - (->HuffmanCodeword next-sym - next-base-code - prev-len) - (->HuffmanCodeword next-sym - (bit-shift-left next-base-code (inc prev-len)) - (inc prev-len)))] + next-codeword + (if (= (.length current-codeword) + (.length prev-codeword)) + (->HuffmanCodeword + next-sym + next-base-code + prev-len) + (->HuffmanCodeword + next-sym + (bit-shift-left + next-base-code + (- prev-len + (- 63 (Long/numberOfLeadingZeros next-base-code)))) + (inc prev-len)))] (recur (conj codes next-codeword) (rest symbols))) codes))) +;(0001 next 00011) +;(0000 next 1<<<<) +; (def canonical-encodings (build-canonical-encodings sorted-huffman-tree-codewords)) -(map #(Long/toBinaryString (.code %1)) (take 100 canonical-encodings)) ;; The results of this *seem* wrong. +(assert + (= (map #(.length %1) sorted-huffman-tree-codewords) + (map #(.length %1) sorted-huffman-tree-codewords))) + +(take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms)) +(take 10 sorted-huffman-tree-codewords) +(take 10 canonical-encodings) +(take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)) ;; The results of this *seem* wrong. |
