aboutsummaryrefslogtreecommitdiff
path: root/bible
diff options
context:
space:
mode:
authorPatrick Kingston <patrick@pkingston.xyz>2026-01-23 02:05:41 -0500
committerPatrick Kingston <patrick@pkingston.xyz>2026-01-23 02:05:41 -0500
commit58e1e0a1eb05b729c47baf3a75aed1024005b58c (patch)
tree028c9fc83323cd8e887fcdc52fd799af93e5b70d /bible
parentaed836605a48bd09294e459169599ef72eecd126 (diff)
Generate canonical huffman encodings
Diffstat (limited to 'bible')
-rw-r--r--bible/encode.clj34
1 files changed, 25 insertions, 9 deletions
diff --git a/bible/encode.clj b/bible/encode.clj
index 5d91bd4..084db0a 100644
--- a/bible/encode.clj
+++ b/bible/encode.clj
@@ -135,19 +135,35 @@
next-sym (.sym current-codeword)
next-base-code (unchecked-inc (.code prev-codeword))
prev-len (.length prev-codeword)
- next-codeword (if (= (.length current-codeword)
- (.length prev-codeword))
- (->HuffmanCodeword next-sym
- next-base-code
- prev-len)
- (->HuffmanCodeword next-sym
- (bit-shift-left next-base-code (inc prev-len))
- (inc prev-len)))]
+ next-codeword
+ (if (= (.length current-codeword)
+ (.length prev-codeword))
+ (->HuffmanCodeword
+ next-sym
+ next-base-code
+ prev-len)
+ (->HuffmanCodeword
+ next-sym
+ (bit-shift-left
+ next-base-code
+ (- prev-len
+ (- 63 (Long/numberOfLeadingZeros next-base-code))))
+ (inc prev-len)))]
(recur (conj codes next-codeword) (rest symbols)))
codes)))
+;(0001 next 00011)
+;(0000 next 1<<<<)
+;
(def canonical-encodings
(build-canonical-encodings sorted-huffman-tree-codewords))
-(map #(Long/toBinaryString (.code %1)) (take 100 canonical-encodings)) ;; The results of this *seem* wrong.
+(assert
+ (= (map #(.length %1) sorted-huffman-tree-codewords)
+ (map #(.length %1) sorted-huffman-tree-codewords)))
+
+(take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms))
+(take 10 sorted-huffman-tree-codewords)
+(take 10 canonical-encodings)
+(take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)) ;; The results of this *seem* wrong.