aboutsummaryrefslogtreecommitdiff
path: root/bible
diff options
context:
space:
mode:
author Patrick Kingston <patrick@pkingston.xyz> 2026-01-23 11:10:38 -0500
committer Patrick Kingston <patrick@pkingston.xyz> 2026-01-23 11:10:38 -0500
commit 905c5403d139b22ebf19dc752697e91eac87a060 (patch)
tree 45774aa4d5485bf4abe1da29a4c5992c0fbdf0d0 /bible
parent 58e1e0a1eb05b729c47baf3a75aed1024005b58c (diff)
Add documentation link, comment code
Diffstat (limited to 'bible')
-rw-r--r-- bible/encode.clj 25
1 files changed, 15 insertions, 10 deletions
diff --git a/bible/encode.clj b/bible/encode.clj
index 084db0a..ac6195d 100644
--- a/bible/encode.clj
+++ b/bible/encode.clj
@@ -109,6 +109,7 @@
;;; Build the canonical encodings
+;; From: https://en.wikipedia.org/wiki/Canonical_Huffman_code
;; Each of the existing codes are replaced with a new one of the same length, using the following algorithm:
;; The first symbol in the list gets assigned a codeword which is the same length as the symbol's original codeword but all zeros. This will often be a single zero ('0').
@@ -147,23 +148,27 @@
(bit-shift-left
next-base-code
(- prev-len
- (- 63 (Long/numberOfLeadingZeros next-base-code))))
+ (- 63 (Long/numberOfLeadingZeros next-base-code)))) ;Not 100% confident about this
(inc prev-len)))]
(recur (conj codes next-codeword) (rest symbols)))
codes)))
-;(0001 next 00011)
-;(0000 next 1<<<<)
-;
-
(def canonical-encodings
(build-canonical-encodings sorted-huffman-tree-codewords))
(assert
(= (map #(.length %1) sorted-huffman-tree-codewords)
- (map #(.length %1) sorted-huffman-tree-codewords)))
+ (map #(.length %1) sorted-huffman-tree-codewords))
+ "Some of the codes changed length when canonicalizing")
+
+(assert
+ (= (count canonical-encodings)
+ (count (set (map #(Long/toBinaryString (.code %1)) canonical-encodings))))
+ "There appears to be duplicate canonical encodings")
-(take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms))
-(take 10 sorted-huffman-tree-codewords)
-(take 10 canonical-encodings)
-(take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)) ;; The results of this *seem* wrong.
+(comment
+ "Some basic stuff"
+ (take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms))
+ (take 10 sorted-huffman-tree-codewords)
+ (take 10 canonical-encodings)
+ (take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)))