From 905c5403d139b22ebf19dc752697e91eac87a060 Mon Sep 17 00:00:00 2001 From: Patrick Kingston Date: Fri, 23 Jan 2026 11:10:38 -0500 Subject: Add documentation link, comment code --- bible/encode.clj | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/bible/encode.clj b/bible/encode.clj index 084db0a..ac6195d 100644 --- a/bible/encode.clj +++ b/bible/encode.clj @@ -109,6 +109,7 @@ ;;; Build the canonical encodings +;; From: https://en.wikipedia.org/wiki/Canonical_Huffman_code ;; Each of the existing codes are replaced with a new one of the same length, using the following algorithm: ;; The first symbol in the list gets assigned a codeword which is the same length as the symbol's original codeword but all zeros. This will often be a single zero ('0'). @@ -147,23 +148,27 @@ (bit-shift-left next-base-code (- prev-len - (- 63 (Long/numberOfLeadingZeros next-base-code)))) + (- 63 (Long/numberOfLeadingZeros next-base-code)))) ;Not 100% confident about this (inc prev-len)))] (recur (conj codes next-codeword) (rest symbols))) codes))) -;(0001 next 00011) -;(0000 next 1<<<<) -; - (def canonical-encodings (build-canonical-encodings sorted-huffman-tree-codewords)) (assert (= (map #(.length %1) sorted-huffman-tree-codewords) - (map #(.length %1) sorted-huffman-tree-codewords))) + (map #(.length %1) sorted-huffman-tree-codewords)) + "Some of the codes changed length when canonicalizing") + +(assert + (= (count canonical-encodings) + (count (set (map #(Long/toBinaryString (.code %1)) canonical-encodings)))) + "There appears to be duplicate canonical encodings") -(take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms)) -(take 10 sorted-huffman-tree-codewords) -(take 10 canonical-encodings) -(take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)) ;; The results of this *seem* wrong. 
+(comment + "REPL scratch: peek at the first 10 symbols, codewords, and canonical encodings" + (take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms)) + (take 10 sorted-huffman-tree-codewords) + (take 10 canonical-encodings) + (take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings))) -- cgit v1.2.3