diff options
| author | Patrick Kingston <patrick@pkingston.xyz> | 2026-01-23 11:10:38 -0500 |
|---|---|---|
| committer | Patrick Kingston <patrick@pkingston.xyz> | 2026-01-23 11:10:38 -0500 |
| commit | 905c5403d139b22ebf19dc752697e91eac87a060 (patch) | |
| tree | 45774aa4d5485bf4abe1da29a4c5992c0fbdf0d0 | |
| parent | 58e1e0a1eb05b729c47baf3a75aed1024005b58c (diff) | |
Add documentation link, comment code
| -rw-r--r-- | bible/encode.clj | 25 |
1 file changed, 15 insertions, 10 deletions
diff --git a/bible/encode.clj b/bible/encode.clj
index 084db0a..ac6195d 100644
--- a/bible/encode.clj
+++ b/bible/encode.clj
@@ -109,6 +109,7 @@
 ;;; Build the canonical encodings
+;; From: https://en.wikipedia.org/wiki/Canonical_Huffman_code
 ;; Each of the existing codes are replaced with a new one of the same length, using the following algorithm:
 ;; The first symbol in the list gets assigned a codeword which is the same length as the symbol's original codeword but all zeros. This will often be a single zero ('0').
@@ -147,23 +148,27 @@
 (bit-shift-left next-base-code
 (- prev-len
- (- 63 (Long/numberOfLeadingZeros next-base-code))))
+ (- 63 (Long/numberOfLeadingZeros next-base-code)))) ;Not 100% confident about this
 (inc prev-len)))]
 (recur (conj codes next-codeword) (rest symbols)))
 codes)))
-;(0001 next 00011)
-;(0000 next 1<<<<)
-;
-
 (def canonical-encodings
 (build-canonical-encodings sorted-huffman-tree-codewords))
 (assert
 (= (map #(.length %1) sorted-huffman-tree-codewords)
- (map #(.length %1) sorted-huffman-tree-codewords)))
+ (map #(.length %1) sorted-huffman-tree-codewords))
+ "Some of the codes changed length when canonicalizing")
+
+(assert
+ (= (count canonical-encodings)
+ (count (set (map #(Long/toBinaryString (.code %1)) canonical-encodings))))
+ "There appears to be duplicate canonical encodings")
-(take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms))
-(take 10 sorted-huffman-tree-codewords)
-(take 10 canonical-encodings)
-(take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)) ;; The results of this *seem* wrong.
+(comment
+ "Some basic stuff"
+ (take 10 (sort-by (juxt (comp count val) key) huffman-tree-syms))
+ (take 10 sorted-huffman-tree-codewords)
+ (take 10 canonical-encodings)
+ (take 10 (map #(Long/toBinaryString (.code %1)) canonical-encodings)))
