Get compressor working

author: Patrick Kingston <patrick@pkingston.xyz> 2026-01-28 19:23:13 -0500
committer: Patrick Kingston <patrick@pkingston.xyz> 2026-01-28 19:23:13 -0500
commit: fc8569a88200ec88c3773c338fdcf16b16ea51d5 (patch)
tree: 32b7616811d5248ef53dc8d98bd996652f25b967 /bible/fullcompressor.clj
parent: 0e17cdb8d895589dbf1470f5b06a49b1b94e2e4d (diff)
1 files changed, 18 insertions, 2 deletions
diff --git a/bible/fullcompressor.clj b/bible/fullcompressor.clj
index 65a035b..e06a242 100644
--- a/bible/fullcompressor.clj
+++ b/bible/fullcompressor.clj
@@ -78,7 +78,7 @@
 ;;; TODO: Build dictionary
 
 ;;; Next, we'll run LZSS on our tok-id-list
-(def WINDOW-SIZE 1024) ; The maximum distance we look back, only allowing 1k RAM
+(def WINDOW-SIZE 512) ; The maximum distance we look back, only allowing 1k RAM (512 shorts)
 (def MIN-MATCH 3)    ; Minimum length to bother with a reference
 
 (def MATCH-FLAG (count (frequencies dict-id-compressed-text))) 
@@ -128,7 +128,23 @@
 
             (if (and best-match (>= (:len best-match) MIN-MATCH))
               ;; CASE A: Match Found
-              (recur (+ cursor (:len best-match))
+              #_(let [match-len (:len best-match)] ; This is an optimization the LLM came up with
+                (recur (+ cursor match-len)
+                       ;; EAGER INDEXING: Add all skipped positions to the index
+                       (reduce 
+                         (fn [idx i]
+                           (let [sub-triplet (if (<= (+ i MIN-MATCH) data-len)
+                                               (subvec data-vec i (+ i MIN-MATCH))
+                                               nil)]
+                             (if sub-triplet
+                               (assoc idx sub-triplet (conj (get idx sub-triplet []) i))
+                               idx)))
+                         index
+                         (range cursor (+ cursor match-len)))
+                       (conj! out {:type :match 
+                                   :dist (:dist best-match) 
+                                   :len match-len})))
+              (recur (+ cursor (:len best-match)) 
                      ;; Note: We are still "Lazy Indexing" here for speed. 
                      ;; To maximize compression, you'd loop here to add skipped parts to `index`.
                      (assoc index triplet (conj (get index triplet []) cursor))
author	Patrick Kingston <patrick@pkingston.xyz>	2026-01-28 19:23:13 -0500
committer	Patrick Kingston <patrick@pkingston.xyz>	2026-01-28 19:23:13 -0500
commit	fc8569a88200ec88c3773c338fdcf16b16ea51d5 (patch)
tree	32b7616811d5248ef53dc8d98bd996652f25b967 /bible/fullcompressor.clj
parent	0e17cdb8d895589dbf1470f5b06a49b1b94e2e4d (diff)