1 files changed, 18 insertions, 2 deletions
diff --git a/bible/fullcompressor.clj b/bible/fullcompressor.clj
index 65a035b..e06a242 100644
--- a/bible/fullcompressor.clj
+++ b/bible/fullcompressor.clj
@@ -78,7 +78,7 @@
 ;;; TODO: Build dictionary
 
 ;;; Next, we'll run LZSS on our tok-id-list
-(def WINDOW-SIZE 1024) ; The maximum distance we look back, only allowing 1k RAM
+(def WINDOW-SIZE 512) ; Maximum look-back distance; presumably 512 because the 1k RAM budget holds 512 shorts (TODO confirm)
 (def MIN-MATCH 3)    ; Minimum length to bother with a reference
 
 (def MATCH-FLAG (count (frequencies dict-id-compressed-text))) 
@@ -128,7 +128,23 @@
 
             (if (and best-match (>= (:len best-match) MIN-MATCH))
               ;; CASE A: Match Found
-              (recur (+ cursor (:len best-match))
+              #_(let [match-len (:len best-match)] ; Disabled via #_ (reader discards this form): an optimization the LLM came up with
+                (recur (+ cursor match-len)
+                       ;; EAGER INDEXING: add every position covered by the match to the index (skipping any with fewer than MIN-MATCH items left)
+                       (reduce 
+                         (fn [idx i]
+                           (let [sub-triplet (if (<= (+ i MIN-MATCH) data-len)
+                                               (subvec data-vec i (+ i MIN-MATCH))
+                                               nil)]
+                             (if sub-triplet
+                               (assoc idx sub-triplet (conj (get idx sub-triplet []) i))
+                               idx)))
+                         index
+                         (range cursor (+ cursor match-len)))
+                       (conj! out {:type :match 
+                                   :dist (:dist best-match) 
+                                   :len match-len})))
+              (recur (+ cursor (:len best-match)) 
                      ;; Note: We are still "Lazy Indexing" here for speed. 
                      ;; To maximize compression, you'd loop here to add skipped parts to `index`.
                      (assoc index triplet (conj (get index triplet []) cursor))