feat: add web server backend.

2026-04-28 23:55:41 +05:30
parent 3065a0adce
commit 3a0c32ea8f
8 changed files with 1705 additions and 0 deletions
@@ -0,0 +1,55 @@
+"""
+Persistent retrieval daemon.
+Loads the index ONCE on startup, writes {"ready": true} to stdout, then answers
+newline-delimited JSON requests from stdin with JSON lines on stdout until EOF.
+
+Protocol (one line each direction):
+  <- {"query": "...", "top_n": 5}
+  -> {"results": [...], "latency_seconds": 0.15}
+  -> {"error": "..."}          (on failure — process stays alive)
+
+inference.py is imported as a module — zero lines of it are modified.
+"""
+import sys
+import json
+import os
+
+ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))  # 3 dirs up
+sys.path.insert(0, os.path.join(ROOT, "src"))  # presumably where inference.py lives
+os.chdir(ROOT)  # NOTE(review): presumably inference relies on ROOT-relative paths
+
+import inference  # noqa: E402
+
+def main():
+    """Load the index once, signal readiness, then answer requests from stdin.
+
+    Each non-blank input line gets exactly one JSON reply line on stdout; a
+    failing request produces {"error": ...} and the loop keeps running.
+    """
+    try:
+        # Load once — this is the expensive step (~18s cold, ~0s warm)
+        _, retriever = inference.load_or_build(force_rebuild=False)
+    except Exception as exc:
+        print(json.dumps({"error": f"Init failed: {exc}"}), flush=True)
+        sys.exit(1)  # fatal: can't serve anything
+
+    # Signal to Node that we're ready
+    print(json.dumps({"ready": True}), flush=True)
+
+    for raw_line in sys.stdin:
+        raw_line = raw_line.strip()
+        if not raw_line:
+            continue
+        try:
+            req = json.loads(raw_line)
+            query = req.get("query", "")
+            top_n = int(req.get("top_n", 5))
+            hits, latency = retriever.retrieve(query, top_n=top_n)
+            # Encode inside the try so an unserializable result becomes an error reply.
+            reply = json.dumps({"results": hits, "latency_seconds": round(latency, 4)})
+        except Exception as exc:
+            reply = json.dumps({"error": str(exc)})
+        print(reply, flush=True)
+
+if __name__ == "__main__":  # start the daemon only when run as a script
+    main()