docs: add JSDoc and normalize comments across server.

This commit is contained in:
K
2026-05-03 00:16:42 +05:30
parent 33fe20021a
commit f88a45968a
5 changed files with 127 additions and 84 deletions
+3 -3
View File
@@ -6,9 +6,9 @@ from stdin and writes newline-delimited JSON responses to stdout forever.
Protocol (one line each direction): Protocol (one line each direction):
<- {"query": "...", "top_n": 5} <- {"query": "...", "top_n": 5}
-> {"results": [...], "latency_seconds": 0.15} -> {"results": [...], "latency_seconds": 0.15}
-> {"error": "..."} (on failure process stays alive) -> {"error": "..."} (on failure -- process stays alive)
inference.py is imported as a module zero lines of it are modified. inference.py is imported as a module -- zero lines of it are modified.
""" """
import sys import sys
import json import json
@@ -21,7 +21,7 @@ os.chdir(ROOT)
import inference # noqa: E402 import inference # noqa: E402
def main(): def main():
# Load once this is the expensive step (~18s cold, ~0s warm) # Load once -- this is the expensive step (~18s cold, ~0s warm)
try: try:
_, retriever = inference.load_or_build(force_rebuild=False) _, retriever = inference.load_or_build(force_rebuild=False)
except Exception as exc: except Exception as exc:
+85 -49
View File
@@ -11,10 +11,11 @@ const { generateExplanation, answerQuestion, rewriteQuery } = require("./service
const { retrieve } = require("./services/retrieverService"); const { retrieve } = require("./services/retrieverService");
const app = express(); const app = express();
/** @type {string|number} - HTTP port: the PORT env var (a string) when set, otherwise the number 5000. */
const PORT = process.env.PORT || 5000; const PORT = process.env.PORT || 5000;
// ── Startup checks ────────────────────────────────────────────────────────── // Warn early when the Groq key is absent so AI degradation is visible at boot.
if (!process.env.GROQ_API_KEY) { if (!process.env.GROQ_API_KEY) {
console.warn( console.warn(
"[WARN] GROQ_API_KEY is not set. AI features will return fallback values.\n" + "[WARN] GROQ_API_KEY is not set. AI features will return fallback values.\n" +
@@ -22,12 +23,12 @@ if (!process.env.GROQ_API_KEY) {
); );
} }
// ── Security headers ─────────────────────────────────────────────────────────
app.use(helmet()); app.use(helmet());
// ── CORS — restrict to configured origin or localhost dev ──────────────────── /**
* @type {string[]} - Allowed CORS origins; reads CORS_ORIGIN env var (comma-separated) or
* falls back to localhost dev/preview ports.
*/
const ALLOWED_ORIGINS = process.env.CORS_ORIGIN const ALLOWED_ORIGINS = process.env.CORS_ORIGIN
? process.env.CORS_ORIGIN.split(",").map((o) => o.trim()) ? process.env.CORS_ORIGIN.split(",").map((o) => o.trim())
: ["http://localhost:5173", "http://localhost:4173", `http://localhost:${PORT}`]; : ["http://localhost:5173", "http://localhost:4173", `http://localhost:${PORT}`];
@@ -42,8 +43,7 @@ app.use(cors({
allowedHeaders: ["Content-Type"], allowedHeaders: ["Content-Type"],
})); }));
// ── Rate limiting ───────────────────────────────────────────────────────────── /** @type {import('express-rate-limit').RateLimitRequestHandler} - 60 req/min applied to all /api/ routes. */
const apiLimiter = rateLimit({ const apiLimiter = rateLimit({
windowMs: 60 * 1000, windowMs: 60 * 1000,
max: 60, max: 60,
@@ -52,6 +52,7 @@ const apiLimiter = rateLimit({
message: { error: "Too many requests. Please wait a moment and try again." }, message: { error: "Too many requests. Please wait a moment and try again." },
}); });
/** @type {import('express-rate-limit').RateLimitRequestHandler} - 20 req/min applied to LLM-backed endpoints. */
const llmLimiter = rateLimit({ const llmLimiter = rateLimit({
windowMs: 60 * 1000, windowMs: 60 * 1000,
max: 20, max: 20,
@@ -67,8 +68,7 @@ app.use("/api/chat", llmLimiter);
app.use(express.json({ limit: "16kb" })); app.use(express.json({ limit: "16kb" }));
// ── Load data ─────────────────────────────────────────────────────────────── /** @type {string} - Absolute path to the processed data directory. */
const DATA_DIR = path.join(__dirname, "../../data/processed"); const DATA_DIR = path.join(__dirname, "../../data/processed");
let standards = []; let standards = [];
@@ -84,7 +84,7 @@ try {
// Pre-build lookups // Pre-build lookups
const standardsById = {}; const standardsById = {};
const chunksByStd = {}; // standard_id [chunk, ] const chunksByStd = {}; // standard_id: [chunk, ...]
const byCategory = {}; const byCategory = {};
const categories = new Set(); const categories = new Set();
@@ -99,34 +99,59 @@ for (const c of chunks) {
chunksByStd[c.standard_id].push(c); chunksByStd[c.standard_id].push(c);
} }
// ── Input sanitization ──────────────────────────────────────────────────────── /** @type {RegExp} - Matches ASCII control characters that should be stripped from user input. */
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g; const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
/**
* Strips control characters and truncates a string to a safe length.
* Returns an empty string if the value is not a string.
* @param {*} value
* @param {number} [maxLen=500]
* @returns {string}
*/
function sanitizeText(value, maxLen = 500) { function sanitizeText(value, maxLen = 500) {
if (typeof value !== "string") return ""; if (typeof value !== "string") return "";
return value.replace(CONTROL_CHAR_RE, "").slice(0, maxLen).trim(); return value.replace(CONTROL_CHAR_RE, "").slice(0, maxLen).trim();
} }
// standard_id must match IS identifier pattern: letters/digits/spaces/colons/parens/dots/hyphens /** @type {RegExp} - Accepts IS standard IDs: letters, digits, spaces, colons, parens, dots, hyphens, slashes. */
const STANDARD_ID_RE = /^[A-Za-z0-9 :()./-]{1,60}$/; const STANDARD_ID_RE = /^[A-Za-z0-9 :()./-]{1,60}$/;
/**
* Returns true if the value is a well-formed IS standard identifier.
* @param {*} id
* @returns {boolean}
*/
function isValidStandardId(id) { function isValidStandardId(id) {
return typeof id === "string" && STANDARD_ID_RE.test(id.trim()); return typeof id === "string" && STANDARD_ID_RE.test(id.trim());
} }
// ── Structured logger ─────────────────────────────────────────────────────── /**
* Writes a structured JSON log line to stdout with a UTC timestamp.
* @param {string} endpoint - Route label, e.g. "POST /api/recommend".
* @param {object} data - Arbitrary key/value pairs to include in the log entry.
*/
function log(endpoint, data) { function log(endpoint, data) {
const ts = new Date().toISOString(); const ts = new Date().toISOString();
console.log(`[${ts}] ${endpoint} |`, JSON.stringify(data)); console.log(`[${ts}] ${endpoint} |`, JSON.stringify(data));
} }
// ── Keyword-based search helper (unchanged from original) ─────────────────── /**
* Normalises a string to lowercase alphanumeric tokens for keyword matching.
* @param {string} str
* @returns {string}
*/
function normalize(str) { function normalize(str) {
return str.toLowerCase().replace(/[^a-z0-9]/g, " ").replace(/\s+/g, " ").trim(); return str.toLowerCase().replace(/[^a-z0-9]/g, " ").replace(/\s+/g, " ").trim();
} }
/**
* Scores a standard against a query using weighted keyword matching across id, title,
* keywords, summary, and category fields.
* @param {object} standard - A standards.json record.
* @param {string} query - Raw search query string.
* @returns {number} Relevance score; higher is more relevant.
*/
function scoreStandard(standard, query) { function scoreStandard(standard, query) {
const q = normalize(query); const q = normalize(query);
const qTokens = q.split(" ").filter(Boolean); const qTokens = q.split(" ").filter(Boolean);
@@ -150,8 +175,13 @@ function scoreStandard(standard, query) {
return s; return s;
} }
// ── Best chunk selector ───────────────────────────────────────────────────── /**
* Returns the chunk from a standard that best matches the given question via token overlap.
* Falls back to the first chunk if no tokens produce a positive score.
* @param {string} standardId - IS standard identifier key into chunksByStd.
* @param {string} question - User question used for token matching.
* @returns {{ text: string, section: string, chunk_id: string, standard_id: string } | null}
*/
function bestChunk(standardId, question) { function bestChunk(standardId, question) {
const stdChunks = chunksByStd[standardId] || []; const stdChunks = chunksByStd[standardId] || [];
if (!stdChunks.length) return null; if (!stdChunks.length) return null;
@@ -168,11 +198,13 @@ function bestChunk(standardId, question) {
return best; return best;
} }
// ═══════════════════════════════════════════════════════════════════════════
// Routes // Routes
// ═══════════════════════════════════════════════════════════════════════════
// ── GET /api/standards ────────────────────────────────────────────────────── /**
* GET /api/standards
* Returns a paginated, optionally filtered and keyword-scored list of standards.
* Query params: q (search string), category, page, limit.
*/
app.get("/api/standards", (req, res) => { app.get("/api/standards", (req, res) => {
const q = sanitizeText(req.query.q || "", 200); const q = sanitizeText(req.query.q || "", 200);
const category = sanitizeText(req.query.category || "", 100); const category = sanitizeText(req.query.category || "", 100);
@@ -197,7 +229,10 @@ app.get("/api/standards", (req, res) => {
res.json({ data: paginated, meta: { total, page: pageNum, limit: limitNum, totalPages } }); res.json({ data: paginated, meta: { total, page: pageNum, limit: limitNum, totalPages } });
}); });
// ── GET /api/standards/:id ────────────────────────────────────────────────── /**
* GET /api/standards/:id
* Returns a single standard by its IS identifier; 404 if not found, 400 if the id is malformed.
*/
app.get("/api/standards/:id", (req, res) => { app.get("/api/standards/:id", (req, res) => {
const raw = decodeURIComponent(req.params.id); const raw = decodeURIComponent(req.params.id);
if (!isValidStandardId(raw)) { if (!isValidStandardId(raw)) {
@@ -208,7 +243,10 @@ app.get("/api/standards/:id", (req, res) => {
res.json(standard); res.json(standard);
}); });
// ── GET /api/categories ───────────────────────────────────────────────────── /**
* GET /api/categories
* Returns all categories sorted alphabetically, each with its standard count.
*/
app.get("/api/categories", (req, res) => { app.get("/api/categories", (req, res) => {
const result = [...categories].sort().map((cat) => ({ const result = [...categories].sort().map((cat) => ({
name: cat, name: cat,
@@ -217,7 +255,10 @@ app.get("/api/categories", (req, res) => {
res.json(result); res.json(result);
}); });
// ── GET /api/stats ────────────────────────────────────────────────────────── /**
* GET /api/stats
* Returns aggregate counts of standards, categories, and chunks loaded in memory.
*/
app.get("/api/stats", (req, res) => { app.get("/api/stats", (req, res) => {
res.json({ res.json({
totalStandards: standards.length, totalStandards: standards.length,
@@ -226,16 +267,13 @@ app.get("/api/stats", (req, res) => {
}); });
}); });
// ── POST /api/recommend ─────────────────────────────────────────────────────
/** /**
* Input: { query: string, top_n?: number, rewrite?: boolean } * POST /api/recommend
* Flow: * Hybrid retrieval endpoint: optionally rewrites the query, calls the Python daemon,
* 1. Optionally rewrite query with LLM (parallel, non-blocking on failure) * then attaches parallel LLM explanations to each result.
* 2. Call Python inference.py via bridge (retrieval logic untouched)
* 3. Enrich each result with LLM explanation (Promise.allSettled — no blocking)
* 4. Return standards + explanations + timing breakdown
* *
* Output: { standards, latency: { retrieval_ms, llm_ms, total_ms } } * @param {{ query: string, top_n?: number, rewrite?: boolean }} req.body
* @returns {{ query: string, standards: Array, latency: { retrieval_ms: number, llm_ms: number, total_ms: number } }}
*/ */
app.post("/api/recommend", async (req, res) => { app.post("/api/recommend", async (req, res) => {
const rawQuery = req.body?.query; const rawQuery = req.body?.query;
@@ -249,13 +287,13 @@ app.post("/api/recommend", async (req, res) => {
const t0 = Date.now(); const t0 = Date.now();
// Step 1 Optional query rewrite (fires concurrently, falls back silently) // Step 1 - Optional query rewrite (fires concurrently, falls back silently)
let effectiveQuery = query; let effectiveQuery = query;
if (rewrite && process.env.GROQ_API_KEY) { if (rewrite && process.env.GROQ_API_KEY) {
effectiveQuery = await rewriteQuery(query.trim()); // never throws effectiveQuery = await rewriteQuery(query.trim()); // never throws
} }
// Step 2 Python retrieval (inference.py untouched) // Step 2 - Python retrieval (inference.py untouched)
let retrievalResult; let retrievalResult;
const tRetStart = Date.now(); const tRetStart = Date.now();
try { try {
@@ -268,7 +306,7 @@ app.post("/api/recommend", async (req, res) => {
const { results: retrieved, latency_seconds: pyLatency } = retrievalResult; const { results: retrieved, latency_seconds: pyLatency } = retrievalResult;
// Step 3 LLM explanations fired in parallel (allSettled never blocks on failure) // Step 3 - LLM explanations fired in parallel (awaited together with Promise.all; each job is expected to resolve with a fallback rather than reject)
const tLlmStart = Date.now(); const tLlmStart = Date.now();
const explanationJobs = retrieved.map((r) => { const explanationJobs = retrieved.map((r) => {
const std = standardsById[r.standard_id]; const std = standardsById[r.standard_id];
@@ -281,7 +319,7 @@ app.post("/api/recommend", async (req, res) => {
const explanations = await Promise.all(explanationJobs); const explanations = await Promise.all(explanationJobs);
const llmMs = Date.now() - tLlmStart; const llmMs = Date.now() - tLlmStart;
// Step 4 Assemble response // Step 4 - Assemble response
const standardsOut = retrieved.map((r, i) => { const standardsOut = retrieved.map((r, i) => {
const std = standardsById[r.standard_id] || {}; const std = standardsById[r.standard_id] || {};
return { return {
@@ -316,15 +354,12 @@ app.post("/api/recommend", async (req, res) => {
}); });
}); });
// ── POST /api/ask ───────────────────────────────────────────────────────────
/** /**
* Input: { question: string, standard_id: string } * POST /api/ask
* Flow: * Answers a question grounded in the best-matching chunk of a specific standard.
* 1. Find best matching chunk for the question within the standard
* 2. Pass chunk text to answerQuestion() — strictly grounded
* 3. Return answer + chunk source info
* *
* Output: { answer, source: { standard_id, section, chunk_id } } * @param {{ question: string, standard_id: string }} req.body
* @returns {{ answer: string, source: { standard_id: string, section: string, chunk_id: string }, latency: object }}
*/ */
app.post("/api/ask", async (req, res) => { app.post("/api/ask", async (req, res) => {
const question = sanitizeText(req.body?.question, 500); const question = sanitizeText(req.body?.question, 500);
@@ -367,10 +402,12 @@ app.post("/api/ask", async (req, res) => {
}); });
}); });
// ── POST /api/chat ──────────────────────────────────────────────────────────
/** /**
* Conversational QA grounded in a standard's full text. * POST /api/chat
* Uses answerQuestion() from llmService — key never leaves server. * Conversational QA grounded in a standard's full text; 503 if GROQ_API_KEY is absent.
*
* @param {{ question: string, standard_id?: string }} req.body
* @returns {{ answer: string }}
*/ */
app.post("/api/chat", async (req, res) => { app.post("/api/chat", async (req, res) => {
if (!process.env.GROQ_API_KEY) { if (!process.env.GROQ_API_KEY) {
@@ -408,7 +445,6 @@ app.post("/api/chat", async (req, res) => {
res.json({ answer }); res.json({ answer });
}); });
// ── Start ───────────────────────────────────────────────────────────────────
const server = app.listen(PORT, () => { const server = app.listen(PORT, () => {
console.log(`[init] BIS API running on http://localhost:${PORT}`); console.log(`[init] BIS API running on http://localhost:${PORT}`);
}); });
+24 -22
View File
@@ -2,21 +2,25 @@
/** /**
* llmService.js * llmService.js
* All Groq LLM calls live here. Three functions: * All Groq LLM calls live here. Three functions:
* generateExplanation(standard) 2-3 sentence plain-English summary * generateExplanation(standard) - 2-3 sentence plain-English summary
* answerQuestion(question, chunk) grounded QA, strict context-only * answerQuestion(question, chunk) - grounded QA, strict context-only
* rewriteQuery(query) optional query expansion * rewriteQuery(query) - optional query expansion
* *
* Key rules enforced here: * Key rules enforced here:
* - GROQ_API_KEY never leaves this file toward the client * - GROQ_API_KEY never leaves this file toward the client
* - max_tokens kept short to minimise latency (<400 tokens each) * - max_tokens kept short to minimise latency (<400 tokens each)
* - Every function returns a fallback value on failure callers never throw * - Every function returns a fallback value on failure - callers never throw
*/ */
const GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"; const GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions";
const MODEL = "llama-3.1-8b-instant"; const MODEL = "llama-3.1-8b-instant";
// ── Core fetch wrapper ────────────────────────────────────────────────────── /**
* Sends a single chat-completion request to the Groq API.
* @param {{ systemPrompt: string, userMessage: string, maxTokens?: number, temperature?: number }} options
* @returns {Promise<string>} Trimmed text content from the first choice.
* @throws {Error} If the API key is missing or the response is non-2xx.
*/
async function _groqCall({ systemPrompt, userMessage, maxTokens = 256, temperature = 0.2 }) { async function _groqCall({ systemPrompt, userMessage, maxTokens = 256, temperature = 0.2 }) {
const key = process.env.GROQ_API_KEY; const key = process.env.GROQ_API_KEY;
if (!key) throw new Error("GROQ_API_KEY not set"); if (!key) throw new Error("GROQ_API_KEY not set");
@@ -47,11 +51,9 @@ async function _groqCall({ systemPrompt, userMessage, maxTokens = 256, temperatu
return data.choices?.[0]?.message?.content?.trim() ?? ""; return data.choices?.[0]?.message?.content?.trim() ?? "";
} }
// ── 1. generateExplanation ──────────────────────────────────────────────────
/** /**
* Produces a 2-3 sentence plain-English explanation of a standard. * Produces a 2-3 sentence plain-English explanation of a standard.
* Falls back to the standard's own summary on failure never throws. * Falls back to the standard's own summary on failure - never throws.
* *
* @param {{ standard_id: string, title: string, summary?: string, content?: string, key_sections?: object }} standard * @param {{ standard_id: string, title: string, summary?: string, content?: string, key_sections?: object }} standard
* @returns {Promise<string>} * @returns {Promise<string>}
@@ -64,7 +66,7 @@ async function generateExplanation(standard) {
systemPrompt: systemPrompt:
"You are a technical writer for the Bureau of Indian Standards (BIS). " + "You are a technical writer for the Bureau of Indian Standards (BIS). " +
"Explain building material standards in simple English for engineers and contractors. " + "Explain building material standards in simple English for engineers and contractors. " +
"Use ONLY the provided standard text do not add, invent, or infer anything not explicitly stated. " + "Use ONLY the provided standard text -- do not add, invent, or infer anything not explicitly stated. " +
"Write exactly 2-3 sentences. No bullet points. No headings.", "Write exactly 2-3 sentences. No bullet points. No headings.",
userMessage: userMessage:
`Explain this BIS standard in simple terms using ONLY the provided information:\n\n${context}`, `Explain this BIS standard in simple terms using ONLY the provided information:\n\n${context}`,
@@ -72,20 +74,18 @@ async function generateExplanation(standard) {
temperature: 0.2, temperature: 0.2,
}); });
} catch (err) { } catch (err) {
// Graceful fallback retrieval is unaffected // Graceful fallback -- retrieval is unaffected
return standard.summary || standard.title || ""; return standard.summary || standard.title || "";
} }
} }
// ── 2. answerQuestion ───────────────────────────────────────────────────────
/** /**
* Answers a user question strictly from a chunk of standard text. * Answers a user question strictly from a chunk of standard text.
* Returns "Not found in standard" when context doesn't contain the answer. * Returns "Not found in standard" when context doesn't contain the answer.
* Never throws. * Never throws.
* *
* @param {string} question * @param {string} question
* @param {string} chunkText raw chunk text from standards_chunks.json * @param {string} chunkText - raw chunk text from standards_chunks.json
* @returns {Promise<string>} * @returns {Promise<string>}
*/ */
async function answerQuestion(question, chunkText) { async function answerQuestion(question, chunkText) {
@@ -111,11 +111,9 @@ async function answerQuestion(question, chunkText) {
} }
} }
// ── 3. rewriteQuery (optional) ──────────────────────────────────────────────
/** /**
* Rewrites a vague natural-language query into precise IS-standard keywords. * Rewrites a vague natural-language query into precise IS-standard keywords.
* Falls back to the original query on failure retrieval is never blocked. * Falls back to the original query on failure -- retrieval is never blocked.
* *
* @param {string} query * @param {string} query
* @returns {Promise<string>} * @returns {Promise<string>}
@@ -129,12 +127,12 @@ async function rewriteQuery(query) {
"You are a search query optimizer for the BIS SP-21 building materials standards database. " + "You are a search query optimizer for the BIS SP-21 building materials standards database. " +
"Convert the user's natural-language query into 3-6 precise technical keywords " + "Convert the user's natural-language query into 3-6 precise technical keywords " +
"suitable for searching Indian Standards (IS) documents. " + "suitable for searching Indian Standards (IS) documents. " +
"Output ONLY the keywords separated by spaces no explanation, no punctuation.", "Output ONLY the keywords separated by spaces -- no explanation, no punctuation.",
userMessage: query.trim(), userMessage: query.trim(),
maxTokens: 40, maxTokens: 40,
temperature: 0.1, temperature: 0.1,
}); });
// Sanity check if rewrite is too long or garbled, fall back // Sanity check -- if rewrite is too long or garbled, fall back
const words = rewritten.trim().split(/\s+/); const words = rewritten.trim().split(/\s+/);
if (words.length >= 2 && words.length <= 10) return rewritten.trim(); if (words.length >= 2 && words.length <= 10) return rewritten.trim();
return query; return query;
@@ -143,10 +141,14 @@ async function rewriteQuery(query) {
} }
} }
// ── Helpers ───────────────────────────────────────────────────────────────── /**
* Assembles a compact text block from a standard's fields for use as LLM context.
* Caps each key section at 300 characters to keep token count low.
* @param {{ standard_id: string, title: string, category?: string, summary?: string, key_sections?: object }} standard
* @returns {string}
*/
function buildStandardContext(standard) { function buildStandardContext(standard) {
const parts = [`Standard: ${standard.standard_id} ${standard.title}`]; const parts = [`Standard: ${standard.standard_id} -- ${standard.title}`];
if (standard.category) parts.push(`Category: ${standard.category}`); if (standard.category) parts.push(`Category: ${standard.category}`);
if (standard.summary) parts.push(`Summary: ${standard.summary}`); if (standard.summary) parts.push(`Summary: ${standard.summary}`);
+13 -8
View File
@@ -1,6 +1,6 @@
"use strict"; "use strict";
/** /**
* retrieverService.js persistent Python daemon. * retrieverService.js -- persistent Python daemon.
* *
* Spawns retrieve.py ONCE when the Node server starts. The Python process * Spawns retrieve.py ONCE when the Node server starts. The Python process
* loads the FAISS index and BM25 index once, then serves queries via * loads the FAISS index and BM25 index once, then serves queries via
@@ -17,12 +17,17 @@ const path = require("path");
const readline = require("readline"); const readline = require("readline");
const { EventEmitter } = require("events"); const { EventEmitter } = require("events");
/** @type {string} - Absolute path to bridge/retrieve.py. */
const BRIDGE = path.join(__dirname, "../bridge/retrieve.py"); const BRIDGE = path.join(__dirname, "../bridge/retrieve.py");
/** @type {string} - Repository root, used as cwd for the Python subprocess. */
const ROOT = path.join(__dirname, "../../.."); const ROOT = path.join(__dirname, "../../..");
/** @type {string} - Python executable; override with PYTHON_BIN env var. */
const PYTHON = process.env.PYTHON_BIN || "python"; const PYTHON = process.env.PYTHON_BIN || "python";
const BOOT_TIMEOUT_MS = 90_000; // Python cold-start budget /** @type {number} - Maximum milliseconds to wait for the daemon to signal ready on cold start. */
const QUERY_TIMEOUT_MS = 10_000; // per-query budget once warm const BOOT_TIMEOUT_MS = 90_000;
/** @type {number} - Maximum milliseconds to wait for a single query response once the daemon is warm. */
const QUERY_TIMEOUT_MS = 10_000;
class PythonRetriever extends EventEmitter { class PythonRetriever extends EventEmitter {
constructor() { constructor() {
@@ -40,7 +45,7 @@ class PythonRetriever extends EventEmitter {
this._ready = false; this._ready = false;
this._error = null; this._error = null;
console.log("[retriever] Starting Python daemon (first boot ~20s)"); console.log("[retriever] Starting Python daemon (first boot ~20s)...");
this._proc = spawn(PYTHON, [BRIDGE], { this._proc = spawn(PYTHON, [BRIDGE], {
cwd: ROOT, cwd: ROOT,
@@ -98,11 +103,11 @@ class PythonRetriever extends EventEmitter {
try { msg = JSON.parse(raw); } try { msg = JSON.parse(raw); }
catch { return; } // ignore non-JSON (e.g. sentence-transformers progress bars) catch { return; } // ignore non-JSON (e.g. sentence-transformers progress bars)
// ── Startup handshake ── // Startup handshake: wait for {"ready":true} before flushing the queue.
if (!this._ready) { if (!this._ready) {
if (msg.ready) { if (msg.ready) {
this._ready = true; this._ready = true;
console.log(`[retriever] Ready flushing ${this._queue.length} queued request(s).`); console.log(`[retriever] Ready -- flushing ${this._queue.length} queued request(s).`);
// Send all queued requests in order // Send all queued requests in order
for (const item of this._queue) { for (const item of this._queue) {
this._pending.push(item); this._pending.push(item);
@@ -118,7 +123,7 @@ class PythonRetriever extends EventEmitter {
return; return;
} }
// ── Query response FIFO ── // Query response -- resolve/reject the oldest in-flight request (FIFO).
const item = this._pending.shift(); const item = this._pending.shift();
if (!item) return; if (!item) return;
clearTimeout(item.timer); clearTimeout(item.timer);
@@ -178,7 +183,7 @@ class PythonRetriever extends EventEmitter {
} }
} }
// Singleton one daemon for the lifetime of the Node process // Singleton -- one daemon for the lifetime of the Node process
const retriever = new PythonRetriever(); const retriever = new PythonRetriever();
module.exports = { retrieve: (q, n) => retriever.retrieve(q, n) }; module.exports = { retrieve: (q, n) => retriever.retrieve(q, n) };
+2 -2
View File
@@ -1,5 +1,5 @@
/** /**
* start.js safe server launcher * start.js - safe server launcher
* Kills any process already on PORT before starting index.js. * Kills any process already on PORT before starting index.js.
* Run with: node web/server/start.js * Run with: node web/server/start.js
*/ */
@@ -24,7 +24,7 @@ function killPort(port) {
execSync(`fuser -k ${port}/tcp`, { stdio: "ignore" }); execSync(`fuser -k ${port}/tcp`, { stdio: "ignore" });
} }
} catch { } catch {
// No process on that port fine // No process on that port -- fine
} }
} }