feat: add web server backend.

This commit is contained in:
K
2026-04-28 23:55:41 +05:30
parent 3065a0adce
commit 3a0c32ea8f
8 changed files with 1705 additions and 0 deletions
+164
View File
@@ -0,0 +1,164 @@
"use strict";
/**
* llmService.js
* All Groq LLM calls live here. Three functions:
* generateExplanation(standard) — 2-3 sentence plain-English summary
* answerQuestion(question, chunk) — grounded QA, strict context-only
* rewriteQuery(query) — optional query expansion
*
* Key rules enforced here:
* - GROQ_API_KEY never leaves this file toward the client
* - max_tokens kept short to minimise latency (<400 tokens each)
* - Every function returns a fallback value on failure — callers never throw
*/
// Groq chat-completions endpoint; _groqCall POSTs every request to this URL.
const GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions";
// Model identifier sent as the `model` field of each request body.
const MODEL = "llama-3.1-8b-instant";
// ── Core fetch wrapper ──────────────────────────────────────────────────────
/**
 * Sends one system + user message pair to the Groq chat-completions endpoint
 * and returns the text of the first choice.
 *
 * The request is aborted after `timeoutMs` so a stalled connection cannot
 * hang the caller forever; the resulting rejection is handled by the same
 * catch/fallback path callers already use for HTTP errors.
 *
 * @param {object} options
 * @param {string} options.systemPrompt - content of the `system` message
 * @param {string} options.userMessage - content of the `user` message
 * @param {number} [options.maxTokens=256] - sent as `max_tokens`
 * @param {number} [options.temperature=0.2] - sampling temperature
 * @param {number} [options.timeoutMs=15000] - abort the HTTP request after this many milliseconds
 * @returns {Promise<string>} trimmed reply text, or "" when the response has no content
 * @throws {Error} when GROQ_API_KEY is unset, the request fails or times out,
 *   or the API answers with a non-2xx status
 */
async function _groqCall({
  systemPrompt,
  userMessage,
  maxTokens = 256,
  temperature = 0.2,
  timeoutMs = 15_000,
}) {
  // Read the key at call time so it never has to be stored outside the environment.
  const key = process.env.GROQ_API_KEY;
  if (!key) throw new Error("GROQ_API_KEY not set");

  const res = await fetch(GROQ_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${key}`,
    },
    body: JSON.stringify({
      model: MODEL,
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: userMessage },
      ],
      max_tokens: maxTokens,
      temperature,
    }),
    // Rejects the fetch with a TimeoutError once the budget is spent.
    signal: AbortSignal.timeout(timeoutMs),
  });

  if (!res.ok) {
    // The error body is best-effort: a non-JSON error page must not mask the status code.
    const body = await res.json().catch(() => ({}));
    throw new Error(`Groq ${res.status}: ${body?.error?.message || "unknown error"}`);
  }

  const data = await res.json();
  return data.choices?.[0]?.message?.content?.trim() ?? "";
}
// ── 1. generateExplanation ──────────────────────────────────────────────────
/**
 * Produces a 2-3 sentence plain-English explanation of a standard.
 *
 * Never throws: if the standard is missing or malformed, the LLM call fails,
 * or the model returns an empty reply, the standard's own summary (then its
 * title, then "") is returned instead.
 *
 * @param {{ standard_id: string, title: string, summary?: string, category?: string, content?: string, key_sections?: object }} standard
 * @returns {Promise<string>}
 */
async function generateExplanation(standard) {
  const fallback = standard?.summary || standard?.title || "";
  if (!standard) return fallback;

  try {
    // Built inside the try so a malformed standard degrades to the fallback
    // instead of breaking the "never throws" contract.
    const context = buildStandardContext(standard);
    const explanation = await _groqCall({
      systemPrompt:
        "You are a technical writer for the Bureau of Indian Standards (BIS). " +
        "Explain building material standards in simple English for engineers and contractors. " +
        "Use ONLY the provided standard text — do not add, invent, or infer anything not explicitly stated. " +
        "Write exactly 2-3 sentences. No bullet points. No headings.",
      userMessage:
        `Explain this BIS standard in simple terms using ONLY the provided information:\n\n${context}`,
      maxTokens: 180,
      temperature: 0.2,
    });
    // An empty completion is no more useful than a failure.
    return explanation || fallback;
  } catch {
    // Graceful fallback — retrieval is unaffected
    return fallback;
  }
}
// ── 2. answerQuestion ───────────────────────────────────────────────────────
/**
 * Answers a user question strictly from a chunk of standard text.
 *
 * Returns the sentinel "Not found in standard" when either input is missing,
 * blank, or not a string, when the LLM call fails, or when the model returns
 * an empty reply. Never throws.
 *
 * @param {string} question
 * @param {string} chunkText — raw chunk text from standards_chunks.json
 * @returns {Promise<string>}
 */
async function answerQuestion(question, chunkText) {
  const NOT_FOUND = "Not found in standard";
  // typeof guard: a non-string input would otherwise throw on .trim() before the try block.
  const hasText = (value) => typeof value === "string" && value.trim() !== "";
  if (!hasText(question) || !hasText(chunkText)) return NOT_FOUND;

  try {
    const answer = await _groqCall({
      systemPrompt:
        "You are a precise technical assistant for BIS Indian Standards. " +
        "Answer questions using ONLY the provided context text. " +
        "If the answer is not present in the context, respond with exactly: 'Not found in standard'. " +
        "Do not speculate. Do not reference other standards not mentioned in the context. " +
        "Keep answers under 100 words.",
      userMessage:
        `CONTEXT:\n${chunkText}\n\nQUESTION: ${question.trim()}`,
      maxTokens: 200,
      temperature: 0.1,
    });
    // Callers match on the sentinel, so an empty completion maps to it too.
    return answer || NOT_FOUND;
  } catch {
    return NOT_FOUND;
  }
}
// ── 3. rewriteQuery (optional) ──────────────────────────────────────────────
/**
 * Turns a loosely worded natural-language query into a short list of
 * IS-standard search keywords via the LLM.
 *
 * The caller's original query is handed back untouched when it is blank,
 * when the LLM call fails, or when the rewrite does not look like a keyword
 * list (fewer than 2 or more than 10 whitespace-separated words), so
 * retrieval is never blocked by this step.
 *
 * @param {string} query
 * @returns {Promise<string>}
 */
async function rewriteQuery(query) {
  if (!query?.trim()) return query;

  const systemPrompt =
    "You are a search query optimizer for the BIS SP-21 building materials standards database. " +
    "Convert the user's natural-language query into 3-6 precise technical keywords " +
    "suitable for searching Indian Standards (IS) documents. " +
    "Output ONLY the keywords separated by spaces — no explanation, no punctuation.";

  let candidate;
  try {
    const reply = await _groqCall({
      systemPrompt,
      userMessage: query.trim(),
      maxTokens: 40,
      temperature: 0.1,
    });
    candidate = reply.trim();
  } catch {
    return query;
  }

  // Plausibility gate: anything outside 2..10 words is treated as a garbled rewrite.
  const keywordCount = candidate.split(/\s+/).length;
  const looksLikeKeywords = keywordCount >= 2 && keywordCount <= 10;
  return looksLikeKeywords ? candidate : query;
}
// ── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Flattens a standard record into the compact, newline-separated text block
 * that is sent to the LLM as context.
 *
 * Layout: a "Standard:" heading (id and title joined by " — "), optional
 * "Category:" and "Summary:" lines, then up to three key sections, each
 * capped at 300 characters.
 *
 * @param {{ standard_id?: string, title?: string, category?: string, summary?: string, key_sections?: object }} standard
 * @returns {string}
 */
function buildStandardContext(standard) {
  // Join with an explicit separator so the id and title do not run together,
  // and skip whichever of the two is missing instead of printing "undefined".
  const heading = [standard.standard_id, standard.title].filter(Boolean).join(" — ");
  const parts = [`Standard: ${heading}`];

  if (standard.category) parts.push(`Category: ${standard.category}`);
  if (standard.summary) parts.push(`Summary: ${standard.summary}`);

  const sections = Object.entries(standard.key_sections || {}).slice(0, 3);
  if (sections.length) {
    parts.push("Key Sections:");
    for (const [name, text] of sections) {
      // Cap each section to 300 chars to keep token count low.
      // String() tolerates non-string section values (arrays, numbers, null).
      parts.push(` ${name}: ${String(text ?? "").slice(0, 300)}`);
    }
  }
  return parts.join("\n");
}
// Public API of this module: the three LLM helpers. _groqCall and
// buildStandardContext are not exported and stay internal to this file.
module.exports = { generateExplanation, answerQuestion, rewriteQuery };
+184
View File
@@ -0,0 +1,184 @@
"use strict";
/**
* retrieverService.js — persistent Python daemon.
*
* Spawns retrieve.py ONCE when the Node server starts. The Python process
* loads the FAISS index and BM25 index once, then serves queries via
* newline-delimited JSON on stdin/stdout.
*
* First query: ~150ms (index already warm from startup).
* Cold start happens in the background while the server is booting.
*
* inference.py is never modified.
*/
const { spawn } = require("child_process");
const path = require("path");
const readline = require("readline");
const { EventEmitter } = require("events");
// Python script run as the daemon, resolved relative to this file.
const BRIDGE = path.join(__dirname, "../bridge/retrieve.py");
// Working directory for the spawned process: three directory levels above this file.
const ROOT = path.join(__dirname, "../../..");
// Interpreter executable; set the PYTHON_BIN environment variable to override "python".
const PYTHON = process.env.PYTHON_BIN || "python";
// Timeout for requests issued before the daemon has signalled ready (Python cold-start budget).
const BOOT_TIMEOUT_MS = 90_000;
// Timeout for requests issued once the daemon is ready (per-query budget once warm).
const QUERY_TIMEOUT_MS = 10_000;
/**
 * Owns one long-lived Python child process and exchanges newline-delimited
 * JSON with it: one request line on stdin, one reply line on stdout.
 *
 * Replies carry no request id, so they are matched to requests strictly in
 * FIFO order. A request that times out while in flight therefore keeps its
 * slot in `_pending` (marked `settled`) so that the daemon's late reply is
 * consumed and discarded instead of being delivered to the next request.
 */
class PythonRetriever extends EventEmitter {
  constructor() {
    super();
    this._proc = null; // child process handle; null when no daemon is running
    this._rl = null; // readline interface over the child's stdout
    this._ready = false; // true once the daemon has sent its { ready } handshake
    this._error = null; // spawn/init failure message; while set, retrieve() rejects immediately
    this._queue = []; // requests queued before ready: [{query,top_n,resolve,reject,timer,settled}]
    this._pending = []; // requests written to Python, oldest first, awaiting a reply line
    this._start();
  }

  /** Spawns the daemon and wires up its stdout, stderr, stdin and lifecycle events. */
  _start() {
    this._ready = false;
    this._error = null;
    console.log("[retriever] Starting Python daemon (first boot ~20s)…");
    this._proc = spawn(PYTHON, [BRIDGE], {
      cwd: ROOT,
      env: { ...process.env },
      stdio: ["pipe", "pipe", "pipe"],
    });

    this._rl = readline.createInterface({ input: this._proc.stdout, crlfDelay: Infinity });
    this._rl.on("line", (raw) => this._onLine(raw));

    // Without a listener, a failed write to a dead child would surface as an
    // unhandled 'error' event and take the whole server down.
    this._proc.stdin.on("error", (err) => {
      console.error("[retriever] stdin error:", err.message);
    });

    this._proc.stderr.on("data", (d) => {
      const text = d.toString();
      // Suppress routine model-loading noise
      if (
        text.includes("Loading cached") ||
        text.includes("BM25 index") ||
        text.includes("Loaded ") ||
        text.includes("Building BM25")
      ) return;
      process.stderr.write("[py] " + text);
    });

    this._proc.on("close", (code) => {
      console.warn(`[retriever] Daemon exited (code ${code}), restarting on next query.`);
      this._ready = false;
      this._proc = null;
      this._rl = null;
      // Reject anything still in flight
      this._failOutstanding("Python retriever restarted unexpectedly.");
    });

    this._proc.on("error", (err) => {
      console.error("[retriever] Spawn error:", err.message);
      this._error = err.message;
      this._failOutstanding(`Retriever spawn failed: ${err.message}`);
    });
  }

  /**
   * Rejects every queued and in-flight request that has not been settled yet
   * and empties both lists.
   * @param {string} message - text of the Error each request is rejected with
   */
  _failOutstanding(message) {
    for (const item of [...this._queue, ...this._pending]) {
      clearTimeout(item.timer);
      if (!item.settled) {
        item.settled = true;
        item.reject(new Error(message));
      }
    }
    this._queue.length = 0;
    this._pending.length = 0;
  }

  /**
   * Handles one stdout line from the daemon: the startup handshake while not
   * ready, afterwards the reply to the oldest in-flight request.
   * @param {string} raw
   */
  _onLine(raw) {
    const line = raw.trim();
    if (!line) return;

    let msg;
    try { msg = JSON.parse(line); }
    catch { return; } // ignore non-JSON (e.g. sentence-transformers progress bars)
    // A bare JSON scalar (`null`, `42`, …) parses fine but is not a protocol message.
    if (msg === null || typeof msg !== "object") return;

    // ── Startup handshake ──
    if (!this._ready) {
      if (msg.ready) {
        this._ready = true;
        console.log(`[retriever] Ready — flushing ${this._queue.length} queued request(s).`);
        // Send all queued requests in order
        const queued = this._queue.splice(0);
        for (const item of queued) this._dispatch(item);
      } else if (msg.error) {
        this._error = msg.error;
        console.error("[retriever] Init failed:", msg.error);
        for (const p of this._queue) { clearTimeout(p.timer); p.reject(new Error(msg.error)); }
        this._queue.length = 0;
      }
      return;
    }

    // ── Query response — FIFO ──
    const item = this._pending.shift();
    if (!item) return;
    clearTimeout(item.timer);
    // Late reply for a request that already timed out: drop it so the
    // following replies stay aligned with their own requests.
    if (item.settled) return;
    item.settled = true;
    if (msg.error) {
      item.reject(new Error(msg.error));
    } else {
      item.resolve({
        results: msg.results || [],
        latency_seconds: msg.latency_seconds ?? 0,
      });
    }
  }

  /**
   * Writes one request line to the daemon's stdin.
   * @returns {boolean} false when there is no live process to write to
   */
  _send(item) {
    if (!this._proc || this._proc.killed) return false;
    this._proc.stdin.write(
      JSON.stringify({ query: item.query, top_n: item.top_n }) + "\n"
    );
    return true;
  }

  /**
   * Sends a request and records it as in flight. If nothing could be written,
   * the request is rejected right away: no reply will ever arrive for it, so
   * it must not occupy a FIFO slot.
   */
  _dispatch(item) {
    if (this._send(item)) {
      this._pending.push(item);
      return;
    }
    clearTimeout(item.timer);
    item.settled = true;
    item.reject(new Error("Python retriever is not running."));
  }

  /**
   * Runs one retrieval query against the daemon, starting it again first if
   * it has exited.
   *
   * @param {string} query
   * @param {number} topN
   * @returns {Promise<{ results: Array, latency_seconds: number }>}
   *   Rejects on spawn/init failure, daemon exit, a daemon-reported error,
   *   or timeout.
   */
  retrieve(query, topN = 5) {
    // Restart if crashed
    if (!this._proc) this._start();

    return new Promise((resolve, reject) => {
      if (this._error) {
        return reject(new Error(this._error));
      }

      const timeoutMs = this._ready ? QUERY_TIMEOUT_MS : BOOT_TIMEOUT_MS;
      const item = { query, top_n: topN, resolve, reject, timer: null, settled: false };

      item.timer = setTimeout(() => {
        // Not yet sent: simply drop it from the startup queue.
        const queuedAt = this._queue.indexOf(item);
        if (queuedAt !== -1) this._queue.splice(queuedAt, 1);
        // Already sent: leave it in _pending as a placeholder for its late reply.
        item.settled = true;
        reject(new Error(`Retriever timed out after ${timeoutMs}ms`));
      }, timeoutMs);

      if (this._ready) {
        // Send immediately and wait for response
        this._dispatch(item);
      } else {
        // Queue until daemon signals ready
        this._queue.push(item);
      }
    });
  }
}
// Singleton — one daemon for the lifetime of the Node process
// Singleton — one daemon for the lifetime of the Node process.
// The constructor starts the Python process, so the daemon begins booting
// as soon as this module is first loaded.
const retriever = new PythonRetriever();
// Only retrieve() is exposed; the arrow wrapper keeps the call bound to the singleton.
module.exports = { retrieve: (q, n) => retriever.retrieve(q, n) };