docs: add JSDoc and normalize comments across server.

2026-05-03 00:16:42 +05:30
parent 33fe20021a
commit f88a45968a
5 changed files with 127 additions and 84 deletions
@@ -11,10 +11,11 @@ const { generateExplanation, answerQuestion, rewriteQuery } = require("./service
 const { retrieve } = require("./services/retrieverService");

 const app  = express();
+
+/** @type {string|number} - HTTP port: the PORT env var (a string) when set, otherwise 5000. */
 const PORT = process.env.PORT || 5000;

-// ── Startup checks ──────────────────────────────────────────────────────────
-
+// Warn early when the Groq key is absent so AI degradation is visible at boot.
 if (!process.env.GROQ_API_KEY) {
  console.warn(
    "[WARN] GROQ_API_KEY is not set. AI features will return fallback values.\n" +
@@ -22,12 +23,12 @@ if (!process.env.GROQ_API_KEY) {
  );
 }

-// ── Security headers ─────────────────────────────────────────────────────────
-
 app.use(helmet());

-// ── CORS — restrict to configured origin or localhost dev ────────────────────
-
+/**
+ * @type {string[]} - Allowed CORS origins; reads CORS_ORIGIN env var (comma-separated) or
+ *   falls back to localhost dev/preview ports.
+ */
 const ALLOWED_ORIGINS = process.env.CORS_ORIGIN
  ? process.env.CORS_ORIGIN.split(",").map((o) => o.trim())
  : ["http://localhost:5173", "http://localhost:4173", `http://localhost:${PORT}`];
@@ -42,8 +43,7 @@ app.use(cors({
  allowedHeaders: ["Content-Type"],
 }));

-// ── Rate limiting ─────────────────────────────────────────────────────────────
-
+/** @type {import('express-rate-limit').RateLimitRequestHandler} - 60 req/min applied to all /api/ routes. */
 const apiLimiter = rateLimit({
  windowMs: 60 * 1000,
  max: 60,
@@ -52,6 +52,7 @@ const apiLimiter = rateLimit({
  message: { error: "Too many requests. Please wait a moment and try again." },
 });

+/** @type {import('express-rate-limit').RateLimitRequestHandler} - 20 req/min applied to LLM-backed endpoints. */
 const llmLimiter = rateLimit({
  windowMs: 60 * 1000,
  max: 20,
@@ -67,8 +68,7 @@ app.use("/api/chat",      llmLimiter);

 app.use(express.json({ limit: "16kb" }));

-// ── Load data ───────────────────────────────────────────────────────────────
-
+/** @type {string} - Absolute path to the processed data directory. */
 const DATA_DIR = path.join(__dirname, "../../data/processed");

 let standards = [];
@@ -84,7 +84,7 @@ try {

 // Pre-build lookups
 const standardsById  = {};
-const chunksByStd    = {};   // standard_id → [chunk, …]
+const chunksByStd    = {};   // standard_id: [chunk, ...]
 const byCategory     = {};
 const categories     = new Set();

@@ -99,34 +99,59 @@ for (const c of chunks) {
  chunksByStd[c.standard_id].push(c);
 }

-// ── Input sanitization ────────────────────────────────────────────────────────
-
+/** @type {RegExp} - Matches ASCII control characters (all except tab, LF, and CR) plus DEL, which are stripped from user input. */
 const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;

+/**
+ * Strips control characters, truncates the result to maxLen, then trims surrounding whitespace.
+ * Returns an empty string if the value is not a string.
+ * @param {*} value
+ * @param {number} [maxLen=500]
+ * @returns {string}
+ */
 function sanitizeText(value, maxLen = 500) {
  if (typeof value !== "string") return "";
  return value.replace(CONTROL_CHAR_RE, "").slice(0, maxLen).trim();
 }

-// standard_id must match IS identifier pattern: letters/digits/spaces/colons/parens/dots/hyphens
+/** @type {RegExp} - Accepts IS standard IDs: letters, digits, spaces, colons, parens, dots, hyphens, slashes. */
 const STANDARD_ID_RE = /^[A-Za-z0-9 :()./-]{1,60}$/;
+
+/**
+ * Returns true if the value is a well-formed IS standard identifier.
+ * @param {*} id
+ * @returns {boolean}
+ */
 function isValidStandardId(id) {
  return typeof id === "string" && STANDARD_ID_RE.test(id.trim());
 }

-// ── Structured logger ───────────────────────────────────────────────────────
-
+/**
+ * Writes a structured JSON log line to stdout with a UTC timestamp.
+ * @param {string} endpoint - Route label, e.g. "POST /api/recommend".
+ * @param {object} data - Arbitrary key/value pairs to include in the log entry.
+ */
 function log(endpoint, data) {
  const ts = new Date().toISOString();
  console.log(`[${ts}] ${endpoint} |`, JSON.stringify(data));
 }

-// ── Keyword-based search helper (unchanged from original) ───────────────────
-
+/**
+ * Normalises a string to lowercase alphanumeric tokens for keyword matching.
+ * @param {string} str
+ * @returns {string}
+ */
 function normalize(str) {
  return str.toLowerCase().replace(/[^a-z0-9]/g, " ").replace(/\s+/g, " ").trim();
 }

+/**
+ * Scores a standard against a query using weighted keyword matching across id, title,
+ * keywords, summary, and category fields.
+ * @param {object} standard - A standards.json record.
+ * @param {string} query - Raw search query string.
+ * @returns {number} Relevance score; higher is more relevant.
+ */
 function scoreStandard(standard, query) {
  const q       = normalize(query);
  const qTokens = q.split(" ").filter(Boolean);
@@ -150,8 +175,13 @@ function scoreStandard(standard, query) {
  return s;
 }

-// ── Best chunk selector ─────────────────────────────────────────────────────
-
+/**
+ * Returns the chunk from a standard that best matches the given question via token overlap.
+ * Falls back to the first chunk if no tokens produce a positive score.
+ * @param {string} standardId - IS standard identifier key into chunksByStd.
+ * @param {string} question - User question used for token matching.
+ * @returns {{ text: string, section: string, chunk_id: string, standard_id: string } | null}
+ */
 function bestChunk(standardId, question) {
  const stdChunks = chunksByStd[standardId] || [];
  if (!stdChunks.length) return null;
@@ -168,11 +198,13 @@ function bestChunk(standardId, question) {
  return best;
 }

-// ═══════════════════════════════════════════════════════════════════════════
 // Routes
-// ═══════════════════════════════════════════════════════════════════════════

-// ── GET /api/standards ──────────────────────────────────────────────────────
+/**
+ * GET /api/standards
+ * Returns a paginated, optionally filtered and keyword-scored list of standards.
+ * Query params: q (search string), category, page, limit.
+ */
 app.get("/api/standards", (req, res) => {
  const q        = sanitizeText(req.query.q || "", 200);
  const category = sanitizeText(req.query.category || "", 100);
@@ -197,7 +229,10 @@ app.get("/api/standards", (req, res) => {
  res.json({ data: paginated, meta: { total, page: pageNum, limit: limitNum, totalPages } });
 });

-// ── GET /api/standards/:id ──────────────────────────────────────────────────
+/**
+ * GET /api/standards/:id
+ * Returns a single standard by its IS identifier; 404 if not found, 400 if the id is malformed.
+ */
 app.get("/api/standards/:id", (req, res) => {
  const raw = decodeURIComponent(req.params.id);
  if (!isValidStandardId(raw)) {
@@ -208,7 +243,10 @@ app.get("/api/standards/:id", (req, res) => {
  res.json(standard);
 });

-// ── GET /api/categories ─────────────────────────────────────────────────────
+/**
+ * GET /api/categories
+ * Returns all categories sorted alphabetically, each with its standard count.
+ */
 app.get("/api/categories", (req, res) => {
  const result = [...categories].sort().map((cat) => ({
    name:  cat,
@@ -217,7 +255,10 @@ app.get("/api/categories", (req, res) => {
  res.json(result);
 });

-// ── GET /api/stats ──────────────────────────────────────────────────────────
+/**
+ * GET /api/stats
+ * Returns aggregate counts of standards, categories, and chunks loaded in memory.
+ */
 app.get("/api/stats", (req, res) => {
  res.json({
    totalStandards:  standards.length,
@@ -226,16 +267,13 @@ app.get("/api/stats", (req, res) => {
  });
 });

-// ── POST /api/recommend ─────────────────────────────────────────────────────
 /**
- * Input:  { query: string, top_n?: number, rewrite?: boolean }
- * Flow:
- *   1. Optionally rewrite query with LLM (parallel, non-blocking on failure)
- *   2. Call Python inference.py via bridge (retrieval logic untouched)
- *   3. Enrich each result with LLM explanation (Promise.allSettled — no blocking)
- *   4. Return standards + explanations + timing breakdown
+ * POST /api/recommend
+ * Hybrid retrieval endpoint: optionally rewrites the query, calls the Python daemon,
+ * then attaches parallel LLM explanations to each result.
 *
- * Output: { standards, latency: { retrieval_ms, llm_ms, total_ms } }
+ * @param {{ query: string, top_n?: number, rewrite?: boolean }} req.body
+ * @returns {{ query: string, standards: Array, latency: { retrieval_ms: number, llm_ms: number, total_ms: number } }}
 */
 app.post("/api/recommend", async (req, res) => {
  const rawQuery = req.body?.query;
@@ -249,13 +287,13 @@ app.post("/api/recommend", async (req, res) => {

  const t0 = Date.now();

-  // Step 1 — Optional query rewrite (fires concurrently, falls back silently)
+  // Step 1 - Optional query rewrite (awaited before retrieval starts; falls back silently)
  let effectiveQuery = query;
  if (rewrite && process.env.GROQ_API_KEY) {
    effectiveQuery = await rewriteQuery(query.trim()); // never throws
  }

-  // Step 2 — Python retrieval (inference.py untouched)
+  // Step 2 - Python retrieval (inference.py untouched)
  let retrievalResult;
  const tRetStart = Date.now();
  try {
@@ -268,7 +306,7 @@ app.post("/api/recommend", async (req, res) => {

  const { results: retrieved, latency_seconds: pyLatency } = retrievalResult;

-  // Step 3 — LLM explanations fired in parallel (allSettled — never blocks on failure)
+  // Step 3 - LLM explanations fired in parallel and awaited with Promise.all (assumes each job handles its own failure - confirm)
  const tLlmStart = Date.now();
  const explanationJobs = retrieved.map((r) => {
    const std = standardsById[r.standard_id];
@@ -281,7 +319,7 @@ app.post("/api/recommend", async (req, res) => {
  const explanations = await Promise.all(explanationJobs);
  const llmMs = Date.now() - tLlmStart;

-  // Step 4 — Assemble response
+  // Step 4 - Assemble response
  const standardsOut = retrieved.map((r, i) => {
    const std = standardsById[r.standard_id] || {};
    return {
@@ -316,15 +354,12 @@ app.post("/api/recommend", async (req, res) => {
  });
 });

-// ── POST /api/ask ───────────────────────────────────────────────────────────
 /**
- * Input:  { question: string, standard_id: string }
- * Flow:
- *   1. Find best matching chunk for the question within the standard
- *   2. Pass chunk text to answerQuestion() — strictly grounded
- *   3. Return answer + chunk source info
+ * POST /api/ask
+ * Answers a question grounded in the best-matching chunk of a specific standard.
 *
- * Output: { answer, source: { standard_id, section, chunk_id } }
+ * @param {{ question: string, standard_id: string }} req.body
+ * @returns {{ answer: string, source: { standard_id: string, section: string, chunk_id: string }, latency: object }}
 */
 app.post("/api/ask", async (req, res) => {
  const question    = sanitizeText(req.body?.question, 500);
@@ -367,10 +402,12 @@ app.post("/api/ask", async (req, res) => {
  });
 });

-// ── POST /api/chat ──────────────────────────────────────────────────────────
 /**
- * Conversational QA grounded in a standard's full text.
- * Uses answerQuestion() from llmService — key never leaves server.
+ * POST /api/chat
+ * Conversational QA grounded in a standard's full text; 503 if GROQ_API_KEY is absent.
+ *
+ * @param {{ question: string, standard_id?: string }} req.body
+ * @returns {{ answer: string }}
 */
 app.post("/api/chat", async (req, res) => {
  if (!process.env.GROQ_API_KEY) {
@@ -408,7 +445,6 @@ app.post("/api/chat", async (req, res) => {
  res.json({ answer });
 });

-// ── Start ───────────────────────────────────────────────────────────────────
 const server = app.listen(PORT, () => {
  console.log(`[init] BIS API running on http://localhost:${PORT}`);
 });