fix: harden server input validation and prevent information leakage.
This commit is contained in:
@@ -42,11 +42,11 @@ def main():
|
|||||||
try:
|
try:
|
||||||
req = json.loads(raw_line)
|
req = json.loads(raw_line)
|
||||||
query = req.get("query", "")
|
query = req.get("query", "")
|
||||||
top_n = int(req.get("top_n", 5))
|
top_n = max(1, min(int(req.get("top_n", 5)), 20))
|
||||||
results, latency = retriever.retrieve(query, top_n=top_n)
|
results, latency = retriever.retrieve(query, top_n=top_n)
|
||||||
response = {"results": results, "latency_seconds": round(latency, 4)}
|
response = {"results": results, "latency_seconds": round(latency, 4)}
|
||||||
except Exception as exc:
|
except Exception:
|
||||||
response = {"error": str(exc)}
|
response = {"error": "retrieval_failed"}
|
||||||
|
|
||||||
sys.stdout.write(json.dumps(response) + "\n")
|
sys.stdout.write(json.dumps(response) + "\n")
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|||||||
+5
-5
@@ -99,8 +99,8 @@ for (const c of chunks) {
|
|||||||
chunksByStd[c.standard_id].push(c);
|
chunksByStd[c.standard_id].push(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @type {RegExp} - Matches ASCII control characters that should be stripped from user input. */
|
/** @type {RegExp} - Matches ASCII control characters and Unicode BiDi override characters that should be stripped from user input. */
|
||||||
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
|
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\u202A-\u202E\u2066-\u2069]/g;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Strips control characters and truncates a string to a safe length.
|
* Strips control characters and truncates a string to a safe length.
|
||||||
@@ -114,8 +114,8 @@ function sanitizeText(value, maxLen = 500) {
|
|||||||
return value.replace(CONTROL_CHAR_RE, "").slice(0, maxLen).trim();
|
return value.replace(CONTROL_CHAR_RE, "").slice(0, maxLen).trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @type {RegExp} - Accepts IS standard IDs: letters, digits, spaces, colons, parens, dots, hyphens, slashes. */
|
/** @type {RegExp} - Accepts IS standard IDs: letters, digits, spaces, colons, parens, dots, hyphens. */
|
||||||
const STANDARD_ID_RE = /^[A-Za-z0-9 :()./-]{1,60}$/;
|
const STANDARD_ID_RE = /^[A-Za-z0-9 :().-]{1,60}$/;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the value is a well-formed IS standard identifier.
|
* Returns true if the value is a well-formed IS standard identifier.
|
||||||
@@ -336,7 +336,7 @@ app.post("/api/recommend", async (req, res) => {
|
|||||||
const totalMs = Date.now() - t0;
|
const totalMs = Date.now() - t0;
|
||||||
|
|
||||||
log("POST /api/recommend", {
|
log("POST /api/recommend", {
|
||||||
query: effectiveQuery,
|
query: sanitizeText(effectiveQuery, 200),
|
||||||
results: retrieved.length,
|
results: retrieved.length,
|
||||||
retrieval_ms: retrievalMs,
|
retrieval_ms: retrievalMs,
|
||||||
llm_ms: llmMs,
|
llm_ms: llmMs,
|
||||||
|
|||||||
@@ -21,8 +21,14 @@ const { EventEmitter } = require("events");
|
|||||||
const BRIDGE = path.join(__dirname, "../bridge/retrieve.py");
|
const BRIDGE = path.join(__dirname, "../bridge/retrieve.py");
|
||||||
/** @type {string} - Repository root, used as cwd for the Python subprocess. */
|
/** @type {string} - Repository root, used as cwd for the Python subprocess. */
|
||||||
const ROOT = path.join(__dirname, "../../..");
|
const ROOT = path.join(__dirname, "../../..");
|
||||||
/** @type {string} - Python executable; override with PYTHON_BIN env var. */
|
/** @type {string} - Python executable; override with PYTHON_BIN env var (must be "python", "python2", "python3", or an absolute path starting with "/"; the path itself is not otherwise verified). */
|
||||||
const PYTHON = process.env.PYTHON_BIN || "python";
|
const _pythonRaw = process.env.PYTHON_BIN || "python";
|
||||||
|
const _PYTHON_ALLOWLIST = /^(python[23]?|\/[^\0]+)$/;
|
||||||
|
if (!_PYTHON_ALLOWLIST.test(_pythonRaw)) {
|
||||||
|
console.error(`[retriever] Invalid PYTHON_BIN value: ${JSON.stringify(_pythonRaw)}. Must be "python", "python3", or an absolute path.`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
const PYTHON = _pythonRaw;
|
||||||
|
|
||||||
/** @type {number} - Maximum milliseconds to wait for the daemon to signal ready on cold start. */
|
/** @type {number} - Maximum milliseconds to wait for the daemon to signal ready on cold start. */
|
||||||
const BOOT_TIMEOUT_MS = 90_000;
|
const BOOT_TIMEOUT_MS = 90_000;
|
||||||
@@ -131,9 +137,17 @@ class PythonRetriever extends EventEmitter {
|
|||||||
if (msg.error) {
|
if (msg.error) {
|
||||||
item.reject(new Error(msg.error));
|
item.reject(new Error(msg.error));
|
||||||
} else {
|
} else {
|
||||||
|
const raw = Array.isArray(msg.results) ? msg.results : [];
|
||||||
|
const ALLOWED = new Set(["standard_id", "title", "category", "matched_section", "score"]);
|
||||||
|
const results = raw.map((r) => {
|
||||||
|
if (typeof r !== "object" || r === null) return null;
|
||||||
|
const safe = {};
|
||||||
|
for (const k of ALLOWED) if (k in r) safe[k] = r[k];
|
||||||
|
return safe;
|
||||||
|
}).filter(Boolean);
|
||||||
item.resolve({
|
item.resolve({
|
||||||
results: msg.results || [],
|
results,
|
||||||
latency_seconds: msg.latency_seconds ?? 0,
|
latency_seconds: typeof msg.latency_seconds === "number" ? msg.latency_seconds : 0,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+10
-4
@@ -3,8 +3,14 @@
|
|||||||
* Kills any process already on PORT before starting index.js.
|
* Kills any process already on PORT before starting index.js.
|
||||||
* Run with: node web/server/start.js
|
* Run with: node web/server/start.js
|
||||||
*/
|
*/
|
||||||
const { execSync, spawn } = require("child_process");
|
const { execSync, spawnSync, spawn } = require("child_process");
|
||||||
const PORT = process.env.PORT || 5000;
|
|
||||||
|
const rawPort = process.env.PORT || "5000";
|
||||||
|
const PORT = parseInt(rawPort, 10);
|
||||||
|
if (!Number.isInteger(PORT) || PORT < 1 || PORT > 65535) {
|
||||||
|
console.error(`[start] Invalid PORT value: ${JSON.stringify(rawPort)}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
function killPort(port) {
|
function killPort(port) {
|
||||||
try {
|
try {
|
||||||
@@ -18,10 +24,10 @@ function killPort(port) {
|
|||||||
}
|
}
|
||||||
for (const pid of pids) {
|
for (const pid of pids) {
|
||||||
console.log(`[start] Killing stale process PID ${pid} on port ${port}`);
|
console.log(`[start] Killing stale process PID ${pid} on port ${port}`);
|
||||||
execSync(`taskkill /PID ${pid} /F`, { stdio: "ignore" });
|
spawnSync("taskkill", ["/PID", pid, "/F"], { stdio: "ignore" });
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
execSync(`fuser -k ${port}/tcp`, { stdio: "ignore" });
|
spawnSync("fuser", [`${port}/tcp`, "-k"], { stdio: "ignore" });
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// No process on that port -- fine
|
// No process on that port -- fine
|
||||||
|
|||||||
Reference in New Issue
Block a user