fix: harden server input validation and prevent information leakage.
This commit is contained in:
@@ -42,11 +42,11 @@ def main():
|
||||
try:
|
||||
req = json.loads(raw_line)
|
||||
query = req.get("query", "")
|
||||
top_n = int(req.get("top_n", 5))
|
||||
top_n = max(1, min(int(req.get("top_n", 5)), 20))
|
||||
results, latency = retriever.retrieve(query, top_n=top_n)
|
||||
response = {"results": results, "latency_seconds": round(latency, 4)}
|
||||
except Exception as exc:
|
||||
response = {"error": str(exc)}
|
||||
except Exception:
|
||||
response = {"error": "retrieval_failed"}
|
||||
|
||||
sys.stdout.write(json.dumps(response) + "\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
+5
-5
@@ -99,8 +99,8 @@ for (const c of chunks) {
|
||||
chunksByStd[c.standard_id].push(c);
|
||||
}
|
||||
|
||||
/** @type {RegExp} - Matches ASCII control characters that should be stripped from user input. */
|
||||
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
|
||||
/** @type {RegExp} - Matches ASCII control characters and Unicode BiDi override characters that should be stripped from user input. */
|
||||
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\u202A-\u202E\u2066-\u2069]/g;
|
||||
|
||||
/**
|
||||
* Strips control characters and truncates a string to a safe length.
|
||||
@@ -114,8 +114,8 @@ function sanitizeText(value, maxLen = 500) {
|
||||
return value.replace(CONTROL_CHAR_RE, "").slice(0, maxLen).trim();
|
||||
}
|
||||
|
||||
/** @type {RegExp} - Accepts IS standard IDs: letters, digits, spaces, colons, parens, dots, hyphens, slashes. */
|
||||
const STANDARD_ID_RE = /^[A-Za-z0-9 :()./-]{1,60}$/;
|
||||
/** @type {RegExp} - Accepts IS standard IDs: letters, digits, spaces, colons, parens, dots, hyphens. */
|
||||
const STANDARD_ID_RE = /^[A-Za-z0-9 :().-]{1,60}$/;
|
||||
|
||||
/**
|
||||
* Returns true if the value is a well-formed IS standard identifier.
|
||||
@@ -336,7 +336,7 @@ app.post("/api/recommend", async (req, res) => {
|
||||
const totalMs = Date.now() - t0;
|
||||
|
||||
log("POST /api/recommend", {
|
||||
query: effectiveQuery,
|
||||
query: sanitizeText(effectiveQuery, 200),
|
||||
results: retrieved.length,
|
||||
retrieval_ms: retrievalMs,
|
||||
llm_ms: llmMs,
|
||||
|
||||
@@ -21,8 +21,14 @@ const { EventEmitter } = require("events");
|
||||
const BRIDGE = path.join(__dirname, "../bridge/retrieve.py");
|
||||
/** @type {string} - Repository root, used as cwd for the Python subprocess. */
|
||||
const ROOT = path.join(__dirname, "../../..");
|
||||
/** @type {string} - Python executable; override with PYTHON_BIN env var. */
|
||||
const PYTHON = process.env.PYTHON_BIN || "python";
|
||||
/** @type {string} - Python executable; override with PYTHON_BIN env var (must be "python", "python3", or an absolute path to a Python interpreter). */
|
||||
const _pythonRaw = process.env.PYTHON_BIN || "python";
|
||||
const _PYTHON_ALLOWLIST = /^(python[23]?|\/[^\0]+)$/;
|
||||
if (!_PYTHON_ALLOWLIST.test(_pythonRaw)) {
|
||||
console.error(`[retriever] Invalid PYTHON_BIN value: ${JSON.stringify(_pythonRaw)}. Must be "python", "python3", or an absolute path.`);
|
||||
process.exit(1);
|
||||
}
|
||||
const PYTHON = _pythonRaw;
|
||||
|
||||
/** @type {number} - Maximum milliseconds to wait for the daemon to signal ready on cold start. */
|
||||
const BOOT_TIMEOUT_MS = 90_000;
|
||||
@@ -131,9 +137,17 @@ class PythonRetriever extends EventEmitter {
|
||||
if (msg.error) {
|
||||
item.reject(new Error(msg.error));
|
||||
} else {
|
||||
const raw = Array.isArray(msg.results) ? msg.results : [];
|
||||
const ALLOWED = new Set(["standard_id", "title", "category", "matched_section", "score"]);
|
||||
const results = raw.map((r) => {
|
||||
if (typeof r !== "object" || r === null) return null;
|
||||
const safe = {};
|
||||
for (const k of ALLOWED) if (k in r) safe[k] = r[k];
|
||||
return safe;
|
||||
}).filter(Boolean);
|
||||
item.resolve({
|
||||
results: msg.results || [],
|
||||
latency_seconds: msg.latency_seconds ?? 0,
|
||||
results,
|
||||
latency_seconds: typeof msg.latency_seconds === "number" ? msg.latency_seconds : 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
+10
-4
@@ -3,8 +3,14 @@
|
||||
* Kills any process already on PORT before starting index.js.
|
||||
* Run with: node web/server/start.js
|
||||
*/
|
||||
const { execSync, spawn } = require("child_process");
|
||||
const PORT = process.env.PORT || 5000;
|
||||
const { execSync, spawnSync, spawn } = require("child_process");
|
||||
|
||||
const rawPort = process.env.PORT || "5000";
|
||||
const PORT = parseInt(rawPort, 10);
|
||||
if (!Number.isInteger(PORT) || PORT < 1 || PORT > 65535) {
|
||||
console.error(`[start] Invalid PORT value: ${JSON.stringify(rawPort)}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
function killPort(port) {
|
||||
try {
|
||||
@@ -18,10 +24,10 @@ function killPort(port) {
|
||||
}
|
||||
for (const pid of pids) {
|
||||
console.log(`[start] Killing stale process PID ${pid} on port ${port}`);
|
||||
execSync(`taskkill /PID ${pid} /F`, { stdio: "ignore" });
|
||||
spawnSync("taskkill", ["/PID", pid, "/F"], { stdio: "ignore" });
|
||||
}
|
||||
} else {
|
||||
execSync(`fuser -k ${port}/tcp`, { stdio: "ignore" });
|
||||
spawnSync("fuser", [`${port}/tcp`, "-k"], { stdio: "ignore" });
|
||||
}
|
||||
} catch {
|
||||
// No process on that port -- fine
|
||||
|
||||
Reference in New Issue
Block a user