diff --git a/data/processed/retrieval_results.json b/data/processed/retrieval_results.json index 0018cbb..5ebfff5 100644 --- a/data/processed/retrieval_results.json +++ b/data/processed/retrieval_results.json @@ -46,7 +46,10 @@ "matched_section": "Degree Of Whiteness" } ], - "latency_seconds": 0.0586 + "latency_seconds": 0.024, + "expected_standards": [ + "IS 269: 1989" + ] }, { "id": "PUB-02", @@ -95,7 +98,10 @@ "matched_section": "Scope" } ], - "latency_seconds": 0.0478 + "latency_seconds": 0.0168, + "expected_standards": [ + "IS 383: 1970" + ] }, { "id": "PUB-03", @@ -144,7 +150,10 @@ "matched_section": "Tests" } ], - "latency_seconds": 0.0448 + "latency_seconds": 0.0165, + "expected_standards": [ + "IS 458: 2003" + ] }, { "id": "PUB-04", @@ -193,7 +202,10 @@ "matched_section": "Scope" } ], - "latency_seconds": 0.0452 + "latency_seconds": 0.0161, + "expected_standards": [ + "IS 2185 (Part 2): 1983" + ] }, { "id": "PUB-05", @@ -242,7 +254,10 @@ "matched_section": "Scope" } ], - "latency_seconds": 0.0402 + "latency_seconds": 0.0154, + "expected_standards": [ + "IS 459: 1992" + ] }, { "id": "PUB-06", @@ -291,7 +306,10 @@ "matched_section": "Scope" } ], - "latency_seconds": 0.0361 + "latency_seconds": 0.0152, + "expected_standards": [ + "IS 455: 1989" + ] }, { "id": "PUB-07", @@ -340,7 +358,10 @@ "matched_section": "Physical Requirements" } ], - "latency_seconds": 0.0384 + "latency_seconds": 0.0174, + "expected_standards": [ + "IS 1489 (Part 2): 1991" + ] }, { "id": "PUB-08", @@ -389,7 +410,10 @@ "matched_section": "Classification" } ], - "latency_seconds": 0.0352 + "latency_seconds": 0.0167, + "expected_standards": [ + "IS 3466: 1988" + ] }, { "id": "PUB-09", @@ -438,7 +462,10 @@ "matched_section": "Design And Manufacture" } ], - "latency_seconds": 0.0432 + "latency_seconds": 0.0178, + "expected_standards": [ + "IS 6909: 1990" + ] }, { "id": "PUB-10", @@ -487,6 +514,9 @@ "matched_section": "Delivery" } ], - "latency_seconds": 0.0333 + "latency_seconds": 0.0156, + "expected_standards": [ + "IS 8042: 1989" + ] } ] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d209a8c..d470d43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,5 @@ pymupdf>=1.24.0 +faiss-cpu>=1.7.4 +rank-bm25>=0.2.2 +sentence-transformers>=3.0.0 +numpy>=1.26.0 diff --git a/src/inference.py b/src/inference.py index ca231a6..9ec799a 100644 --- a/src/inference.py +++ b/src/inference.py @@ -267,7 +267,7 @@ class Retriever: "standard_id": sid, "title": std_rec.get("title", std_chunk_repr[sid].get("title", "")), "category": std_rec.get("category", std_chunk_repr[sid].get("category", "")), - "score": round(score, 4), + "score": round(float(score), 4), "matched_section": std_chunk_repr[sid].get("section", ""), }) @@ -303,14 +303,23 @@ def load_or_build(force_rebuild: bool = False) -> tuple[RetrievalIndex, Retrieve # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- -def _format_result(query_id: str, query: str, results: list[dict], latency: float) -> dict: - return { +def _format_result( + query_id: str, + query: str, + results: list[dict], + latency: float, + expected_standards: list[str] | None = None, +) -> dict: + out: dict[str, Any] = { "id": query_id, "query": query, "retrieved_standards": [r["standard_id"] for r in results], "details": results, "latency_seconds": round(latency, 4), } + if expected_standards is not None: + out["expected_standards"] = expected_standards + return out def main() -> None: @@ -348,9 +357,9 @@ def main() -> None: qtext = q.get("query", "") results, latency = retriever.retrieve(qtext) latencies.append(latency) - out = _format_result(qid, qtext, results, latency) - all_results.append(out) expected = q.get("expected_standards", []) + out = _format_result(qid, qtext, results, latency, expected_standards=expected or None) + all_results.append(out) hit = any(r["standard_id"] in expected for r in results) print(f"[{qid}] latency={latency:.3f}s hit={hit} retrieved={[r['standard_id'] for r in results]}")