fix: complete requirements.txt and correct inference.py output.

- Add faiss-cpu, rank-bm25, sentence-transformers, and numpy to requirements.txt (previously only pymupdf was listed; the other dependencies had to be installed manually).
- Cast score to float() before round() to avoid numpy type serialization errors.
- Pass expected_standards through _format_result for eval script compatibility.
- Update retrieval_results.json with expected_standards per query for eval.
2026-05-03 00:03:03 +05:30
parent 8e1348fb63
commit 29b32dfcac
3 changed files with 58 additions and 15 deletions
@@ -46,7 +46,10 @@
        "matched_section": "Degree Of Whiteness"
      }
    ],
-    "latency_seconds": 0.0586
+    "latency_seconds": 0.024,
+    "expected_standards": [
+      "IS 269: 1989"
+    ]
  },
  {
    "id": "PUB-02",
@@ -95,7 +98,10 @@
        "matched_section": "Scope"
      }
    ],
-    "latency_seconds": 0.0478
+    "latency_seconds": 0.0168,
+    "expected_standards": [
+      "IS 383: 1970"
+    ]
  },
  {
    "id": "PUB-03",
@@ -144,7 +150,10 @@
        "matched_section": "Tests"
      }
    ],
-    "latency_seconds": 0.0448
+    "latency_seconds": 0.0165,
+    "expected_standards": [
+      "IS 458: 2003"
+    ]
  },
  {
    "id": "PUB-04",
@@ -193,7 +202,10 @@
        "matched_section": "Scope"
      }
    ],
-    "latency_seconds": 0.0452
+    "latency_seconds": 0.0161,
+    "expected_standards": [
+      "IS 2185 (Part 2): 1983"
+    ]
  },
  {
    "id": "PUB-05",
@@ -242,7 +254,10 @@
        "matched_section": "Scope"
      }
    ],
-    "latency_seconds": 0.0402
+    "latency_seconds": 0.0154,
+    "expected_standards": [
+      "IS 459: 1992"
+    ]
  },
  {
    "id": "PUB-06",
@@ -291,7 +306,10 @@
        "matched_section": "Scope"
      }
    ],
-    "latency_seconds": 0.0361
+    "latency_seconds": 0.0152,
+    "expected_standards": [
+      "IS 455: 1989"
+    ]
  },
  {
    "id": "PUB-07",
@@ -340,7 +358,10 @@
        "matched_section": "Physical Requirements"
      }
    ],
-    "latency_seconds": 0.0384
+    "latency_seconds": 0.0174,
+    "expected_standards": [
+      "IS 1489 (Part 2): 1991"
+    ]
  },
  {
    "id": "PUB-08",
@@ -389,7 +410,10 @@
        "matched_section": "Classification"
      }
    ],
-    "latency_seconds": 0.0352
+    "latency_seconds": 0.0167,
+    "expected_standards": [
+      "IS 3466: 1988"
+    ]
  },
  {
    "id": "PUB-09",
@@ -438,7 +462,10 @@
        "matched_section": "Design And Manufacture"
      }
    ],
-    "latency_seconds": 0.0432
+    "latency_seconds": 0.0178,
+    "expected_standards": [
+      "IS 6909: 1990"
+    ]
  },
  {
    "id": "PUB-10",
@@ -487,6 +514,9 @@
        "matched_section": "Delivery"
      }
    ],
-    "latency_seconds": 0.0333
+    "latency_seconds": 0.0156,
+    "expected_standards": [
+      "IS 8042: 1989"
+    ]
  }
 ]
@@ -1 +1,5 @@
 pymupdf>=1.24.0
+faiss-cpu>=1.7.4
+rank-bm25>=0.2.2
+sentence-transformers>=3.0.0
+numpy>=1.26.0
@@ -267,7 +267,7 @@ class Retriever:
                "standard_id": sid,
                "title": std_rec.get("title", std_chunk_repr[sid].get("title", "")),
                "category": std_rec.get("category", std_chunk_repr[sid].get("category", "")),
-                "score": round(score, 4),
+                "score": round(float(score), 4),
                "matched_section": std_chunk_repr[sid].get("section", ""),
            })

@@ -303,14 +303,23 @@ def load_or_build(force_rebuild: bool = False) -> tuple[RetrievalIndex, Retrieve
 # ---------------------------------------------------------------------------
 # CLI
 # ---------------------------------------------------------------------------
-def _format_result(query_id: str, query: str, results: list[dict], latency: float) -> dict:
-    return {
+def _format_result(
+    query_id: str,
+    query: str,
+    results: list[dict],
+    latency: float,
+    expected_standards: list[str] | None = None,
+) -> dict:
+    out: dict[str, Any] = {
        "id": query_id,
        "query": query,
        "retrieved_standards": [r["standard_id"] for r in results],
        "details": results,
        "latency_seconds": round(latency, 4),
    }
+    if expected_standards is not None:
+        out["expected_standards"] = expected_standards
+    return out


 def main() -> None:
@@ -348,9 +357,9 @@ def main() -> None:
            qtext = q.get("query", "")
            results, latency = retriever.retrieve(qtext)
            latencies.append(latency)
-            out = _format_result(qid, qtext, results, latency)
-            all_results.append(out)
            expected = q.get("expected_standards", [])
+            out = _format_result(qid, qtext, results, latency, expected_standards=expected or None)
+            all_results.append(out)
            hit = any(r["standard_id"] in expected for r in results)
            print(f"[{qid}] latency={latency:.3f}s  hit={hit}  retrieved={[r['standard_id'] for r in results]}")