Skip to content

Commit 1cd4e45

Browse files
committed
Add API-level validation for generation parameters and guard retriever initialization
Invalid generation parameters such as negative top_k were previously forwarded to the model layer, which could result in runtime errors. This change adds ge/le constraints to generation parameters in PromptedLLMRequest so that invalid inputs are rejected with a 422 validation error at the API layer. Additionally, retriever initialization is now guarded to prevent startup errors when DOC_PATHS is empty.
1 parent 8d5ead8 commit 1cd4e45

File tree

1 file changed

+44
-13
lines changed

1 file changed

+44
-13
lines changed

app/routes/api.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
from fastapi import APIRouter, Depends, HTTPException, Header, Query, Request
55
from sqlalchemy.orm import Session
6-
from pydantic import BaseModel, Field
6+
from pydantic import BaseModel, Field
77
import time
88
import logging
99
import os
@@ -21,17 +21,47 @@ class ChatMessage(BaseModel):
2121
content: str
2222

2323
class PromptedLLMRequest(BaseModel):
    """Request model for the ask-llm-prompted endpoint.

    Generation parameters carry ``ge``/``le`` constraints so that invalid
    values (e.g. a negative ``top_k``) are rejected with a 422 validation
    error at the API layer instead of being forwarded to the model layer.
    """

    # Chat mode: when True, `messages` is expected; otherwise `question`
    # (optionally with `custom_prompt`) is used. NOTE(review): the
    # cross-field requirement is presumably enforced in the route handler —
    # confirm, since no validator is visible here.
    chat: bool = False
    question: Optional[str] = None
    custom_prompt: Optional[str] = None
    messages: Optional[List[ChatMessage]] = None

    # Generation parameters — bounds enforced by Pydantic field validation.
    max_length: int = Field(
        1024,
        ge=1,
        le=2048,
        description="Must be between 1 and 2048",
    )

    # Whether to truncate over-long input before generation.
    truncation: bool = True

    repetition_penalty: float = Field(
        1.1,
        ge=1.0,
        le=1.5,
        description="Must be between 1.0 and 1.5",
    )

    temperature: float = Field(
        0.7,
        ge=0.0,
        le=1.0,
        description="Must be between 0 and 1",
    )

    top_p: float = Field(
        0.9,
        ge=0.0,
        le=1.0,
        description="Must be between 0 and 1",
    )

    top_k: int = Field(
        50,
        ge=1,
        le=100,
        description="Must be between 1 and 100",
    )
3565

3666
router = APIRouter(tags=["api"])
3767

@@ -40,7 +70,8 @@ class PromptedLLMRequest(BaseModel):
4070

4171
# load ai agent and document paths
agent = RAGAgent(model=settings.DEFAULT_MODEL)
# Guard: only build the vector store when document paths are configured;
# an empty DOC_PATHS previously caused startup errors in setup_vectorstore.
# When the guard is skipped, `agent.retriever` keeps whatever default
# RAGAgent assigns — presumably None; verify downstream handlers tolerate it.
if settings.DOC_PATHS:
    agent.retriever = agent.setup_vectorstore(settings.DOC_PATHS)

# user quotas tracking
# Module-level, in-memory store keyed by user; contents reset on restart.
user_quotas: Dict[str, Dict] = {}

0 commit comments

Comments
 (0)