// packages/mcp-server/src/sdkServer.ts

import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import type { RagToolHandlerSet } from "../../core/src/runtimeContracts";
import { createDefaultMcpRuntime } from "./defaultRuntime";
import { createMcpToolHandlers } from "./handlers";
import { createLmStudioMcpRuntime } from "./lmstudioRuntime";

/**
 * Zod raw shape for a single inline document.
 *
 * `id` and `name` must be non-empty strings. `content` is any string
 * (an empty string is accepted). `metadata` is an optional record of
 * arbitrary values.
 */
const inlineDocumentShape = {
  id: z
    .string()
    .min(1)
    .describe("Document identifier"),
  name: z
    .string()
    .min(1)
    .describe("Document name"),
  content: z
    .string()
    .describe("Full document content"),
  metadata: z
    .record(z.unknown())
    .optional()
    .describe("Optional document metadata"),
};

/**
 * Zod raw shape for one already-chunked candidate.
 *
 * Used in this file both for the `chunks` corpus input and for the
 * `candidates` argument of the `rerank_only` tool. All string fields must be
 * non-empty; `score` must be a finite number >= 0; `metadata` is an optional
 * record of arbitrary values.
 */
const prechunkedCandidateShape = {
  sourceId: z
    .string()
    .min(1)
    .describe("Source identifier"),
  sourceName: z
    .string()
    .min(1)
    .describe("Source display name"),
  content: z
    .string()
    .min(1)
    .describe("Candidate chunk text"),
  score: z
    .number()
    .finite()
    .nonnegative()
    .describe("Initial relevance score"),
  metadata: z
    .record(z.unknown())
    .optional()
    .describe("Optional chunk metadata"),
};

/** `options.policy`: grounding mode, answerability gate and ambiguous-query behavior. */
const policyGroupSchema = z.object({
  groundingMode: z
    .enum(["off", "warn-on-weak-evidence", "require-evidence"])
    .optional(),
  answerabilityGateEnabled: z.boolean().optional(),
  // Threshold is constrained to the closed interval [0, 1].
  answerabilityGateThreshold: z.number().min(0).max(1).optional(),
  ambiguousQueryBehavior: z
    .enum(["proceed", "ask-for-clarification", "warn"])
    .optional(),
});

/** `options.routing`: requested route, token limits and corrective settings. */
const routingGroupSchema = z.object({
  requestedRoute: z
    .enum(["auto", "no-retrieval", "full-context", "retrieval", "corrective"])
    .optional(),
  // Both token counts must be integers >= 1.
  fullContextTokenLimit: z.number().int().min(1).optional(),
  activeModelContextTokens: z.number().int().min(1).optional(),
  correctiveEnabled: z.boolean().optional(),
  // Integer in 0..4.
  correctiveMaxAttempts: z.number().int().min(0).max(4).optional(),
});

/** `options.retrieval`: multi-query, fusion, candidate and evidence limits. */
const retrievalGroupSchema = z.object({
  multiQueryEnabled: z.boolean().optional(),
  // Integer in 1..8.
  multiQueryCount: z.number().int().min(1).max(8).optional(),
  fusionMethod: z.enum(["reciprocal-rank-fusion", "max-score"]).optional(),
  hybridEnabled: z.boolean().optional(),
  // Integer in 1..32.
  maxCandidates: z.number().int().min(1).max(32).optional(),
  // Integer in 1..20.
  maxEvidenceBlocks: z.number().int().min(1).max(20).optional(),
  // Both values are constrained to [0, 1].
  minScore: z.number().min(0).max(1).optional(),
  dedupeSimilarityThreshold: z.number().min(0).max(1).optional(),
});

/** `options.rerank`: rerank toggle, strategy, top-K and model selection. */
const rerankGroupSchema = z.object({
  enabled: z.boolean().optional(),
  strategy: z.enum(["heuristic-v1", "heuristic-then-llm"]).optional(),
  // Integer in 1..20.
  topK: z.number().int().min(1).max(20).optional(),
  modelSource: z
    .enum(["active-chat-model", "auto-detect", "manual-model-id"])
    .optional(),
  modelId: z.string().min(1).optional(),
});

/** `options.safety`: boolean switches only. */
const safetyGroupSchema = z.object({
  sanitizeRetrievedText: z.boolean().optional(),
  stripInstructionalSpans: z.boolean().optional(),
  requireEvidence: z.boolean().optional(),
});

/**
 * Zod raw shape for the grouped `options` tool argument.
 *
 * Every group, and every field inside a group, is optional.
 */
const groupedOptionsShape = {
  policy: policyGroupSchema.optional(),
  routing: routingGroupSchema.optional(),
  retrieval: retrievalGroupSchema.optional(),
  rerank: rerankGroupSchema.optional(),
  safety: safetyGroupSchema.optional(),
};

/**
 * Zod raw shape for the flat `retrieval` tool argument.
 *
 * The tools in this file describe that argument as a legacy alias for
 * `options.retrieval` / `options.rerank`. Every field is optional.
 */
const retrievalOverridesShape = {
  multiQueryEnabled: z.boolean().optional(),
  // Integer in 1..8.
  multiQueryCount: z
    .number()
    .int()
    .min(1)
    .max(8)
    .optional(),
  fusionMethod: z
    .enum(["reciprocal-rank-fusion", "max-score"])
    .optional(),
  hybridEnabled: z.boolean().optional(),
  rerankEnabled: z.boolean().optional(),
  // Integer in 1..20.
  rerankTopK: z
    .number()
    .int()
    .min(1)
    .max(20)
    .optional(),
  rerankModelSource: z
    .enum(["active-chat-model", "auto-detect", "manual-model-id"])
    .optional(),
  rerankModelId: z
    .string()
    .min(1)
    .optional(),
  // Integer in 1..20.
  maxEvidenceBlocks: z
    .number()
    .int()
    .min(1)
    .max(20)
    .optional(),
};

/**
 * Zod raw shape for the corpus-carrying tool arguments: inline documents,
 * filesystem paths, or pre-chunked candidates.
 *
 * All three are optional, and this schema does not require that at least one
 * of them is supplied.
 */
const corpusInputShape = {
  documents: z
    .array(z.object(inlineDocumentShape))
    .optional(),
  // Each path must be a non-empty string.
  paths: z
    .array(z.string().min(1))
    .optional(),
  chunks: z
    .array(z.object(prechunkedCandidateShape))
    .optional(),
};

/**
 * Builds the "rag-v2-mcp" MCP server and registers one tool per handler in
 * `handlers`.
 *
 * Registered tools, in order: `rag_answer`, `rag_search`,
 * `rag_prepare_prompt`, `filesystem_browse`, `file_info`, `read_file`,
 * `corpus_inspect`, `rerank_only`.
 *
 * Every tool forwards its arguments unchanged to the matching handler and
 * returns the handler's result twice: pretty-printed JSON in a single text
 * content item, and the raw value as `structuredContent`.
 *
 * @param handlers Handler set that the tools delegate to.
 * @returns The configured (not yet connected) server instance.
 */
export function createOfficialMcpServer(handlers: RagToolHandlerSet) {
  const server = new McpServer({
    name: "rag-v2-mcp",
    version: "0.1.0",
  });
  // `registerTool` is deliberately reached through `any`, exactly as before,
  // so this file does not depend on the SDK's generic schema typings.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const registerTool = (server as any).registerTool.bind(server) as any;

  /** Wraps a handler result in the tool-result envelope shared by all tools. */
  const toToolResult = (result: unknown) => {
    // JSON.stringify returns undefined (not a string) for an undefined input;
    // fall back to "null" so the text item always carries a string.
    const serialized: string | undefined = JSON.stringify(result, null, 2);
    return {
      content: [
        {
          type: "text",
          text: serialized ?? "null",
        },
      ],
      structuredContent: result,
    };
  };

  /** Registers one tool whose callback awaits `run` and wraps its result. */
  const defineTool = (
    name: string,
    description: string,
    inputSchema: Record<string, unknown>,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    run: (args: any) => unknown
  ): void => {
    registerTool(
      name,
      { description, inputSchema },
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      async (args: any) => toToolResult(await run(args))
    );
  };

  /** Fresh `mode` / `groundingMode` fields shared by rag_answer and rag_prepare_prompt. */
  const routeOverrideFields = () => ({
    mode: z
      .enum(["auto", "full-context", "retrieval", "corrective"])
      .optional()
      .describe("Preferred retrieval mode"),
    groundingMode: z
      .enum(["off", "warn-on-weak-evidence", "require-evidence"])
      .optional()
      .describe("Legacy grounding alias; prefer options.policy.groundingMode"),
  });

  /** Fresh `options` / `retrieval` fields shared by the three rag_* tools. */
  const tuningFields = () => ({
    options: z.object(groupedOptionsShape).optional(),
    retrieval: z
      .object(retrievalOverridesShape)
      .optional()
      .describe("Legacy retrieval alias; prefer options.retrieval / options.rerank"),
  });

  // Handlers are invoked as `handlers.x(args)` (never passed as bare function
  // references) so that `this` stays bound to the handler set.

  defineTool(
    "rag_answer",
    "Answer a grounded question over inline documents, filesystem paths, or pre-chunked corpora.",
    {
      query: z.string().min(1).describe("User query to answer"),
      ...routeOverrideFields(),
      ...tuningFields(),
      ...corpusInputShape,
    },
    (args) => handlers.ragAnswer(args)
  );

  defineTool(
    "rag_search",
    "Search a grounded corpus and return ranked candidate chunks.",
    {
      query: z.string().min(1).describe("Query to retrieve against the corpus"),
      ...tuningFields(),
      ...corpusInputShape,
    },
    (args) => handlers.ragSearch(args)
  );

  defineTool(
    "rag_prepare_prompt",
    "Prepare a grounded prompt package without synthesizing the final answer.",
    {
      query: z.string().min(1).describe("User query to ground"),
      ...routeOverrideFields(),
      ...tuningFields(),
      ...corpusInputShape,
    },
    (args) => handlers.ragPreparePrompt(args)
  );

  defineTool(
    "filesystem_browse",
    "Browse the filesystem without ingesting it as a RAG corpus. Use this to inspect target directories before searching or answering.",
    {
      path: z.string().min(1).describe("Filesystem path to inspect"),
      recursive: z.boolean().optional().describe("Whether to recurse into subdirectories"),
      maxDepth: z.number().int().min(0).max(32).optional().describe("Maximum recursion depth when recursive is enabled"),
      maxEntries: z.number().int().min(1).max(5000).optional().describe("Maximum number of entries to return"),
      includeHidden: z.boolean().optional().describe("Whether to include hidden files and directories"),
    },
    (args) => handlers.filesystemBrowse(args)
  );

  defineTool(
    "file_info",
    "Get metadata about a filesystem path without ingesting it as a RAG corpus.",
    {
      path: z.string().min(1).describe("Filesystem path to inspect"),
    },
    (args) => handlers.fileInfo(args)
  );

  defineTool(
    "read_file",
    "Read a bounded text excerpt from a file without ingesting it as a RAG corpus.",
    {
      path: z.string().min(1).describe("Filesystem path to a text-like file"),
      startLine: z.number().int().min(0).optional().describe("0-based starting line"),
      maxLines: z.number().int().min(1).max(2000).optional().describe("Maximum lines to read"),
      maxChars: z.number().int().min(1).max(200000).optional().describe("Maximum characters to return"),
    },
    (args) => handlers.readFile(args)
  );

  defineTool(
    "corpus_inspect",
    "Inspect a corpus and recommend whether full-context or retrieval is more appropriate.",
    {
      ...corpusInputShape,
    },
    (args) => handlers.corpusInspect(args)
  );

  defineTool(
    "rerank_only",
    "Rerank pre-supplied candidate chunks for a query.",
    {
      query: z.string().min(1).describe("Query used for reranking"),
      candidates: z
        .array(z.object(prechunkedCandidateShape))
        .min(1)
        .describe("Candidate chunks to rerank"),
      topK: z.number().int().min(1).max(20).optional(),
    },
    (args) => handlers.rerankOnly(args)
  );

  return server;
}

/**
 * Starts the MCP server on a stdio transport.
 *
 * The runtime is chosen from the `RAG_V2_MCP_RUNTIME` environment variable
 * (surrounding whitespace and letter case are ignored):
 * - `lmstudio` selects the LM Studio runtime;
 * - anything else, including an unset or empty variable, selects the default
 *   runtime. An unrecognized non-empty value is reported on stderr.
 *
 * The startup message names the runtime that was actually selected, not the
 * raw environment value.
 *
 * @returns A promise that resolves once the server is connected and the
 *   startup message has been written via `console.error`.
 */
export async function startOfficialStdioMcpServer(): Promise<void> {
  const requestedMode = process.env.RAG_V2_MCP_RUNTIME?.trim().toLowerCase() ?? "";
  const runtimeMode = requestedMode === "lmstudio" ? "lmstudio" : "default";
  if (requestedMode !== "" && requestedMode !== runtimeMode) {
    console.error(
      `rag-v2 MCP server: unrecognized RAG_V2_MCP_RUNTIME "${requestedMode}"; using default runtime`
    );
  }

  const runtime =
    runtimeMode === "lmstudio"
      ? await createLmStudioMcpRuntime()
      : createDefaultMcpRuntime();
  const handlers = createMcpToolHandlers(runtime);
  const server = createOfficialMcpServer(handlers);
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error(`rag-v2 MCP server running on stdio (${runtimeMode} runtime)`);
}