// src/toolsProvider.ts

/**
 * @file toolsProvider.ts
 * Defines the web-research and local-document (RAG) tools this plugin offers to LM Studio.
 */

import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";

import { configSchematics } from "./config";
import { runDeepResearch } from "./researcher";
import { ResearchConfig } from "./types";
import { DepthPreset, getDepthProfile } from "./constants";
import { searchDDG } from "./net/ddg";
import { fetchPage } from "./net/http";
import { extractPage, computeRelevance } from "./net/extractor";
import {
  isPdfUrl,
  isPdfContentType,
  extractPdf,
  PdfImage,
} from "./net/pdf-extractor";
import { scoreCandidate, rankCandidates } from "./scoring/authority";
import { sleep } from "./net/http";
import {
  MULTI_READ_BATCH_DELAY_MS,
  CONTENT_LIMIT_MIN,
  CONTENT_LIMIT_MAX,
  CONTENT_LIMIT_EXTENDED,
  CONTENT_LIMIT_DEFAULT,
  SEARCH_RESULTS_MIN,
  SEARCH_RESULTS_MAX,
} from "./constants";

import {
  getGlobalStore,
  LocalCollection,
  LibraryPriority,
  LibraryTag,
} from "./local/store";
import { isLocalUrl } from "./local/search";

/**
 * Reads the plugin configuration and normalises it into plain values.
 *
 * - `researchDepth` is mapped onto one of the known depth presets; any other
 *   value resolves to "standard".
 * - `contentLimitPerPage` falls back to the resolved depth profile's
 *   `defaultContentLimit` when the configured value is falsy.
 * - The link-following, AI-planning and local-sources toggles count as
 *   enabled unless their configured value is exactly "off".
 * - `safeSearch` falls back to "moderate" when the configured value is falsy.
 *
 * @param ctl Controller used to obtain the plugin configuration.
 * @returns A read-only snapshot of the normalised settings.
 */
function readConfig(ctl: ToolsProviderController) {
  const pluginConfig = ctl.getPluginConfig(configSchematics);

  const rawDepth = pluginConfig.get("researchDepth") as string;
  let depthPreset: DepthPreset;
  switch (rawDepth) {
    case "shallow":
      depthPreset = "shallow";
      break;
    case "deep":
      depthPreset = "deep";
      break;
    case "deeper":
      depthPreset = "deeper";
      break;
    case "exhaustive":
      depthPreset = "exhaustive";
      break;
    default:
      // Covers "standard" itself as well as anything unrecognised.
      depthPreset = "standard";
  }

  // `||` (not `??`) so that a configured 0 also picks up the profile default.
  const configuredLimit = pluginConfig.get("contentLimitPerPage") as number;
  const contentLimitPerPage =
    configuredLimit || getDepthProfile(depthPreset).defaultContentLimit;

  const enableLinkFollowing =
    (pluginConfig.get("enableLinkFollowing") as string) !== "off";
  const enableAIPlanning =
    (pluginConfig.get("enableAIPlanning") as string) !== "off";

  const safeSearch =
    (pluginConfig.get("safeSearch") as "strict" | "moderate" | "off") ||
    "moderate";

  const enableLocalSources =
    (pluginConfig.get("enableLocalSources") as string) !== "off";

  return {
    depthPreset,
    contentLimitPerPage,
    enableLinkFollowing,
    enableAIPlanning,
    safeSearch,
    enableLocalSources,
  } as const;
}

export async function toolsProvider(
  ctl: ToolsProviderController,
): Promise<Tool[]> {
  /**
   * "Deep Research" tool.
   *
   * Combines the plugin settings with the per-call overrides into a
   * `ResearchConfig`, hands it to `runDeepResearch`, and returns a summary
   * object holding the Markdown report plus a flattened per-source index.
   * On failure a plain string is returned instead: a cancellation notice when
   * the run was aborted, otherwise an error message (also sent to `warn`).
   */
  const deepResearchTool = tool({
    name: "Deep Research",
    description: `Performs autonomous, multi-round deep web research using a Kimi-style Agent Swarm with AI-powered synthesis.

HOW IT WORKS:
  1. AI TASK DECOMPOSITION: The loaded model analyses the topic and dynamically creates specialised worker agents with roles. Each worker gets custom queries tailored to its assignment.

  2. PARALLEL SWARM EXECUTION: All workers launch simultaneously:
     • Workers search DuckDuckGo, score candidates by domain authority, fetch pages concurrently
     • Post-fetch RELEVANCE FILTERING discards off-topic pages
     • Multi-window content fingerprinting prevents duplicates
     • Depth and Academic workers follow in-page citations

  3. INTER-AGENT COMMUNICATION: After Round 1, an AI coordinator summarises key findings and suggests follow-up angles for gap-fill workers.

  4. ADAPTIVE GAP-FILL: Coverage gaps are filled by TARGETED workers (e.g., Academic worker for missing evidence, Critical worker for missing controversy).

  5. ADAPTIVE SOURCE COLLECTION: No hard source cap — each worker has its own page budget that scales with depth preset. Collection stops only when: all research dimensions are covered, a round yields zero new sources (stagnation), or all rounds are exhausted.

  6. AI NARRATIVE SYNTHESIS: The loaded model writes a coherent, multi-paragraph research analysis with inline citations.

  7. CONTRADICTION DETECTION: The model identifies claims where sources disagree, with severity ratings.

  8. LOCAL DOCUMENT INTEGRATION: When enabled, each worker searches your indexed RAG libraries BEFORE hitting the web using a PROGRESSIVE SOURCE APPROACH:
     • PROPRIETARY libraries are searched first (highest trust, your confidential data)
     • INTERNAL libraries second (shared team knowledge)
     • REFERENCE libraries third (curated reference materials)
     • GENERAL libraries last (miscellaneous)
     Workers auto-route to the right library by tag: the academic worker searches 'academic'-tagged libraries, the regulatory worker searches 'legal'/'policy' ones, etc.
     Local sources are blended into the final report with [local] origin tags.

WHAT YOU GET:
  A structured Markdown report including:
  - AI-written narrative analysis (primary section)
  - Cross-source contradictions with severity ratings
  - Coverage table (upto 12 research dimensions)
  - Swarm activity summary (sources per worker)
  - Cross-source consensus detection
  - Key findings grouped by dimension (detail layer)
  - Full source details with domain authority, relevance score, and publication date
  - Numbered citation index

USE THIS TOOL for thorough, cited research. Not for simple lookups.
When Local Document Sources is enabled in settings, your indexed RAG libraries are searched progressively (proprietary → internal → reference → general) alongside the web — each worker draws from your most trusted data first, then fills gaps from public sources. Use 'RAG Add Library' to create libraries with priority tiers and auto-routing tags.`,
    parameters: {
      topic: z
        .string()
        .min(3)
        .describe(
          "The research topic or question. Be specific. " +
            "Example: 'long-term safety profile of GLP-1 receptor agonists' rather than just 'weight loss drugs'.",
        ),
      focusAreas: z
        .array(z.string())
        .max(6)
        .optional()
        .describe(
          "Optional sub-topics or angles to emphasise across all worker queries. " +
            "Example: ['side effects', 'clinical trial data', 'FDA approval status']",
        ),
      depthOverride: z
        .enum(["shallow", "standard", "deep", "deeper", "exhaustive"])
        .optional()
        .describe(
          "Override depth for this call only. " +
            "shallow = 1 round (~10-25 sources, fast) · " +
            "standard = 3 rounds (~30-60 sources) · " +
            "deep = 5 rounds (~60-120 sources, thorough) · " +
            "deeper = 10 rounds (~100-200+ sources, very thorough) · " +
            "exhaustive = 15 rounds (200+ sources, maximum depth)",
        ),
      contentLimitOverride: z
        .number()
        .int()
        .min(CONTENT_LIMIT_MIN)
        .max(CONTENT_LIMIT_MAX)
        .optional()
        .describe(
          "Override chars-per-page for this call only. " +
            "Higher = richer context per source but slower overall.",
        ),
    },

    implementation: async (
      { topic, focusAreas, depthOverride, contentLimitOverride },
      { status, warn, signal },
    ) => {
      const settings = readConfig(ctl);

      // Per-call overrides win; everything else comes from plugin settings.
      const request: ResearchConfig = {
        topic,
        focusAreas: focusAreas ?? [],
        depthPreset: (depthOverride as DepthPreset) ?? settings.depthPreset,
        contentLimitPerPage:
          contentLimitOverride ?? settings.contentLimitPerPage,
        enableLinkFollowing: settings.enableLinkFollowing,
        enableAIPlanning: settings.enableAIPlanning,
        safeSearch: settings.safeSearch,
        enableLocalSources: settings.enableLocalSources,
      };

      try {
        const outcome = await runDeepResearch(request, status, warn, signal);
        const { report } = outcome;

        return {
          topic,
          totalRounds: outcome.totalRounds,
          totalSources: outcome.totalSources,
          queriesUsed: outcome.queriesUsed,
          coveredDimensions: report.coveredDims,
          gapDimensions: report.gapDims,
          hasAISynthesis: !!report.aiSynthesis,
          contradictions: report.contradictions.length,
          report: report.markdown,
          sourceIndex: report.sources.map((src) => {
            const {
              index,
              title,
              url,
              published,
              domainScore,
              tier,
              workerRole,
              workerLabel,
              origin,
            } = src;
            return {
              index,
              title,
              url,
              published,
              domainScore,
              tier,
              workerRole,
              workerLabel,
              // Relevance is reported as a rounded percentage.
              relevance: Math.round(src.relevanceScore * 100),
              origin,
              // Only the first 200 characters of the description are kept.
              excerpt: src.description.slice(0, 200),
            };
          }),
        };
      } catch (err: unknown) {
        const cancelled = isAbortError(err) || signal.aborted;
        if (cancelled) return "Research cancelled by user.";

        const detail = errorMessage(err);
        warn(`Deep research error: ${detail}`);
        return `Error during deep research: ${detail}`;
      }
    },
  });

  /**
   * "Research Search" tool.
   *
   * Runs a single DuckDuckGo query (honouring the configured safe-search
   * level), scores every hit with `scoreCandidate`, ranks them with
   * `rankCandidates`, and returns the ranked list with 1-based `rank` values.
   * On failure a plain string is returned: a cancellation notice when aborted,
   * otherwise an error message (also sent to `warn`).
   */
  const researchSearchTool = tool({
    name: "Research Search",
    description:
      "Search DuckDuckGo and return scored, ranked results with domain authority tiers. " +
      "Each result includes a domain score (0-100), source tier (academic/government/news/etc.), " +
      "URL quality score, and freshness estimate. Results are ranked by combined quality. " +
      // Trailing space added: the two sentences were previously glued
      // together as "'Deep Research'.Don't use this…".
      "Use this for focused lookups. For full research, use 'Deep Research'. " +
      "Don't use this for searching local files.",
    parameters: {
      query: z
        .string()
        .min(2)
        .describe(
          "Search query — use natural language as you would type into a search engine.",
        ),
      maxResults: z
        .number()
        .int()
        .min(SEARCH_RESULTS_MIN)
        .max(SEARCH_RESULTS_MAX)
        .optional()
        .describe("Max results to return (default: 8)."),
    },

    implementation: async ({ query, maxResults }, { status, warn, signal }) => {
      const cfg = readConfig(ctl);
      // Matches the default advertised in the parameter description.
      const max = maxResults ?? 8;

      status(`Searching: "${query}"`);

      try {
        const hits = await searchDDG(query, max, cfg.safeSearch, signal);
        const scored = hits.map((h) => scoreCandidate(h, query));
        const ranked = rankCandidates(scored, max);

        status(`Found ${ranked.length} ranked results.`);

        return ranked.map((c, i) => ({
          rank: i + 1,
          url: c.url,
          title: c.title,
          snippet: c.snippet,
          domainScore: c.domainScore,
          freshnessScore: c.freshnessScore,
          urlQuality: c.urlQuality,
          totalScore: c.totalScore,
          tier: c.tier,
        }));
      } catch (err: unknown) {
        if (isAbortError(err) || signal.aborted) return "Search cancelled.";
        const msg = errorMessage(err);
        warn(`Search error: ${msg}`);
        return `Error during search: ${msg}`;
      }
    },
  });

  /**
   * "Research Read Page" tool.
   *
   * Fetches one URL and returns its extracted text and metadata. When the
   * response is a PDF the bytes go through `extractPdf` and any extracted
   * images are listed both as structured `images` entries and as a text note
   * appended to `content`; otherwise the HTML goes through `extractPage`.
   * On failure a plain string is returned: a cancellation notice when aborted,
   * otherwise an error message (also sent to `warn`).
   */
  const researchReadPageTool = tool({
    name: "Research Read Page",
    description:
      "Visit a website URL and return cleanly extracted text using Mozilla Readability " +
      "(the same engine as Firefox Reader Mode). " +
      "Automatically detects PDF URLs (arXiv, Springer, IEEE, etc.) and extracts " +
      "text content and embedded images from the PDF instead of returning garbled bytes. " +
      "Also returns: title, description, detected publication date, word count, " +
      "domain authority score, source tier, and top outbound links. " +
      "For PDFs, embedded images are saved to temp files and returned as file paths " +
      "with dimensions and size metadata (not inline base64). " +
      // A duplicated "with dimensions and size metadata…" fragment used to
      // follow this sentence without a separating space; it has been removed.
      "Use this to read individual pages. For reading multiple URLs at once use 'Research Multi-Read'. " +
      "Don't use this for reading local files.",
    parameters: {
      url: z.string().url().describe("The URL to visit and read."),
      contentLimit: z
        .number()
        .int()
        .min(CONTENT_LIMIT_MIN)
        .max(CONTENT_LIMIT_EXTENDED)
        .optional()
        .describe(
          "Maximum characters to extract from the page " +
            "(default: plugin content-per-page setting).",
        ),
    },

    implementation: async ({ url, contentLimit }, { status, warn, signal }) => {
      const cfg = readConfig(ctl);
      const limit = contentLimit ?? cfg.contentLimitPerPage;

      status(`Reading: ${url}`);

      try {
        const fetchResult = await fetchPage(url, signal);
        const { finalUrl } = fetchResult;

        // With a raw buffer the content type decides; without one only the
        // URL shape can hint at a PDF.
        const isPdf =
          (fetchResult.rawBuffer &&
            isPdfContentType(fetchResult.contentType)) ||
          (!fetchResult.rawBuffer && isPdfUrl(url));

        // Resolve the PDF bytes once: prefer the raw buffer, otherwise
        // rebuild them from a body that was decoded as text but still starts
        // with the "%PDF" magic marker.
        let pdfBytes: Parameters<typeof extractPdf>[0] | undefined;
        if (isPdf) {
          if (fetchResult.rawBuffer) {
            pdfBytes = fetchResult.rawBuffer;
          } else if (
            fetchResult.html &&
            fetchResult.html.startsWith("%PDF")
          ) {
            pdfBytes = Buffer.from(fetchResult.html, "binary");
          }
        }

        let page: ReturnType<typeof extractPage> & {
          images?: ReadonlyArray<PdfImage>;
        };
        let images: ReadonlyArray<PdfImage> = [];

        if (pdfBytes) {
          status("Found PDF — extracting contents");
          // NOTE(review): the trailing `true, 20` presumably enable image
          // extraction and cap the image count — confirm against extractPdf.
          const pdfResult = await extractPdf(
            pdfBytes,
            url,
            finalUrl,
            limit,
            true,
            20,
          );
          page = pdfResult;
          images = pdfResult.images;
        } else {
          page = extractPage(fetchResult.html, url, finalUrl, limit);
        }

        // No query is available here, so the candidate is scored with an
        // empty query string.
        const scored = scoreCandidate(
          { url, title: page.title, snippet: page.description },
          "",
        );

        status(
          images.length > 0
            ? `Page read successfully. Extracted ${images.length} image(s).`
            : "Page read successfully.",
        );

        const result: Record<string, unknown> = {
          url: page.finalUrl,
          title: page.title,
          description: page.description,
          published: page.published,
          wordCount: page.wordCount,
          domainScore: scored.domainScore,
          tier: scored.tier,
          content: page.text,
          topLinks: page.outlinks.slice(0, 10).map((l) => ({
            text: l.text,
            href: l.href,
          })),
        };

        if (images.length > 0) {
          result.images = images.map((img, idx) => ({
            index: idx + 1,
            page: img.page,
            format: img.format,
            width: img.width,
            height: img.height,
            sizeKB: Math.round(img.byteSize / 1024),
            filePath: img.filePath,
          }));
          result.imageCount = images.length;

          // Also mention the images inside the text content so they are
          // visible to a reader that only looks at `content`.
          const imageNote = images
            .map(
              (img, idx) =>
                `[Image ${idx + 1} on page ${img.page}: ${img.width}×${img.height}, ${Math.round(img.byteSize / 1024)} KB — saved to ${img.filePath}]`,
            )
            .join("\n");
          result.content =
            (result.content as string) +
            "\n\n--- Extracted Images ---\n" +
            imageNote;
        }

        return result;
      } catch (err: unknown) {
        if (isAbortError(err) || signal.aborted) return "Page read cancelled.";
        const msg = errorMessage(err);
        warn(`Read error: ${msg}`);
        return `Error reading page: ${msg}`;
      }
    },
  });

  /**
   * "Research Multi-Read" tool.
   *
   * Reads up to 10 URLs in batches of three. Each URL is fetched and run
   * through `extractPdf` (PDF responses, called with `false` as the fifth
   * argument) or `extractPage` (everything else). Per-URL failures do not
   * abort the batch: they are recorded as entries with a non-null `error`.
   *
   * Returns the per-URL result list, or a plain string when nothing could be
   * read: a cancellation notice if the call was aborted, otherwise a hint to
   * verify the URLs.
   */
  const researchMultiReadTool = tool({
    name: "Research Multi-Read",
    description:
      "Fetch up to 10 URLs concurrently (3 at a time) and return extracted text " +
      "and metadata for all of them. Automatically handles PDF URLs — extracts " +
      "clean text instead of returning garbled binary data. Returns domain authority " +
      "score, publication date, and word count per page. " +
      "Use this when you already have a list of URLs and want to read them all " +
      "at once without running a full deep research session.",
    parameters: {
      urls: z
        .array(z.string().url())
        .min(1)
        .max(10)
        .describe("List of URLs to read (1-10)."),
      contentLimit: z
        .number()
        .int()
        .min(CONTENT_LIMIT_MIN)
        .max(CONTENT_LIMIT_EXTENDED)
        .optional()
        .describe(
          "Maximum characters to extract per page " +
            "(default: plugin content-per-page setting).",
        ),
    },

    implementation: async (
      { urls, contentLimit },
      { status, warn, signal },
    ) => {
      const cfg = readConfig(ctl);
      const limit = contentLimit ?? cfg.contentLimitPerPage;

      status(`Reading ${urls.length} page(s) — 3 at a time…`);

      const CONCURRENCY = 3;
      const results: Array<{
        index: number;
        url: string;
        title: string;
        published: string | null;
        wordCount: number;
        domainScore: number;
        tier: string;
        content: string;
        error: string | null;
      }> = [];

      for (let i = 0; i < urls.length; i += CONCURRENCY) {
        if (signal.aborted) break;

        const batch = urls.slice(i, i + CONCURRENCY);
        // allSettled so one failing URL does not discard its batch-mates.
        const settled = await Promise.allSettled(
          batch.map(async (url, bi) => {
            const fetchResult = await fetchPage(url, signal);
            const { finalUrl } = fetchResult;

            const isPdf =
              (fetchResult.rawBuffer &&
                isPdfContentType(fetchResult.contentType)) ||
              (!fetchResult.rawBuffer && isPdfUrl(url));

            // Resolve the PDF bytes once: prefer the raw buffer, otherwise
            // rebuild them from a text-decoded body that still starts with
            // the "%PDF" magic marker.
            let pdfBytes: Parameters<typeof extractPdf>[0] | undefined;
            if (isPdf) {
              if (fetchResult.rawBuffer) {
                pdfBytes = fetchResult.rawBuffer;
              } else if (
                fetchResult.html &&
                fetchResult.html.startsWith("%PDF")
              ) {
                pdfBytes = Buffer.from(fetchResult.html, "binary");
              }
            }

            const page = pdfBytes
              ? await extractPdf(pdfBytes, url, finalUrl, limit, false)
              : extractPage(fetchResult.html, url, finalUrl, limit);

            // No query is available here, so score with an empty query.
            const scored = scoreCandidate(
              { url, title: page.title, snippet: page.description },
              "",
            );
            return {
              index: i + bi + 1,
              url: page.finalUrl,
              title: page.title,
              published: page.published,
              wordCount: page.wordCount,
              domainScore: scored.domainScore,
              tier: scored.tier,
              content: page.text,
              error: null as string | null,
            };
          }),
        );

        for (let bi = 0; bi < settled.length; bi++) {
          const outcome = settled[bi];
          if (outcome.status === "fulfilled") {
            results.push(outcome.value);
          } else {
            const msg = errorMessage(outcome.reason);
            // Aborts are user-initiated; do not surface them as warnings.
            if (!isAbortError(outcome.reason)) {
              warn(`Failed to read ${batch[bi]}: ${msg}`);
            }
            results.push({
              index: i + bi + 1,
              url: batch[bi],
              title: "",
              published: null,
              wordCount: 0,
              domainScore: 0,
              tier: "general",
              content: "",
              error: msg,
            });
          }
        }

        // Pause between batches, but not after the last one and not once
        // the call has been cancelled (the loop is about to exit anyway).
        if (i + CONCURRENCY < urls.length && !signal.aborted)
          await sleep(MULTI_READ_BATCH_DELAY_MS);
      }

      const succeeded = results.filter((r) => r.error === null).length;

      // A cancelled run with nothing read is a cancellation, not a URL
      // accessibility problem — report it like the sibling tools do.
      if (succeeded === 0 && signal.aborted) {
        return "Multi-read cancelled.";
      }

      status(`Done: ${succeeded}/${urls.length} pages read successfully.`);

      if (succeeded === 0) {
        return "All page reads failed. Verify the URLs are publicly accessible.";
      }

      return results;
    },
  });

  /**
   * "RAG Add Library" tool.
   *
   * Indexes a folder through the global store's `indexLibrary`, defaulting
   * the priority to "general" and the tags to ["general"], and returns a
   * summary of the resulting library plus a short usage note. Any thrown
   * error is turned into an "Error indexing library: …" string.
   */
  const ragAddLibraryTool = tool({
    name: "RAG Add Library",
    description:
      "Index a local folder into a searchable RAG library with priority and tag metadata. " +
      "Multiple libraries can coexist — like GPT4All's multi-library model. " +
      "Each library has a priority tier (proprietary > internal > reference > general) " +
      "and tags for automatic worker routing (e.g. 'legal', 'academic', 'technical'). " +
      "When Deep Research runs with local sources enabled, workers search the right " +
      "libraries based on their role: the academic worker prefers 'academic'-tagged libraries, " +
      "the regulatory worker prefers 'legal'/'policy'-tagged ones, etc. " +
      "Supports 30+ file types: text, markdown, HTML, code, CSV, JSON, XML, Jupyter notebooks, and more. " +
      "Re-indexing a folder that was already indexed replaces the old library. " +
      "For backward compatibility, 'Local Docs Add Collection' still works as an alias.",
    parameters: {
      name: z
        .string()
        .min(1)
        .max(100)
        .describe(
          "A descriptive name for this library, e.g. 'Company Policies', " +
            "'Research Papers', 'Client Reports'. Used in search results and reports.",
        ),
      folderPath: z
        .string()
        .min(1)
        .describe(
          "Absolute path to the folder containing your documents. " +
            "All supported files in subdirectories will be included.",
        ),
      priority: z
        .enum(["proprietary", "internal", "reference", "general"])
        .optional()
        .describe(
          "Priority tier for progressive source retrieval. " +
            "proprietary = searched first, highest trust (your own confidential data). " +
            "internal = second priority (shared team knowledge). " +
            "reference = third priority (curated reference materials). " +
            "general = lowest priority (miscellaneous). " +
            "Default: general.",
        ),
      tags: z
        .array(
          z.enum([
            "legal",
            "academic",
            "technical",
            "financial",
            "medical",
            "policy",
            "reports",
            "code",
            "general",
          ]),
        )
        .optional()
        .describe(
          "Tags for automatic worker routing. Workers search matching libraries first. " +
            "Examples: ['legal'] for contracts/policies, ['academic', 'technical'] for papers, " +
            "['financial', 'reports'] for financial data. Default: ['general'].",
        ),
      description: z
        .string()
        .max(500)
        .optional()
        .describe("Optional description of what this library contains."),
    },

    implementation: async (
      { name, folderPath, priority, tags, description },
      { status },
    ) => {
      try {
        const store = getGlobalStore();
        const settings = readConfig(ctl);

        const indexed = await store.indexLibrary(
          name,
          folderPath,
          description ?? "",
          (priority as LibraryPriority) ?? "general",
          (tags as LibraryTag[]) ?? ["general"],
          settings.contentLimitPerPage,
          status,
        );

        // Closing clause of the usage note, chosen by priority tier.
        let searchOrderNote: string;
        if (indexed.priority === "proprietary") {
          searchOrderNote = "will be searched first, before all other sources.";
        } else if (indexed.priority === "internal") {
          searchOrderNote = "will be searched after proprietary libraries.";
        } else {
          searchOrderNote = "will be searched alongside other libraries.";
        }

        return {
          success: true,
          library: {
            id: indexed.id,
            name: indexed.name,
            folderPath: indexed.folderPath,
            description: indexed.description,
            priority: indexed.priority,
            tags: indexed.tags,
            fileCount: indexed.fileCount,
            chunkCount: indexed.chunkCount,
            totalWords: indexed.totalWords,
            indexedAt: indexed.indexedAt,
            fileTypes: summariseFileTypes(indexed.files),
          },
          instructions:
            "Library indexed. Enable 'Local Document Sources' in plugin settings " +
            "to include these documents in Deep Research results. " +
            `Priority: ${indexed.priority} — ` +
            searchOrderNote,
        };
      } catch (err: unknown) {
        return `Error indexing library: ${errorMessage(err)}`;
      }
    },
  });

  /**
   * "Local Docs Add Collection" tool — the legacy-named counterpart of
   * 'RAG Add Library'.
   *
   * Indexes a folder through the global store's `indexCollection` and
   * returns a summary of the result. Any thrown error is turned into an
   * "Error indexing collection: …" string.
   */
  const localDocsAddTool = tool({
    name: "Local Docs Add Collection",
    description:
      "Index a local folder into a searchable collection (alias for 'RAG Add Library'). " +
      "For advanced features like priority tiers and tags, use 'RAG Add Library' instead.",
    parameters: {
      name: z.string().min(1).max(100).describe("Collection name."),
      folderPath: z.string().min(1).describe("Absolute path to folder."),
    },
    implementation: async ({ name, folderPath }, { status }) => {
      try {
        const store = getGlobalStore();
        const { contentLimitPerPage } = readConfig(ctl);

        const indexed = await store.indexCollection(
          name,
          folderPath,
          contentLimitPerPage,
          status,
        );

        const collection = {
          id: indexed.id,
          name: indexed.name,
          folderPath: indexed.folderPath,
          fileCount: indexed.fileCount,
          chunkCount: indexed.chunkCount,
          totalWords: indexed.totalWords,
          indexedAt: indexed.indexedAt,
        };

        return {
          success: true,
          collection,
          tip: "For priority tiers and auto-routing tags, use 'RAG Add Library' instead.",
        };
      } catch (err: unknown) {
        return `Error indexing collection: ${errorMessage(err)}`;
      }
    },
  });

  /**
   * "RAG List Libraries" tool.
   *
   * Returns a summary (including a file-type breakdown) for every library in
   * the global store together with the store's stats, or an empty list with a
   * hint message when nothing has been indexed yet.
   */
  const ragListLibrariesTool = tool({
    name: "RAG List Libraries",
    description:
      "List all indexed RAG libraries with their metadata, stats, and priority tiers. " +
      "Shows library name, priority, tags, folder path, file counts, chunk counts, " +
      "word totals, and file type breakdown. Libraries are sorted by priority.",
    parameters: {},

    implementation: async () => {
      const store = getGlobalStore();
      const indexed = store.getLibraries();

      if (indexed.length > 0) {
        const summaries = indexed.map(
          ({
            id,
            name,
            folderPath,
            description,
            priority,
            tags,
            fileCount,
            chunkCount,
            totalWords,
            indexedAt,
            files,
          }) => ({
            id,
            name,
            folderPath,
            description,
            priority,
            tags,
            fileCount,
            chunkCount,
            totalWords,
            indexedAt,
            fileTypes: summariseFileTypes(files),
          }),
        );
        return { libraries: summaries, stats: store.getStats() };
      }

      // Nothing indexed yet: point the caller at the tool that creates one.
      return {
        libraries: [],
        message:
          "No libraries indexed yet. Use 'RAG Add Library' to index a folder.",
      };
    },
  });

  /**
   * "Local Docs List Collections" tool — the legacy-named counterpart of
   * 'RAG List Libraries'.
   *
   * Returns a summary of every library in the global store plus the store's
   * stats, or an empty list with a message when nothing is indexed.
   */
  const localDocsListTool = tool({
    name: "Local Docs List Collections",
    description:
      "List all indexed collections (alias for 'RAG List Libraries').",
    parameters: {},
    implementation: async () => {
      const store = getGlobalStore();
      const indexed = store.getLibraries();

      if (indexed.length > 0) {
        const collections = indexed.map(
          ({
            id,
            name,
            folderPath,
            priority,
            tags,
            fileCount,
            chunkCount,
            totalWords,
            indexedAt,
          }) => ({
            id,
            name,
            folderPath,
            priority,
            tags,
            fileCount,
            chunkCount,
            totalWords,
            indexedAt,
          }),
        );
        return { collections, stats: store.getStats() };
      }

      return { collections: [], message: "No collections indexed." };
    },
  });

  /**
   * "RAG Remove Library" tool.
   *
   * Looks the library up by ID in the global store and removes it. Returns a
   * success object naming the removed library and the number of libraries
   * left, or a plain string when the ID is unknown or the removal fails.
   */
  const ragRemoveLibraryTool = tool({
    name: "RAG Remove Library",
    description:
      "Remove an indexed RAG library by its ID. " +
      "Use 'RAG List Libraries' first to find the library ID.",
    parameters: {
      libraryId: z
        .string()
        .uuid()
        .describe("The UUID of the library to remove."),
    },

    implementation: async ({ libraryId }, { status }) => {
      const store = getGlobalStore();

      const target = store.getLibrary(libraryId);
      if (!target) return `Library not found: ${libraryId}`;

      // Capture the name before the removal call so it can still be reported.
      const removedName = target.name;
      if (!store.removeLibrary(libraryId)) return "Failed to remove library.";

      status(`Removed library "${removedName}"`);
      return {
        success: true,
        removedLibrary: removedName,
        remainingLibraries: store.getLibraries().length,
      };
    },
  });

  /**
   * "Local Docs Remove Collection" tool — the legacy-named counterpart of
   * 'RAG Remove Library'.
   *
   * Looks the collection up by ID in the global store and removes it.
   * Returns a success object naming the removed collection and the number of
   * collections left, or a plain string when the ID is unknown or the removal
   * fails.
   */
  const localDocsRemoveTool = tool({
    name: "Local Docs Remove Collection",
    description:
      "Remove an indexed collection (alias for 'RAG Remove Library').",
    parameters: {
      collectionId: z.string().uuid().describe("Collection UUID."),
    },
    implementation: async ({ collectionId }, { status }) => {
      const store = getGlobalStore();

      const target = store.getCollection(collectionId);
      if (!target) {
        return `Collection not found: ${collectionId}`;
      }

      // Capture the name before the removal call so it can still be reported.
      const removedName = target.name;
      if (!store.removeCollection(collectionId)) {
        return "Failed to remove collection.";
      }

      status(`Removed collection "${removedName}"`);
      return {
        success: true,
        removedCollection: removedName,
        remainingCollections: store.getCollections().length,
      };
    },
  });

  /**
   * "RAG Search" tool.
   *
   * Queries the global store in one of three ways:
   *  - query "*"            → `listAll` (optionally limited to one library)
   *  - progressive (default, only when no `libraryId` is given)
   *                         → `searchProgressive`
   *  - otherwise            → `search` (optionally limited to one library)
   *
   * Returns one entry per hit with scores rounded to three decimals; the
   * surrounding-context fields are included unless `includeContext` is
   * false. Returns a plain string when no library exists and an empty result
   * object when nothing matches.
   */
  const ragSearchTool = tool({
    name: "RAG Search",
    description:
      "Search across your indexed RAG libraries using BM25 + fuzzy n-gram hybrid scoring. " +
      "Returns the most relevant chunks ranked by relevance with context windows " +
      "(text from surrounding chunks for richer understanding). " +
      "Supports progressive mode: searches proprietary libraries first, then internal, " +
      "then reference, then general — stopping early when enough results are found. " +
      "For full research that blends local and web sources, use 'Deep Research' with Local Document Sources enabled.",
    parameters: {
      query: z
        .string()
        .min(1)
        .describe(
          "Search query — natural language works best. Use '*' to list all chunks.",
        ),
      maxResults: z
        .number()
        .int()
        .min(1)
        .max(30)
        .optional()
        .describe("Maximum results to return (default: 8)."),
      libraryId: z
        .string()
        .uuid()
        .optional()
        .describe("Optional: limit search to a specific library by its ID."),
      progressive: z
        .boolean()
        .optional()
        .describe(
          "Use progressive search (default: true). Searches libraries in priority order: " +
            "proprietary → internal → reference → general. Set to false to search all at once.",
        ),
      includeContext: z
        .boolean()
        .optional()
        .describe(
          "Include surrounding chunk text for richer context (default: true). " +
            "Adds ~200 chars before and after each matched chunk.",
        ),
    },

    implementation: async (
      { query, maxResults, libraryId, progressive, includeContext },
      { status },
    ) => {
      const store = getGlobalStore();

      if (!store.hasLibraries()) {
        return "No libraries indexed. Use 'RAG Add Library' first.";
      }

      const limit = maxResults ?? 8;
      const listEverything = query.trim() === "*";
      // Progressive mode is the default, but never applies to a search that
      // is pinned to a single library.
      const progressiveMode = progressive !== false && !libraryId;
      const scopedIds = libraryId ? [libraryId] : undefined;

      if (listEverything) {
        status("Listing all document chunks…");
      } else {
        status(
          `Searching RAG libraries: "${query}"${progressiveMode ? " (progressive)" : ""}`,
        );
      }

      // Pick the store call matching the requested mode.
      const runQuery = () => {
        if (listEverything) return store.listAll(limit, scopedIds);
        if (progressiveMode) return store.searchProgressive(query, limit);
        return store.search(query, limit, scopedIds);
      };
      const hits = runQuery();

      if (hits.length === 0) {
        return {
          results: [],
          message: "No relevant documents found for this query.",
        };
      }

      const libraryCount = new Set(hits.map((hit) => hit.libraryName)).size;
      status(
        `Found ${hits.length} relevant chunks across ${libraryCount} library(ies).`,
      );

      const withContext = includeContext !== false;
      const roundTo3 = (value: number) => Math.round(value * 1000) / 1000;

      return hits.map((hit, position) => {
        const entry: Record<string, unknown> = {
          rank: position + 1,
          library: hit.libraryName,
          priority: hit.libraryPriority,
          file: hit.fileRelPath || hit.fileName,
          fileType: hit.fileType,
          heading: hit.heading || undefined,
          score: roundTo3(hit.score),
          bm25Score: roundTo3(hit.bm25Score),
          wordCount: hit.wordCount,
          chunkPosition: `${hit.chunkIndex + 1} of ${hit.totalChunks}`,
          content: hit.text,
        };

        // Context fields are only attached when requested and non-empty.
        if (withContext && hit.contextBefore) {
          entry.contextBefore = hit.contextBefore;
        }
        if (withContext && hit.contextAfter) {
          entry.contextAfter = hit.contextAfter;
        }

        return entry;
      });
    },
  });

  // "Local Docs Search": collection-flavoured alias of 'RAG Search'. It queries
  // the same global store but always uses the non-progressive search path and
  // returns a slimmer result shape keyed by `collection`.
  const localDocsSearchTool = tool({
    name: "Local Docs Search",
    description: "Search local documents (alias for 'RAG Search').",
    parameters: {
      query: z.string().min(1).describe("Search query. Use '*' to list all."),
      maxResults: z
        .number()
        .int()
        .min(1)
        .max(20)
        .optional()
        .describe("Max results (default: 8)."),
      collectionId: z
        .string()
        .uuid()
        .optional()
        .describe("Limit to a specific collection."),
    },
    implementation: async ({ query, maxResults, collectionId }, { status }) => {
      const store = getGlobalStore();
      if (!store.hasCollections()) {
        return "No collections indexed. Use 'Local Docs Add Collection' first.";
      }

      const limit = maxResults ?? 8;
      // `undefined` scope means "every collection".
      const scope = collectionId ? [collectionId] : undefined;
      // A lone "*" lists chunks instead of running a ranked search.
      const listEverything = query.trim() === "*";

      const progressText = listEverything
        ? "Listing all document chunks…"
        : `Searching: "${query}"`;
      status(progressText);

      const matches = listEverything
        ? store.listAll(limit, scope)
        : store.search(query, limit, scope);

      if (matches.length === 0) {
        return { results: [], message: "No relevant documents found." };
      }

      status(`Found ${matches.length} relevant chunks.`);

      return matches.map((hit, index) => {
        // Keep three decimal places of the relevance score.
        const roundedScore = Math.round(hit.score * 1000) / 1000;
        return {
          rank: index + 1,
          collection: hit.libraryName,
          file: hit.fileName,
          priority: hit.libraryPriority,
          score: roundedScore,
          wordCount: hit.wordCount,
          content: hit.text,
        };
      });
    },
  });

  // "RAG Update Library": forwards the supplied metadata fields to
  // store.updateLibraryMeta() and echoes back the resulting library record.
  // Fields the caller omits are passed through as `undefined`.
  const ragUpdateLibraryTool = tool({
    name: "RAG Update Library",
    description:
      "Update a library's metadata (name, description, priority, tags) without re-indexing. " +
      "Use this to change a library's priority tier or add/remove tags.",
    parameters: {
      libraryId: z
        .string()
        .uuid()
        .describe("The UUID of the library to update."),
      name: z.string().min(1).max(100).optional().describe("New name."),
      description: z.string().max(500).optional().describe("New description."),
      priority: z
        .enum(["proprietary", "internal", "reference", "general"])
        .optional()
        .describe("New priority tier."),
      tags: z
        .array(
          z.enum([
            "legal",
            "academic",
            "technical",
            "financial",
            "medical",
            "policy",
            "reports",
            "code",
            "general",
          ]),
        )
        .optional()
        .describe("New tags for worker routing."),
    },

    implementation: async (
      { libraryId, name, description, priority, tags },
      { status },
    ) => {
      const library = getGlobalStore().updateLibraryMeta(libraryId, {
        name,
        description,
        priority: priority as LibraryPriority | undefined,
        tags: tags as LibraryTag[] | undefined,
      });

      // A falsy result from the store is reported as an unknown library id.
      if (!library) {
        return `Library not found: ${libraryId}`;
      }

      status(`Updated library "${library.name}"`);

      const summary = {
        id: library.id,
        name: library.name,
        description: library.description,
        priority: library.priority,
        tags: library.tags,
      };
      return { success: true, library: summary };
    },
  });

  // "RAG Check Changes": reports the modified / deleted / added lists that
  // store.checkForChanges() returns for one library, plus a ready-made
  // re-index suggestion when any of the three lists is non-empty.
  const ragCheckChangesTool = tool({
    name: "RAG Check Changes",
    description:
      "Check if files in a library have changed since indexing. " +
      "Shows modified, deleted, and newly added files. " +
      "If changes are found, you can re-index with 'RAG Add Library'.",
    parameters: {
      libraryId: z
        .string()
        .uuid()
        .describe("The UUID of the library to check."),
    },

    implementation: async ({ libraryId }) => {
      const store = getGlobalStore();

      const library = store.getLibrary(libraryId);
      if (!library) {
        return `Library not found: ${libraryId}`;
      }

      const { modified, deleted, added } = store.checkForChanges(libraryId);
      const hasChanges = !(
        modified.length === 0 &&
        deleted.length === 0 &&
        added.length === 0
      );

      // Only build the re-index hint when there is something to re-index.
      let suggestion: string;
      if (hasChanges) {
        suggestion =
          `Re-index with: RAG Add Library(name="${library.name}", folderPath="${library.folderPath}", ` +
          `priority="${library.priority}", tags=${JSON.stringify(library.tags)})`;
      } else {
        suggestion = "Library is up to date — no re-indexing needed.";
      }

      return {
        library: library.name,
        hasChanges,
        modified,
        deleted,
        added,
        suggestion,
      };
    },
  });

  // "RAG Save Index": resolves the user-supplied path, hands it to
  // store.saveIndex(), and returns the resolved path together with the store's
  // library / chunk / word counts. Any exception is converted into an
  // "Error saving index: …" string rather than propagated.
  const ragSaveIndexTool = tool({
    name: "RAG Save Index",
    description:
      "Save the current RAG index to disk so libraries persist across sessions. " +
      "Saves all libraries, chunks, and metadata to a JSON file. " +
      "Load it later with 'RAG Load Index' to avoid re-indexing.",
    parameters: {
      filePath: z
        .string()
        .min(1)
        .describe(
          "Path to save the index file, e.g. '~/.lmstudio/rag-index.json'. " +
            "Parent directories are created automatically.",
        ),
    },

    implementation: async ({ filePath }, { status }) => {
      try {
        const store = getGlobalStore();

        // Expand a leading "~" only when it stands alone or is followed by a
        // path separator. A blanket /^~/ replacement would mangle "~user/x"
        // and files literally named "~something". Concatenation (instead of
        // String.replace with a string) also keeps "$" sequences in the home
        // path from being read as replacement patterns. USERPROFILE covers
        // Windows, where HOME is usually unset; with neither set the path is
        // left untouched.
        const homeDir = process.env.HOME || process.env.USERPROFILE;
        const resolvedPath =
          homeDir && /^~(?=$|[\\/])/.test(filePath)
            ? homeDir + filePath.slice(1)
            : filePath;

        store.saveIndex(resolvedPath);
        const stats = store.getStats();
        status(`RAG index saved to ${resolvedPath}`);
        return {
          success: true,
          path: resolvedPath,
          stats: {
            libraries: stats.libraries,
            totalChunks: stats.totalChunks,
            totalWords: stats.totalWords,
          },
        };
      } catch (err: unknown) {
        return `Error saving index: ${errorMessage(err)}`;
      }
    },
  });

  // "RAG Load Index": resolves the user-supplied path, passes it to
  // store.loadIndex(), and reports how many libraries were loaded and skipped
  // along with the store's library / chunk / word counts. Any exception is
  // converted into an "Error loading index: …" string rather than propagated.
  const ragLoadIndexTool = tool({
    name: "RAG Load Index",
    description:
      "Load a previously saved RAG index from disk. " +
      "Restores all libraries and chunks without re-scanning files. " +
      "Libraries whose folders no longer exist are skipped.",
    parameters: {
      filePath: z.string().min(1).describe("Path to the saved index file."),
    },

    implementation: async ({ filePath }, { status }) => {
      try {
        const store = getGlobalStore();

        // Expand a leading "~" only when it stands alone or is followed by a
        // path separator. A blanket /^~/ replacement would mangle "~user/x"
        // and files literally named "~something". Concatenation (instead of
        // String.replace with a string) also keeps "$" sequences in the home
        // path from being read as replacement patterns. USERPROFILE covers
        // Windows, where HOME is usually unset; with neither set the path is
        // left untouched.
        const homeDir = process.env.HOME || process.env.USERPROFILE;
        const resolvedPath =
          homeDir && /^~(?=$|[\\/])/.test(filePath)
            ? homeDir + filePath.slice(1)
            : filePath;

        const result = store.loadIndex(resolvedPath);
        const stats = store.getStats();
        status(
          `Loaded ${result.loaded} library(ies)` +
            (result.skipped > 0
              ? `, skipped ${result.skipped} (missing folders)`
              : ""),
        );
        return {
          success: true,
          loaded: result.loaded,
          skipped: result.skipped,
          stats: {
            libraries: stats.libraries,
            totalChunks: stats.totalChunks,
            totalWords: stats.totalWords,
          },
        };
      } catch (err: unknown) {
        return `Error loading index: ${errorMessage(err)}`;
      }
    },
  });

  // Hand back every tool built above: four research tools, eight "RAG …"
  // library tools, and four "Local Docs …" tools.
  // NOTE(review): only 'Local Docs Search' is visibly declared as an alias of
  // its RAG counterpart here; presumably the other localDocs* tools are aliases
  // too — confirm against their definitions earlier in this function.
  return [
    deepResearchTool,
    researchSearchTool,
    researchReadPageTool,
    researchMultiReadTool,
    ragAddLibraryTool,
    ragListLibrariesTool,
    ragRemoveLibraryTool,
    ragSearchTool,
    ragUpdateLibraryTool,
    ragCheckChangesTool,
    ragSaveIndexTool,
    ragLoadIndexTool,
    localDocsAddTool,
    localDocsListTool,
    localDocsRemoveTool,
    localDocsSearchTool,
  ];
}

/**
 * Counts how many entries share each `fileType`.
 *
 * Entries whose `fileType` is empty are grouped under `"unknown"`.
 *
 * The tally is kept in a `Map` rather than a plain object so that file types
 * which collide with `Object.prototype` members (`"constructor"`,
 * `"toString"`, `"__proto__"`, …) are counted like any other key instead of
 * picking up the inherited value.
 *
 * @param files - Items carrying a `fileType` string.
 * @returns A plain object mapping each file type to its occurrence count.
 */
function summariseFileTypes(
  files: ReadonlyArray<{ fileType: string }>,
): Record<string, number> {
  const tally = new Map<string, number>();
  for (const { fileType } of files) {
    const key = fileType || "unknown";
    tally.set(key, (tally.get(key) ?? 0) + 1);
  }
  // Object.fromEntries defines own data properties, so even "__proto__"
  // survives as a regular key.
  return Object.fromEntries(tally);
}

/**
 * Reports whether a caught value represents an aborted operation.
 *
 * Matches any `Error` whose `name` is `"AbortError"`. That covers the
 * `DOMException` raised by `fetch`/`AbortController` (which inherits from
 * `Error`) as well as the plain `AbortError` that Node core APIs reject with,
 * and it avoids referencing the `DOMException` global on runtimes that do not
 * define it.
 *
 * @param err - The value caught from a `try`/`catch` or promise rejection.
 * @returns `true` when `err` is an `Error` named `"AbortError"`.
 */
function isAbortError(err: unknown): boolean {
  return err instanceof Error && err.name === "AbortError";
}

/**
 * Turns any caught value into a printable message.
 *
 * @param err - The value caught from a `try`/`catch` or promise rejection.
 * @returns The `message` of an `Error`; `"unknown error"` for `null` or
 *   `undefined`; otherwise the value's `String()` conversion.
 */
function errorMessage(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  if (err === null || err === undefined) {
    return "unknown error";
  }
  return String(err);
}