// src/tools/webTools.ts

/**
 * @file webTools.ts
 * Web tools: DuckDuckGo search, fetch content, Wikipedia.
 */

import { tool, type Tool } from "@lmstudio/sdk";
import { z } from "zod";
import { createSafeToolImplementation } from "./shared";

/** Maximum number of hits returned by `web_search`. */
const WEB_SEARCH_MAX_RESULTS = 5;
/** Total attempts (first try plus retries) made for one DuckDuckGo search. */
const WEB_SEARCH_MAX_ATTEMPTS = 2;
/** Pause between two search attempts, in milliseconds. */
const WEB_SEARCH_RETRY_DELAY_MS = 1000;
/** Upper bound for a single HTTP request (headers and body), in milliseconds. */
const WEB_FETCH_TIMEOUT_MS = 15000;
/** Maximum number of characters of page text returned by `fetch_web_content`. */
const WEB_CONTENT_MAX_CHARS = 6000;
/** Maximum number of articles returned by `wikipedia_search`. */
const WIKIPEDIA_MAX_ARTICLES = 3;
/** Maximum number of characters kept from each Wikipedia intro extract. */
const WIKIPEDIA_SUMMARY_MAX_CHARS = 2000;
/**
 * Shape accepted as a Wikipedia subdomain (e.g. `en`, `simple`, `zh-min-nan`).
 * The value is interpolated into the request hostname, so anything containing
 * dots, slashes or other URL syntax must be rejected.
 */
const WIKIPEDIA_LANG_PATTERN = /^[a-z]{2,12}(?:-[a-z0-9]{1,12})*$/;

/** Named entities decoded by {@link decodeHtmlEntities}; `nbsp` maps to a plain space. */
const WEB_NAMED_ENTITIES = new Map<string, string>([
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
  ["amp", "&"],
]);

interface WikipediaSearchHit {
  pageid: number;
  title: string;
}

interface WikipediaSearchResponse {
  query?: { search?: WikipediaSearchHit[] };
}

interface WikipediaExtractResponse {
  query?: { pages?: Record<string, { extract?: string } | undefined> };
}

/** Turns any thrown value into a printable message. */
function describeWebToolError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Resolves after `ms` milliseconds. */
function webToolDelay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Parses `raw` and ensures it is an absolute http(s) URL.
 *
 * @throws Error when the string is not a valid URL or uses another scheme.
 */
function parseHttpUrl(raw: string): URL {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    throw new Error(`Invalid URL: ${raw}`);
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error(`Unsupported URL scheme "${parsed.protocol}" (only http and https are allowed)`);
  }
  return parsed;
}

/**
 * Performs a GET request and hands the response to `consume`, aborting the
 * request (including the body read) after {@link WEB_FETCH_TIMEOUT_MS}.
 *
 * @throws Error on a non-2xx status, on timeout, or on any network failure.
 */
async function fetchWithTimeout<T>(url: string, consume: (response: Response) => Promise<T>): Promise<T> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), WEB_FETCH_TIMEOUT_MS);
  try {
    const response = await fetch(url, { signal: controller.signal });
    if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
    return await consume(response);
  } catch (error) {
    // Replace the generic abort error with a message that names the cause.
    if (controller.signal.aborted) throw new Error(`Request timed out after ${WEB_FETCH_TIMEOUT_MS} ms`);
    throw error;
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer);
  }
}

/** Fetches `url` and parses the body as JSON; the caller must still guard optional fields. */
function fetchJson<T>(url: string): Promise<T> {
  return fetchWithTimeout(url, async (response) => (await response.json()) as T);
}

/**
 * Decodes the named entities in {@link WEB_NAMED_ENTITIES} plus decimal and
 * hexadecimal character references. Everything is handled in one pass so an
 * escaped ampersand such as `&amp;lt;` yields `&lt;` rather than `<`.
 * Unknown or out-of-range references are left untouched.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match: string, body: string) => {
    if (!body.startsWith("#")) return WEB_NAMED_ENTITIES.get(body) ?? match;
    const isHex = body.charAt(1).toLowerCase() === "x";
    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
    return isValid ? String.fromCodePoint(codePoint) : match;
  });
}

/** Returns the decoded, whitespace-normalised `<title>` text, or `undefined` when absent or empty. */
function extractHtmlTitle(html: string): string | undefined {
  const match = /<title[^>]*>([^<]+)<\/title>/i.exec(html);
  if (!match) return undefined;
  const title = decodeHtmlEntities(match[1] ?? "").replace(/\s+/g, " ").trim();
  return title.length > 0 ? title : undefined;
}

/**
 * Reduces an HTML document to readable plain text: drops comments, scripts,
 * styles and page chrome (`nav`, `footer`, `header`, `aside`), turns block
 * endings into newlines, strips the remaining tags, decodes entities and
 * collapses whitespace.
 */
function htmlToPlainText(html: string): string {
  let text = html;
  let previousLength: number;

  // Repeat until stable: removing one block can splice together a new match.
  do {
    previousLength = text.length;
    text = text
      .replace(/<!--[\s\S]*?-->/g, "")
      .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, "")
      .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, "");
  } while (text.length < previousLength);

  text = text
    .replace(/<nav\b[^>]*>[\s\S]*?<\/nav>/gi, "")
    .replace(/<footer\b[^>]*>[\s\S]*?<\/footer>/gi, "")
    .replace(/<header\b[^>]*>[\s\S]*?<\/header>/gi, "")
    .replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");

  text = text.replace(/<\/div>/gi, "\n").replace(/<\/p>/gi, "\n").replace(/<br\s*\/?>/gi, "\n");

  do {
    previousLength = text.length;
    text = text.replace(/<[^>]+>/g, "");
  } while (text.length < previousLength);

  // Entities are decoded only after tag stripping so escaped markup stays text.
  // Literal or numeric no-break spaces are folded into ordinary spaces.
  text = decodeHtmlEntities(text).replace(/\u00a0/g, " ");

  return text.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n\n").trim();
}

/**
 * Builds the web-facing tools: `web_search` (DuckDuckGo), `fetch_web_content`
 * (page text extraction) and `wikipedia_search` (article intro summaries).
 *
 * Every tool reports failures as `{ error: string }` instead of throwing.
 *
 * @param config - `enableWikipedia` is forwarded to `createSafeToolImplementation`
 *   together with the tool name; the Wikipedia tool itself is always registered.
 * @returns The three tool definitions, in the order listed above.
 */
export function createWebTools(config: { enableWikipedia: boolean }): Tool[] {
  const tools: Tool[] = [];

  tools.push(tool({
    name: "web_search",
    description: "Search the web using DuckDuckGo API.",
    parameters: { query: z.string() },
    implementation: async ({ query }) => {
      let lastError: unknown;
      try {
        // Loaded lazily; a load failure is reported like any other search error.
        const { search, SafeSearchType } = await import("duck-duck-scrape");
        for (let attempt = 1; attempt <= WEB_SEARCH_MAX_ATTEMPTS; attempt++) {
          try {
            const response = await search(query, { safeSearch: SafeSearchType.OFF });
            const hits = response.results ?? [];
            return {
              results: hits
                .slice(0, WEB_SEARCH_MAX_RESULTS)
                .map((hit: { title: string; url: string; description: string }) => ({
                  title: hit.title,
                  link: hit.url,
                  snippet: hit.description,
                })),
              total_found: hits.length,
            };
          } catch (error) {
            lastError = error;
            // Back off before retrying, but not after the final attempt.
            if (attempt < WEB_SEARCH_MAX_ATTEMPTS) await webToolDelay(WEB_SEARCH_RETRY_DELAY_MS);
          }
        }
      } catch (error) {
        lastError = error;
      }
      return { error: `Search failed: ${describeWebToolError(lastError)}` };
    },
  }));

  tools.push(tool({
    name: "fetch_web_content",
    description: "Fetch the clean, text-based content of a webpage URL.",
    parameters: { url: z.string() },
    implementation: async ({ url }) => {
      try {
        // NOTE(review): only the scheme is checked; requests to private or
        // internal hosts are not blocked here.
        const target = parseHttpUrl(url);
        const { status, html } = await fetchWithTimeout(target.href, async (response) => ({
          status: response.status,
          html: await response.text(),
        }));

        const title = extractHtmlTitle(html);
        const text = htmlToPlainText(html);
        const truncationNote =
          text.length > WEB_CONTENT_MAX_CHARS
            ? `\n... (truncated, showing ${WEB_CONTENT_MAX_CHARS} of ${text.length} chars)`
            : "";

        return {
          url,
          status,
          ...(title !== undefined ? { title } : {}),
          content: text.substring(0, WEB_CONTENT_MAX_CHARS) + truncationNote,
        };
      } catch (error) {
        return { error: `Failed to fetch URL: ${describeWebToolError(error)}` };
      }
    },
  }));

  tools.push(tool({
    name: "wikipedia_search",
    description: "Search Wikipedia for a given query and return page summaries.",
    parameters: {
      query: z.string(),
      lang: z.string().optional().describe("Language code (default: en)"),
    },
    implementation: createSafeToolImplementation(
      async ({ query, lang = "en" }) => {
        try {
          // The language code becomes part of the hostname, so validate it first.
          const langCode = String(lang).trim().toLowerCase();
          if (!WIKIPEDIA_LANG_PATTERN.test(langCode)) {
            return { error: `Wikipedia search failed: invalid language code "${lang}"` };
          }

          const apiBase = `https://${langCode}.wikipedia.org/w/api.php`;
          const searchData = await fetchJson<WikipediaSearchResponse>(
            `${apiBase}?action=query&list=search&srsearch=${encodeURIComponent(query)}&format=json`,
          );
          const hits = searchData?.query?.search;
          if (!Array.isArray(hits) || hits.length === 0) return { results: "No Wikipedia articles found." };

          // The extract requests are independent; Promise.all keeps hit order.
          const results = await Promise.all(
            hits.slice(0, WIKIPEDIA_MAX_ARTICLES).map(async (item) => {
              const pageId = String(item.pageid);
              const pageData = await fetchJson<WikipediaExtractResponse>(
                `${apiBase}?action=query&prop=extracts&exintro&explaintext&pageids=${encodeURIComponent(pageId)}&format=json`,
              );
              // A hit may come back without an extract; use an empty summary
              // rather than failing the whole search.
              const extract = pageData?.query?.pages?.[pageId]?.extract ?? "";
              return {
                title: item.title,
                summary:
                  extract.substring(0, WIKIPEDIA_SUMMARY_MAX_CHARS) +
                  (extract.length > WIKIPEDIA_SUMMARY_MAX_CHARS ? "..." : ""),
                url: `https://${langCode}.wikipedia.org/wiki/${encodeURIComponent(item.title.replace(/ /g, "_"))}`,
              };
            }),
          );
          return { results };
        } catch (error) {
          return { error: `Wikipedia search failed: ${describeWebToolError(error)}` };
        }
      },
      config.enableWikipedia,
      "wikipedia_search",
    ),
  }));

  return tools;
}