duck-duck-go-reworked

Public

Forked from danielsig/duckduckgo

src / toolsProvider.ts

import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { join } from "path";
import { writeFile } from "fs/promises";
import { configSchematics } from "./config";
// import { search, searchImages, SafeSearchType } from "duck-duck-scrape";



/**
 * Builds the tools exposed by this plugin: a DuckDuckGo "Web Search" tool and
 * a DuckDuckGo "Image Search" tool.
 *
 * Both tools share one rate limiter so that consecutive outgoing searches are
 * spaced at least TIME_BETWEEN_REQUESTS milliseconds apart. For both tools the
 * plugin configuration takes precedence over the arguments supplied by the
 * caller: a configured `pageSize` other than 0 and a configured `safeSearch`
 * other than "auto" override the per-call values.
 *
 * @param ctl Controller used to read the plugin configuration and (through the
 *            image helpers) the working directory.
 * @returns The list of tools to register.
 */
export async function toolsProvider(ctl:ToolsProviderController):Promise<Tool[]> {
	const tools: Tool[] = [];
	// Forwarded to buildImagePresentationPayload as its inline/gallery limits.
	const COMPACT_INLINE_LIMIT = 2;
	const COMPACT_GALLERY_LIMIT = 6;

	// Earliest time (ms since epoch) at which the next search request may be sent.
	let nextRequestTimestamp = 0;
	const TIME_BETWEEN_REQUESTS = 2000; // 2 seconds
	/**
	 * Reserves the next free request slot and resolves once that slot is reached.
	 * The slot is tracked by its scheduled send time rather than by the time of
	 * the call, so a request that had to wait still pushes the following one a
	 * full TIME_BETWEEN_REQUESTS past the moment it is actually sent.
	 */
	const waitIfNeeded = (): Promise<void> => {
		const now = Date.now();
		const scheduledAt = Math.max(now, nextRequestTimestamp);
		nextRequestTimestamp = scheduledAt + TIME_BETWEEN_REQUESTS;
		const waitMs = scheduledAt - now;
		if (waitMs > 0)
			return new Promise(resolve => setTimeout(resolve, waitMs));
		return Promise.resolve();
	}
	
	const duckDuckGoWebSearchTool = tool({
		name: "Web Search",
		description: "Search for web pages on DuckDuckGo using a query string and return a list of URLs.",
		parameters: {
			query: z.string().describe("The search query for finding web pages"),
			pageSize: z.number().int().min(1).max(10).optional().describe("Number of web results per page"),
			safeSearch: z.enum(["strict", "moderate", "off"]).optional().describe("Safe Search"),
			page: z.number().int().min(1).max(100).optional().default(1).describe("Page number for pagination"),
		},
		// Returns `{ links, count }` with [label, url] pairs on success, or a
		// plain string describing why no results are available.
		implementation: async ({ query, pageSize, safeSearch, page }, { status, warn, signal }) => {
			status("Initiating DuckDuckGo web search...");
			await waitIfNeeded(); // Wait if needed to avoid rate limiting
			try {
				// Precedence: plugin config (unless 0 / "auto") > tool argument > built-in default.
				pageSize = undefinedIfAuto(ctl.getPluginConfig(configSchematics).get("pageSize"), 0)
					?? pageSize
					?? 5;
				safeSearch = undefinedIfAuto(ctl.getPluginConfig(configSchematics).get("safeSearch"), "auto")
					?? safeSearch
					?? "moderate";
				
				// Construct the DuckDuckGo API URL
				const headers = spoofHeaders();
				const url = new URL("https://html.duckduckgo.com/html/");
				url.searchParams.append("q", query);
				// NOTE(review): the meaning of the "p" values is not established anywhere
				// in this file — confirm that "-1" really selects strict and "1" selects off.
				if (safeSearch !== "moderate")
					url.searchParams.append("p", safeSearch === "strict" ? "-1" : "1");
				if (page > 1)
					url.searchParams.append("s", ((pageSize * (page - 1)) || 0).toString()); // Start at the appropriate index
				// Perform the fetch request with abort signal
				console.log(`Fetching DuckDuckGo search results for query: ${url.toString() }`);
				const response = await fetch(url.toString(), {
					method: "GET",
					signal,
					headers,
				});
				if (!response.ok) {
					warn(`Failed to fetch search results: ${response.statusText}`);
					return `Error: Failed to fetch search results: ${response.statusText}`;
				}
				const html = await response.text();
				// Extract result links and decode DDG redirect targets when present.
				const links: [string, string][] = [];
				const anchorRegex = /<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gmi;
				let match: RegExpExecArray | null;
				while (links.length < pageSize && (match = anchorRegex.exec(html))) {
					const rawHref = match[1];
					const rawLabel = match[2];
					const normalizedUrl = normalizeSearchResultUrl(rawHref);
					if (!normalizedUrl) {
						continue;
					}
					// Fall back to the URL itself when the anchor has no visible text.
					const label = stripHtml(rawLabel).replace(/\s+/g, " ").trim() || normalizedUrl;
					// Keep only the first anchor seen for each distinct target URL.
					if (!links.some(([, existingUrl]) => existingUrl === normalizedUrl)) {
						links.push([label, normalizedUrl]);
					}
				}
				if (links.length === 0) {
					return "No web pages found for the query.";
				}
				status(`Found ${links.length} web pages.`);
				return {
					links,
					count: links.length,
				};
			} catch (error: any) {
				if (error instanceof DOMException && error.name === "AbortError") {
					return "Search aborted by user.";
				}
				console.error(error);
				warn(`Error during search: ${error.message}`);
				return `Error: ${error.message}`;
			}
		},
	});

	const duckDuckGoImageSearchTool = tool({
		name: "Image Search",
		description:
			"Searches DuckDuckGo for images, downloads them into the working directory, and returns Markdown that embeds local filenames (thumbnails) plus compactGalleryMarkdown—not a list of remote URLs.",
		parameters: {
			query: z.string().describe("The search query for finding images"),
			pageSize: z.number().int().min(1).max(10).optional().default(10).describe("Number of image results per page"),
			safeSearch: z.enum(["strict", "moderate", "off"]).optional().default("moderate").describe("Safe Search"),
			page: z.number().int().min(1).max(100).optional().default(1).describe("Page number for pagination"),
		},
		// Flow: fetch the search page to obtain a vqd token, query the i.js endpoint
		// with it, then download the returned images. When the token is missing, the
		// endpoint looks blocked, or no image can be downloaded, a web-search-based
		// fallback is attempted. Returns a presentation payload on success, otherwise
		// an error object or a plain error string.
		implementation: async ({ query, pageSize, safeSearch, page }, { status, warn, signal }) => {
			status("Initiating DuckDuckGo image search...");
			await waitIfNeeded(); // Wait if needed to avoid rate limiting
			try {
				// Precedence: plugin config (unless 0 / "auto") > tool argument > built-in default.
				pageSize = undefinedIfAuto(ctl.getPluginConfig(configSchematics).get("pageSize"), 0)
					?? pageSize
					?? 5;
				safeSearch = undefinedIfAuto(ctl.getPluginConfig(configSchematics).get("safeSearch"), "auto")
					?? safeSearch
					?? "moderate";
					
				// Step 1: Fetch the vqd token
				const headers = spoofHeaders("document");
				const initialUrl = new URL("https://duckduckgo.com/");
				initialUrl.searchParams.append("q", query);
				initialUrl.searchParams.append("iax", "images");
				initialUrl.searchParams.append("ia", "images");

				const initialResponse = await fetchWithRetries(initialUrl.toString(), {
					method: "GET",
					signal,
					headers,
				}, [429, 500, 502, 503, 504], 3);

				if (!initialResponse.ok) {
					warn(`Failed to fetch initial response: ${initialResponse.status} ${initialResponse.statusText}`);
					return `Error: Failed to fetch initial response (${initialResponse.status})`;
				}

				const initialHtml = await initialResponse.text();
				const vqd = extractVqdToken(initialHtml);
				if (!vqd) {
					// Without a token the i.js endpoint cannot be queried; try scraping
					// images from ordinary web results instead.
					warn("Failed to extract vqd token.");
					status("Image endpoint blocked, trying web fallback...");
					const fallbackDownloaded = await runWebFallbackImageSearch(ctl, query, pageSize, safeSearch, page, signal, warn);
					if (fallbackDownloaded.length > 0) {
						return buildImagePresentationPayload(
							fallbackDownloaded.slice(0, pageSize),
							"web_fallback",
							COMPACT_INLINE_LIMIT,
							COMPACT_GALLERY_LIMIT,
						);
					}
					return {
						error: "blocked_by_waf",
						stage: "vqd",
						try_web_fallback: true,
						message: "Unable to extract vqd token.",
					};
				}

				// Step 2: sleep with jitter to avoid rate limiting
				await delay(800 + Math.floor(Math.random() * 500));

				// Step 3: Fetch image results using the i.js endpoint
				const searchUrl = new URL("https://duckduckgo.com/i.js");
				searchUrl.searchParams.append("q", query);
				searchUrl.searchParams.append("o", "json");
				searchUrl.searchParams.append("l", "us-en"); // Global region
				searchUrl.searchParams.append("vqd", vqd);
				searchUrl.searchParams.append("f", ",,,,,");
				// NOTE(review): same unverified "p" mapping as in the web search tool —
				// confirm which value the endpoint treats as strict.
				if(safeSearch !== "moderate")
					searchUrl.searchParams.append("p", safeSearch === "strict" ? "-1" : "1");
				if (page > 1)
					searchUrl.searchParams.append("s", ((pageSize * (page - 1)) || 0).toString()); // Start at the appropriate index

				const searchResponse = await fetchWithRetries(searchUrl.toString(), {
					method: "GET",
					signal,
					headers: {
						...spoofHeaders("json"),
						// Present the request as originating from the search page fetched in step 1.
						Referer: initialUrl.toString(),
					},
				}, [403, 429, 500, 502, 503, 504], 3);

				if (!searchResponse.ok) {
					warn(`Failed to fetch image results: ${searchResponse.status} ${searchResponse.statusText}`);
					if (isLikelyBlockedStatus(searchResponse.status)) {
						status("Image endpoint blocked, trying web fallback...");
						const fallbackDownloaded = await runWebFallbackImageSearch(ctl, query, pageSize, safeSearch, page, signal, warn);
						if (fallbackDownloaded.length > 0) {
							return buildImagePresentationPayload(
								fallbackDownloaded.slice(0, pageSize),
								"web_fallback",
								COMPACT_INLINE_LIMIT,
								COMPACT_GALLERY_LIMIT,
							);
						}
						return {
							error: "blocked_by_waf",
							stage: "i.js",
							status: searchResponse.status,
							try_web_fallback: true,
							message: "Image endpoint blocked.",
						};
					}
					return `Error: Failed to fetch image results (${searchResponse.status} ${searchResponse.statusText || "unknown"}).`;
				}

				const data = await searchResponse.json();
				const imageResults = data.results || [];
				// Keep at most pageSize results and only those that yield an absolute http(s) URL.
				const imageURLs = imageResults
					.slice(0, pageSize)
					.map((result: any) => selectImageUrl(result))
					.filter((url: string) => typeof url === "string" && /^https?:\/\//i.test(url));

				if (imageURLs.length === 0) {
					return {
						error: "blocked_by_waf",
						stage: "i.js",
						try_web_fallback: true,
						message: "No image URLs returned.",
					};
				}

				status(`Found ${imageURLs.length} images. Fetching...`);

				// const ddgSafeSearch =
				// 	{
				// 		strict: SafeSearchType.STRICT,
				// 		moderate: SafeSearchType.MODERATE,
				// 		off: SafeSearchType.OFF,
				// 	}[safeSearch] || SafeSearchType.MODERATE;
				// const offset = pageSize * (page - 1);

				// const results = await searchImages(query, {
				// 	offset,
				// 	safeSearch: ddgSafeSearch,
				// });
				// const imageURLs = results.results.map(img => img.image);

				const downloadedImageEntries = (await downloadImageUrls(ctl, imageURLs, signal, warn)).slice(0, pageSize);
				if (downloadedImageEntries.length === 0) {
					// Every candidate failed to download; fall back to images scraped from web results.
					warn("Error fetching images from source hosts");
					status("Direct image hosts blocked, trying web fallback...");
					const fallbackDownloaded = await runWebFallbackImageSearch(ctl, query, pageSize, safeSearch, page, signal, warn);
					if (fallbackDownloaded.length > 0) {
						return buildImagePresentationPayload(
							fallbackDownloaded.slice(0, pageSize),
							"web_fallback",
							COMPACT_INLINE_LIMIT,
							COMPACT_GALLERY_LIMIT,
						);
					}
					return "Found image candidates, but source websites blocked direct download.";
				}

				status(`Downloaded ${downloadedImageEntries.length} images successfully.`);

				return buildImagePresentationPayload(
					downloadedImageEntries,
					"duckduckgo",
					COMPACT_INLINE_LIMIT,
					COMPACT_GALLERY_LIMIT,
				);
			} catch (error: any) {
				if (error instanceof DOMException && error.name === "AbortError") {
					return "Search aborted by user.";
				}
				console.error(error);
				warn(`Error during search: ${error.message}`);
				return `Error: ${error.message}`;
			}
		},
	});


	tools.push(duckDuckGoWebSearchTool);
	tools.push(duckDuckGoImageSearchTool);
	return tools;
}

/**
 * Maps a configuration value to `undefined` when it equals the sentinel that
 * stands for "no explicit setting", so callers can chain `??` fallbacks.
 * Any other value is passed through unchanged.
 */
const undefinedIfAuto = (value: unknown, autoValue: unknown) => {
	if (value === autoValue) {
		return undefined;
	}
	// The cast keeps the inferred return type as `undefined` so the result can
	// sit at the head of a `??` chain without widening the assigned variable.
	return value as undefined;
};
// Maximum thumbnail width, passed as the resize width in createThumbnailWebp.
const THUMB_MAX_WIDTH = 360;
// Quality value passed to the WebP encoder in createThumbnailWebp.
const THUMB_WEBP_QUALITY = 60;

// Pool of User-Agent strings; spoofHeaders picks one at random for each set of
// request headers it builds.
const spoofedUserAgents = [
	// Random spoofed realistic user agents for DuckDuckGo
	"Mozilla/5.0 (Linux; Android 10; SM-M515F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Mobile Safari/537.36",
	"Mozilla/5.0 (Linux; Android 6.0; E5533) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.101 Mobile Safari/537.36",
	"Mozilla/5.0 (Linux; Android 8.1.0; AX1082) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.83 Mobile Safari/537.36",
	"Mozilla/5.0 (Linux; Android 8.1.0; TM-MID1020A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.96 Safari/537.36",
	"Mozilla/5.0 (Linux; Android 9; POT-LX1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Mobile Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3.1 Safari/605.1.15",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 Edg/97.0.1072.71",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36 Edg/98.0.1108.62",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36",
	"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36",
	"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0",
	"Opera/9.80 (Android 7.0; Opera Mini/36.2.2254/119.132; U; id) Presto/2.12.423 Version/12.16",
]

/**
 * Builds a browser-like set of request headers.
 *
 * @param mode      Kind of resource being requested; selects the `Accept` and
 *                  `Sec-Fetch-*` headers. Defaults to "document".
 * @param targetUrl Optional URL of the request target. When it parses, the
 *                  `Referer` is set to that URL's scheme and hostname;
 *                  otherwise the DuckDuckGo home page is used.
 * @returns A plain header object with a randomly chosen User-Agent.
 */
function spoofHeaders(mode: "document" | "json" | "image" = "document", targetUrl?: string){
	let referer = "https://duckduckgo.com/";
	if (targetUrl) {
		try {
			const target = new URL(targetUrl);
			referer = `${target.protocol}//${target.hostname}/`;
		} catch {
			// Unparseable target: keep the DuckDuckGo default.
			referer = "https://duckduckgo.com/";
		}
	}

	const userAgentIndex = Math.floor(Math.random() * spoofedUserAgents.length);
	const sharedHeaders = {
		'User-Agent': spoofedUserAgents[userAgentIndex],
		'Accept-Language': 'en-US,en;q=0.9',
		'Accept-Encoding': 'gzip, deflate, br',
		'Connection': 'keep-alive',
		'Referer': referer,
		'Origin': 'https://duckduckgo.com',
		'Cache-Control': 'no-cache',
	};

	switch (mode) {
		case "json":
			return {
				...sharedHeaders,
				'Accept': 'application/json,text/plain,*/*',
				'Sec-Fetch-Dest': 'empty',
				'Sec-Fetch-Mode': 'cors',
				'Sec-Fetch-Site': 'same-origin',
			};
		case "image":
			return {
				...sharedHeaders,
				'Accept': 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8',
				'Sec-Fetch-Dest': 'image',
				'Sec-Fetch-Mode': 'no-cors',
				'Sec-Fetch-Site': 'cross-site',
			};
		default:
			// "document": top-level navigation; this variant also replaces the
			// shared Cache-Control value.
			return {
				...sharedHeaders,
				'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
				'Upgrade-Insecure-Requests': '1',
				'Sec-Fetch-Dest': 'document',
				'Sec-Fetch-Mode': 'navigate',
				'Sec-Fetch-Site': 'same-origin',
				'Sec-Fetch-User': '?1',
				'Cache-Control': 'max-age=0',
			};
	}
}

/**
 * Performs a fetch, retrying on selected HTTP status codes and on network
 * errors with exponential backoff plus random jitter.
 *
 * @param url              Request URL.
 * @param init             Fetch options; if `init.signal` is aborted, the
 *                         error is rethrown immediately without retrying.
 * @param retryStatusCodes Status codes that trigger another attempt.
 * @param attempts         Maximum number of attempts.
 * @returns The first response that is OK or not retryable, or the response of
 *          the final attempt whatever its status.
 * @throws The fetch error of the final attempt (or of an aborted request), or
 *         `Error("Request failed")` when `attempts` is less than 1.
 */
async function fetchWithRetries(
	url: string,
	init: RequestInit,
	retryStatusCodes: number[],
	attempts: number,
): Promise<Response> {
	for (let attempt = 0; attempt < attempts; attempt++) {
		const isLastAttempt = attempt === attempts - 1;
		try {
			const response = await fetch(url, init);
			if (response.ok || isLastAttempt || !retryStatusCodes.includes(response.status)) {
				return response;
			}
			// This response is being discarded in favour of a retry. Release its
			// body so the underlying connection is not held open by unread data.
			await response.body?.cancel().catch(() => undefined);
		} catch (error) {
			const aborted = init.signal instanceof AbortSignal && init.signal.aborted;
			if (aborted || isLastAttempt) {
				throw error;
			}
		}
		// 400 ms, 800 ms, 1600 ms, ... plus up to 299 ms of jitter.
		const backoff = 400 * Math.pow(2, attempt) + Math.floor(Math.random() * 300);
		await delay(backoff);
	}
	// The last loop iteration always returns or throws, so this point is only
	// reached when the loop body never ran (attempts < 1).
	throw new Error("Request failed");
}

/**
 * Pulls the `vqd` token out of a page's HTML.
 *
 * Tries a quoted form (`vqd="..."` or `vqd='...'`) first, then an unquoted
 * form made of digits and dashes.
 *
 * @returns The token, or null when neither form is present.
 */
function extractVqdToken(html: string): string | null {
	const tokenPatterns = [
		/vqd=['"]([^'"]+)['"]/,
		/vqd=([\d-]+)/,
	];
	for (const pattern of tokenPatterns) {
		const token = pattern.exec(html)?.[1];
		if (token) {
			return token;
		}
	}
	return null;
}

/**
 * Reports whether an HTTP status code is one of those this plugin treats as a
 * sign of being blocked: 401, 403, 429 or 503.
 */
function isLikelyBlockedStatus(statusCode: number): boolean {
	switch (statusCode) {
		case 401:
		case 403:
		case 429:
		case 503:
			return true;
		default:
			return false;
	}
}

/**
 * Chooses the URL to download for one image search result.
 *
 * Returns the first truthy value among the result's `image`, `thumbnail` and
 * `url` properties, in that order, or null when none is set (or the result
 * itself is null/undefined).
 */
function selectImageUrl(result: any): string | null {
	const preferredFields = ["image", "thumbnail", "url"];
	for (const field of preferredFields) {
		const candidate = result?.[field];
		if (candidate) {
			return candidate;
		}
	}
	return null;
}

/**
 * Resolves a possibly relative URL against a base URL.
 *
 * @param rawUrl  URL as found in the page (absolute, relative or protocol-relative).
 * @param baseUrl URL of the page the reference was found on.
 * @returns The absolute URL string when it resolves to http or https;
 *          null for empty input, unparseable input, or any other scheme.
 */
function toAbsoluteUrl(rawUrl: string, baseUrl: string): string | null {
	if (!rawUrl) {
		return null;
	}
	let resolved: URL;
	try {
		resolved = new URL(rawUrl, baseUrl);
	} catch {
		return null;
	}
	const isWebScheme = resolved.protocol === "http:" || resolved.protocol === "https:";
	return isWebScheme ? resolved.toString() : null;
}

/**
 * Collects candidate image URLs from a page.
 *
 * Two sources are scanned: `<img>` tags (using the first of `src`, `data-src`,
 * `data-original`, or the first `srcset` entry) and Markdown image syntax with
 * an absolute http(s) URL. Every candidate is resolved against `pageUrl`;
 * anything that does not resolve to http(s) is dropped.
 *
 * @param html    Page content to scan.
 * @param pageUrl URL the content was fetched from, used to resolve relative references.
 * @returns De-duplicated absolute URLs in order of first appearance.
 */
function extractImageCandidatesFromHtml(html: string, pageUrl: string): string[] {
	const candidates: string[] = [];
	const imgTagRegex = /<img[^>]+>/gmi;
	// Reads a double- or single-quoted attribute value from a tag. The value is
	// HTML source text, so "&amp;" (the usual way "&" is written inside
	// attribute values) is decoded; otherwise query strings with more than one
	// parameter would be requested with a literal "amp;" in them.
	const attr = (tag: string, name: string): string | undefined => {
		const found = tag.match(new RegExp(`${name}=(?:"([^"]+)"|'([^']+)')`, "i"));
		const rawValue = found?.[1] ?? found?.[2];
		return rawValue?.replace(/&amp;/g, "&");
	};
	let m: RegExpExecArray | null;
	while ((m = imgTagRegex.exec(html))) {
		const tag = m[0];
		const src = attr(tag, "src")
			|| attr(tag, "data-src")
			|| attr(tag, "data-original")
			// srcset is a comma-separated list of "url descriptor" pairs; take the first URL.
			|| attr(tag, "srcset")?.split(",")?.[0]?.trim()?.split(/\s+/)?.[0];
		if (!src) continue;
		const resolved = toAbsoluteUrl(src, pageUrl);
		if (resolved) candidates.push(resolved);
	}
	const markdownImageRegex = /!\[[^\]]*\]\((https?:\/\/[^)\s]+)\)/gmi;
	while ((m = markdownImageRegex.exec(html))) {
		const resolved = toAbsoluteUrl(m[1], pageUrl);
		if (resolved) candidates.push(resolved);
	}
	return [...new Set(candidates)];
}

/**
 * Runs a DuckDuckGo HTML search and returns the result URLs only.
 *
 * @param query      Search terms.
 * @param pageSize   Maximum number of URLs to return; also used to compute the
 *                   start offset for pages after the first.
 * @param safeSearch Safe-search level; "moderate" sends no explicit parameter.
 * @param page       1-based page number.
 * @param signal     Abort signal forwarded to the request.
 * @returns Distinct normalized result URLs in page order; an empty list when
 *          the request does not succeed.
 */
async function searchWebLinks(
	query: string,
	pageSize: number,
	safeSearch: "strict" | "moderate" | "off",
	page: number,
	signal: AbortSignal,
): Promise<string[]> {
	const headers = spoofHeaders("document");

	const endpoint = new URL("https://html.duckduckgo.com/html/");
	const params = endpoint.searchParams;
	params.append("q", query);
	if (safeSearch !== "moderate") {
		const level = safeSearch === "strict" ? "-1" : "1";
		params.append("p", level);
	}
	if (page > 1) {
		const startIndex = (pageSize * (page - 1)) || 0;
		params.append("s", startIndex.toString());
	}

	const response = await fetchWithRetries(
		endpoint.toString(),
		{ method: "GET", signal, headers },
		[429, 500, 502, 503, 504],
		2,
	);
	if (!response.ok) {
		return [];
	}

	const body = await response.text();
	const anchorPattern = /<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gmi;
	const collected: string[] = [];
	// Walk the anchors in document order until enough distinct URLs are found.
	while (collected.length < pageSize) {
		const anchor = anchorPattern.exec(body);
		if (!anchor) {
			break;
		}
		const target = normalizeSearchResultUrl(anchor[1]);
		if (target && !collected.includes(target)) {
			collected.push(target);
		}
	}
	return collected;
}

/**
 * Downloads the given image URLs in parallel into the working directory and
 * creates a WebP thumbnail for each one.
 *
 * Files are named `<timestamp>-<index>.<ext>` and `<timestamp>-<index>-thumb.webp`,
 * where the timestamp is taken once per call and the index is the 1-based
 * position of the URL in `imageURLs`.
 *
 * @param ctl       Controller used to obtain the working directory.
 * @param imageURLs Absolute image URLs to fetch.
 * @param signal    Abort signal forwarded to every request.
 * @param warn      Callback used to report per-image failures.
 * @returns One entry per successfully saved image, holding file names relative
 *          to the working directory. `thumbPath` equals `fullPath` when no
 *          thumbnail could be created. Failed images are omitted.
 */
async function downloadImageUrls(
	ctl: ToolsProviderController,
	imageURLs: string[],
	signal: AbortSignal,
	warn: (message: string) => void,
): Promise<Array<{ fullPath: string; thumbPath: string }>> {
	const workingDirectory = ctl.getWorkingDirectory();
	const timestamp = Date.now();
	const downloadPromises = imageURLs.map(async (url: string, i: number) => {
		const index = i + 1;
		try {
			const imageResponse = await fetchWithRetries(url, {
				method: "GET",
				signal,
				headers: spoofHeaders("image", url),
			}, [429, 500, 502, 503, 504], 2);
			if (!imageResponse.ok) {
				warn(`Failed to fetch image ${index}: ${imageResponse.status} ${imageResponse.statusText}`);
				return null;
			}
			// Lower-cased once so that both the image check and the extension lookup
			// are case-insensitive.
			const contentType = (imageResponse.headers.get("content-type") || "").toLowerCase();
			// Skip anything that is not served as an image (e.g. an HTML error page).
			if (!contentType.startsWith("image/")) return null;
			// arrayBuffer() is used instead of Response.bytes(), which is a recent
			// addition and is missing on older runtimes.
			const bytes = new Uint8Array(await imageResponse.arrayBuffer());
			if (bytes.length === 0) return null;
			// Prefer the subtype from the content type; otherwise use the extension
			// of the URL path (query string excluded); otherwise assume JPEG.
			const fileExtension = /image\/([\w]+)/.exec(contentType)?.[1]
				|| /\.([\w]+)$/.exec(new URL(url).pathname)?.[1]
				|| "jpg";
			const fileName = `${timestamp}-${index}.${fileExtension}`;
			const filePath = join(workingDirectory, fileName);
			const localPath = fileName;
			await writeFile(filePath, bytes, "binary");
			const thumbFileName = `${timestamp}-${index}-thumb.webp`;
			const thumbFilePath = join(workingDirectory, thumbFileName);
			const thumbCreated = await createThumbnailWebp(bytes, thumbFilePath);
			// Without a thumbnail the full-size file doubles as its own thumbnail.
			const thumbLocalPath = thumbCreated ? thumbFileName : localPath;
			return { fullPath: localPath, thumbPath: thumbLocalPath };
		} catch (error: any) {
			// An aborted download is dropped silently; other failures are reported.
			if (error instanceof DOMException && error.name === "AbortError") return null;
			warn(`Error fetching image ${index}: ${error.message}`);
			return null;
		}
	});
	return (await Promise.all(downloadPromises)).filter((x): x is { fullPath: string; thumbPath: string } => !!x);
}

/**
 * Fallback image search: runs an ordinary web search, scans the first few
 * result pages for image references, and downloads what it finds.
 *
 * @param ctl        Controller passed through to the downloader.
 * @param query      Search terms.
 * @param pageSize   Requested number of images; scales the candidate limits.
 * @param safeSearch Safe-search level for the web search.
 * @param page       1-based page number for the web search.
 * @param signal     Abort signal forwarded to every request.
 * @param warn       Callback used to report failed page fetches.
 * @returns Entries for every candidate that downloaded successfully (the
 *          result is not trimmed to `pageSize` here).
 * @throws Re-throws an AbortError raised while fetching a result page.
 */
async function runWebFallbackImageSearch(
	ctl: ToolsProviderController,
	query: string,
	pageSize: number,
	safeSearch: "strict" | "moderate" | "off",
	page: number,
	signal: AbortSignal,
	warn: (message: string) => void,
): Promise<Array<{ fullPath: string; thumbPath: string }>> {
	const MAX_PAGES_TO_SCAN = 6;
	const RETRYABLE_STATUSES = [429, 500, 502, 503, 504];

	// Ask for at least five links so small page sizes still have pages to scan.
	const resultLinks = await searchWebLinks(query, Math.max(5, pageSize), safeSearch, page, signal);
	const pagesToScan = resultLinks.slice(0, MAX_PAGES_TO_SCAN);

	const harvested: string[] = [];
	for (const pageLink of pagesToScan) {
		// Stop visiting pages once plenty of candidates have been gathered.
		if (harvested.length >= pageSize * 5) {
			break;
		}
		try {
			const requestInit = {
				method: "GET",
				signal,
				headers: spoofHeaders("document", pageLink),
			};
			const pageResponse = await fetchWithRetries(pageLink, requestInit, RETRYABLE_STATUSES, 2);
			if (pageResponse.ok) {
				const markup = await pageResponse.text();
				const found = extractImageCandidatesFromHtml(markup, pageLink);
				harvested.push(...found);
			}
		} catch (error: any) {
			const wasAborted = error instanceof DOMException && error.name === "AbortError";
			if (wasAborted) {
				throw error;
			}
			warn(`Fallback page fetch failed: ${error.message}`);
		}
	}

	const distinct = Array.from(new Set(harvested));
	const toDownload = distinct.slice(0, pageSize * 8);
	return downloadImageUrls(ctl, toDownload, signal, warn);
}

/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 */
function delay(ms: number): Promise<void> {
	return new Promise<void>((done) => {
		setTimeout(done, ms);
	});
}

/**
 * Turns an href taken from a DuckDuckGo result page into the URL of the
 * actual result.
 *
 * - Fragment, `javascript:` and `mailto:` hrefs are rejected.
 * - Protocol-relative hrefs (`//host/...`) are treated as https.
 * - Links pointing at DuckDuckGo are only accepted when they carry a `uddg`
 *   redirect parameter, whose value is returned; other DuckDuckGo links are
 *   internal navigation and are skipped.
 * - Any other link is returned in normalized form.
 *
 * @param rawHref The href attribute value as found in the page.
 * @returns The result URL, or null when the href is not a search result.
 */
function normalizeSearchResultUrl(rawHref: string): string | null {
	if (!rawHref || rawHref.startsWith("#") || rawHref.startsWith("javascript:") || rawHref.startsWith("mailto:")) {
		return null;
	}
	const href = rawHref.startsWith("//") ? `https:${rawHref}` : rawHref;
	try {
		const parsed = new URL(href, "https://duckduckgo.com");
		const host = parsed.hostname;
		// Match the domain itself or a real subdomain; a plain suffix test would
		// also catch unrelated hosts such as "notduckduckgo.com".
		const isDuckDuckGo = host === "duckduckgo.com" || host.endsWith(".duckduckgo.com");
		if (!isDuckDuckGo) {
			return parsed.toString();
		}
		// searchParams.get() already percent-decodes the value. Decoding it a
		// second time would corrupt targets that contain encoded characters
		// (e.g. "%20" or "%26") and throw on a literal "%".
		const redirected = parsed.searchParams.get("uddg");
		// Skip DDG internal links so we don't return the homepage as "result".
		return redirected || null;
	} catch {
		return null;
	}
}

/**
 * Converts an HTML fragment to plain text.
 *
 * Tags are replaced by a single space, then character references are decoded
 * in one pass: the named references nbsp (to a regular space), amp, quot, lt,
 * gt and apos, plus decimal and hexadecimal numeric references. Unknown or
 * invalid references are left as written.
 *
 * @param input HTML fragment, e.g. the inner HTML of a link.
 * @returns The text with tags removed and references decoded. Whitespace is
 *          not collapsed.
 */
function stripHtml(input: string): string {
	const namedReferences = new Map<string, string>([
		["nbsp", " "],
		["amp", "&"],
		["quot", "\""],
		["lt", "<"],
		["gt", ">"],
		["apos", "'"],
	]);
	return input
		.replace(/<[^>]+>/g, " ")
		// All references are decoded in a single pass so that text produced by one
		// replacement is never decoded again ("&amp;quot;" must become "&quot;",
		// not a quotation mark).
		.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (reference: string, body: string) => {
			if (!body.startsWith("#")) {
				return namedReferences.get(body) ?? reference;
			}
			const isHex = body[1] === "x" || body[1] === "X";
			const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
			const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
			const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
			return isValid ? String.fromCodePoint(codePoint) : reference;
		});
}

/**
 * Builds the tool result returned after images have been downloaded.
 *
 * @param localImages  Downloaded images; only `thumbPath` is embedded.
 * @param source       Label describing where the images came from.
 * @param inlineLimit  The individual `images` list is only included when the
 *                     number of images does not exceed this value.
 * @param galleryLimit Accepted for callers but not used by this function.
 * @returns An object with `count`, `source`, `compactGalleryMarkdown`, `hint`,
 *          `thumbnailsUsed` and, for small results, `images`.
 */
function buildImagePresentationPayload(
	localImages: Array<{ fullPath: string; thumbPath: string }>,
	source = "duckduckgo",
	inlineLimit = 2,
	galleryLimit = 6,
) {
	const embeds: string[] = [];
	localImages.forEach((entry, position) => {
		embeds.push(`![Image ${position + 1}](${entry.thumbPath})`);
	});

	const payload: Record<string, unknown> = {
		count: localImages.length,
		source,
		compactGalleryMarkdown: buildCompactGalleryMarkdown(embeds),
		hint: "These images are already downloaded and renderable (stored locally). Use compactGalleryMarkdown when you want a compact table gallery; otherwise, please, embed images where they fit with ![](imagefile). The user will not see images unless you embed them. Only standard Markdown (no HTML <img>). Do NOT call Download Images as these are already downloaded local files",
		thumbnailsUsed: true,
	};

	// Small result sets additionally expose each embed on its own.
	const fitsInline = embeds.length <= inlineLimit;
	if (fitsInline) {
		payload.images = embeds;
	}
	return payload;
}

/**
 * Lays out Markdown image embeds as a two-column Markdown table.
 *
 * @param imageMarkdowns Markdown snippets, one per image.
 * @returns The table (header, separator, then one row per pair of images with
 *          an empty right cell for an odd last image), or an empty string when
 *          there are no images.
 */
function buildCompactGalleryMarkdown(imageMarkdowns: string[]): string {
	if (imageMarkdowns.length === 0) {
		return "";
	}
	const rowCount = Math.ceil(imageMarkdowns.length / 2);
	const bodyRows = Array.from({ length: rowCount }, (_unused, rowIndex) => {
		const leftCell = imageMarkdowns[rowIndex * 2] || "";
		const rightCell = imageMarkdowns[rowIndex * 2 + 1] || "";
		return `| ${leftCell} | ${rightCell} |`;
	});
	return ["| 1 | 2 |", "| --- | --- |", ...bodyRows].join("\n");
}

/**
 * Writes a WebP thumbnail of an image to disk using the `sharp` module.
 *
 * `sharp` is imported on demand; if it cannot be loaded, or resizing, encoding
 * or writing fails for any reason, the function reports failure instead of
 * throwing.
 *
 * @param inputBytes Encoded source image.
 * @param outputPath Destination path of the thumbnail file.
 * @returns true when the thumbnail file was written, false otherwise.
 */
async function createThumbnailWebp(
	inputBytes: Uint8Array,
	outputPath: string,
): Promise<boolean> {
	try {
		const loaded = await import("sharp");
		// Use the default export when there is one, otherwise the module object itself.
		const sharp = (loaded as any).default || loaded;
		const pipeline = sharp(Buffer.from(inputBytes))
			// Scale down to the maximum width; images that are already smaller keep their size.
			.resize({ width: THUMB_MAX_WIDTH, fit: "inside", withoutEnlargement: true })
			.webp({ quality: THUMB_WEBP_QUALITY });
		const encoded = await pipeline.toBuffer();
		if (encoded?.length) {
			await writeFile(outputPath, encoded, "binary");
			return true;
		}
		return false;
	} catch {
		return false;
	}
}