analyze-images

Public
Plugin
Revisions
Back
Project Files
src
index.ts
toolsProvider.ts
manifest.json
package-lock.json
package.json
README.md
tsconfig.json
src / toolsProvider.ts
import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { readdir, stat } from "fs/promises";
import { basename, dirname, extname, isAbsolute, join, normalize, relative } from "path";
import { z } from "zod";

/** Lower-case file extensions (with leading dot) that are treated as images. */
const IMAGE_EXTENSIONS = new Set<string>([
  ".jpg", ".jpeg",
  ".png",
  ".webp",
  ".gif",
  ".bmp",
  ".tiff", ".tif",
  ".avif",
]);

/** Value passed as `maxTokens` to the model for one analysis response. */
const MAX_ANALYSIS_TOKENS_HARD_CAP = 2048;

/** Answer length, in tokens, that the analysis prompt asks the model to aim for. */
const ANALYSIS_TOKENS_SOFT_TARGET = 512;

/** One image file discovered while scanning a directory tree. */
type FoundImage = {
  /** Full path of the file, built by joining the scanned directory with entry names. */
  absolutePath: string;
  /** Path relative to the scan root, always written with "/" separators. */
  relativePath: string;
  /** File size in bytes, as reported by `stat`. */
  sizeBytes: number;
  /** Last-modified time as an ISO-8601 string. */
  modifiedAt: string;
};

/**
 * Builds the list of tools this plugin exposes.
 *
 * The list contains a single tool, "Analyze Local Image", which resolves an
 * image name inside the controller's working directory, hands the file to the
 * currently loaded model together with a fixed instruction prompt, and returns
 * the model's text answer. Every failure the tool detects itself is reported
 * as an "Error: ..." string rather than thrown.
 *
 * @param ctl Controller supplying the working directory and the SDK client.
 * @returns An array holding the single image-analysis tool.
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<Tool[]> {
  const analyzeLocalImageTool = tool({
    name: "Analyze Local Image",
    description: "Allows you to analyze one local image from the working directory.",
    parameters: {
      imageName: z.string().describe(
        "Image file name from List Local Images, e.g. '1774334299591-9-thumb.webp'.",
      ),
      prompt: z.string().describe(
        "Required analysis task/question. Keep it clear and specific.",
      ),
      context: z.string().describe(
        "Required known context for this image and task (source, intent, constraints, known facts, prior findings).",
      ),
    },
    implementation: async ({ imageName, prompt, context }, { status, warn }) => {
      const rootDirectory = ctl.getWorkingDirectory();

      // Reject empty, absolute, or parent-escaping names before touching the disk.
      const requestedName = sanitizeRelativeInput(imageName);
      if (!requestedName) {
        return "Error: imageName is empty or invalid.";
      }

      const located = await resolveImagePathByName(rootDirectory, requestedName, warn);
      if (!located) {
        return `Error: image not found: ${requestedName}`;
      }

      status("Preparing image for multimodal model...");
      const model = await ctl.client.llm.model();
      if (!model.vision) {
        return "Error: currently loaded model does not support vision. Load a vision model and retry.";
      }

      const imageHandle = await ctl.client.files.prepareImage(located.absolutePath);

      // Fixed instructions first, then the caller's context, then the caller's request.
      const trimmedPrompt = prompt.trim();
      const trimmedContext = context.trim();
      const instructions = [
        `You are a vision assistant. Analyze the provided image and give a concise final answer. `,
        `Do not provide hidden reasoning or step-by-step chain-of-thought. `,
        `If uncertain, state uncertainty briefly. `,
        `Target up to ${ANALYSIS_TOKENS_SOFT_TARGET} tokens in the final answer.\n\n`,
        `Known context:\n${trimmedContext}\n\n`,
        `User request:\n${trimmedPrompt}`,
      ].join("");

      status("Running multimodal analysis...");
      const response = await model.respond(
        [{ role: "user", content: instructions, images: [imageHandle] }],
        { maxTokens: MAX_ANALYSIS_TOKENS_HARD_CAP },
      );

      return response.content;
    },
  });

  return [analyzeLocalImageTool];
}

/**
 * Scans a directory breadth-first and gathers metadata for the image files in it.
 *
 * Directories that cannot be read and files that cannot be stat'ed are reported
 * through `warn` and skipped; they never abort the scan.
 *
 * @param directoryPath Directory to start from; also the base for `relativePath`.
 * @param recursive When true, sub-directories are queued and scanned as well.
 * @param maxResults Scanning stops as soon as this many images have been collected.
 * @param warn Callback receiving a message for each unreadable directory or file.
 * @returns The images found, in discovery order, at most `maxResults` of them.
 */
async function collectImages(
  directoryPath: string,
  recursive: boolean,
  maxResults: number,
  warn: (text: string) => void,
): Promise<FoundImage[]> {
  type DirectoryEntry = { name: string; isFile: () => boolean; isDirectory: () => boolean };

  const images: FoundImage[] = [];
  const pendingDirectories: string[] = [directoryPath];

  while (images.length < maxResults) {
    const directory = pendingDirectories.shift();
    if (directory === undefined) break; // nothing left to scan

    let children: DirectoryEntry[];
    try {
      children = await readdir(directory, { withFileTypes: true });
    } catch (error: any) {
      warn(`Cannot read directory '${directory}': ${error?.message || String(error)}`);
      continue;
    }

    for (const child of children) {
      if (images.length >= maxResults) break;

      const childPath = join(directory, child.name);

      if (child.isDirectory()) {
        // Sub-directories are only descended into when a recursive scan was requested.
        if (recursive) pendingDirectories.push(childPath);
        continue;
      }

      // Only regular files whose extension is a known image extension are kept.
      if (!(child.isFile() && isImagePath(child.name))) continue;

      try {
        const { size, mtime } = await stat(childPath);
        const pathFromRoot = normalize(relative(directoryPath, childPath)).replace(/\\/g, "/");
        images.push({
          absolutePath: childPath,
          relativePath: pathFromRoot,
          sizeBytes: size,
          modifiedAt: mtime.toISOString(),
        });
      } catch (error: any) {
        warn(`Cannot stat file '${childPath}': ${error?.message || String(error)}`);
      }
    }
  }

  return images;
}

/**
 * Tells whether a file name or path ends in one of the known image extensions.
 * The comparison ignores the case of the extension.
 *
 * @param value File name or path to inspect.
 * @returns True when the extension is listed in `IMAGE_EXTENSIONS`.
 */
function isImagePath(value: string): boolean {
  const extension = extname(value);
  return IMAGE_EXTENSIONS.has(extension.toLowerCase());
}

/**
 * Validates a caller-supplied relative path and returns it in a canonical form.
 *
 * The result uses "/" separators, has "." and ".." segments resolved, and has
 * no leading "./". The input is rejected (null) when it is missing, blank,
 * absolute, resolves to the base directory itself, or would climb above the
 * base directory.
 *
 * Backslashes are treated as separators on every platform and are converted
 * BEFORE normalising, so inputs such as "a\\.." or "\\x.png" cannot slip past
 * the checks on POSIX, where `normalize` would otherwise see "\" as an
 * ordinary character.
 *
 * @param input Raw path text, typically a file name from a tool call.
 * @returns The cleaned relative path, or null when the input is unusable.
 */
function sanitizeRelativeInput(input?: string): string | null {
  if (!input) return null;
  const trimmed = input.trim();
  if (!trimmed) return null;

  const slashed = trimmed.replace(/\\/g, "/");
  if (isAbsolute(trimmed) || isAbsolute(slashed)) return null;

  // On Windows `normalize` emits "\" again, hence the second conversion.
  const normalized = normalize(slashed).replace(/\\/g, "/").replace(/^\.\/+/, "");

  // "" and "." both mean "the base directory itself", which is not a file name.
  if (!normalized || normalized === ".") return null;

  // After normalisation any surviving ".." segment points above the base
  // directory; checking whole segments also catches a bare "..".
  if (normalized.split("/").includes("..")) return null;

  return normalized;
}

/**
 * Locates an image inside the working directory from a (sanitised) name.
 *
 * Lookup order:
 *   1. `<workingDirectory>/<imageName>` taken literally, if it is a regular
 *      file with an image extension.
 *   2. Otherwise the first image found by a recursive scan (capped at 1000
 *      images) whose base name equals the requested base name, ignoring case.
 *
 * Whatever is found is passed through `preferFullImageIfThumb`, which decides
 * the path that is finally returned.
 *
 * @param workingDirectory Directory the lookup is confined to.
 * @param imageName Relative name or path of the wanted image.
 * @param warn Callback forwarded to the recursive scan for I/O problems.
 * @returns The resolved paths, or null when no matching image exists.
 */
async function resolveImagePathByName(
  workingDirectory: string,
  imageName: string,
  warn: (text: string) => void,
): Promise<{ absolutePath: string; relativePath: string } | null> {
  // Step 1: the name taken literally, relative to the working directory.
  const literalPath = join(workingDirectory, imageName);
  let literalStats: Awaited<ReturnType<typeof stat>> | null;
  try {
    literalStats = await stat(literalPath);
  } catch {
    literalStats = null; // a missing or unreadable path just means "try the fallback"
  }
  if (literalStats !== null && literalStats.isFile() && isImagePath(imageName)) {
    const direct = await preferFullImageIfThumb(literalPath, imageName);
    return direct;
  }

  // Step 2: search the whole tree for an image with the same base name.
  const scanned = await collectImages(workingDirectory, true, 1000, warn);
  const wantedName = basename(imageName).toLowerCase();
  for (const image of scanned) {
    if (basename(image.relativePath).toLowerCase() !== wantedName) continue;
    const viaScan = await preferFullImageIfThumb(image.absolutePath, image.relativePath);
    return viaScan;
  }

  return null;
}

/**
 * Swaps a "-thumb.webp" thumbnail for its full-size sibling when one exists.
 *
 * For a file named `<stem>-thumb.webp` the same directory is probed for
 * `<stem><ext>`, trying the extensions in a fixed order (".png" first). The
 * first candidate that is a regular file and passes `isImagePath` wins. Any
 * other file name, or a thumbnail without such a sibling, is returned as given.
 *
 * @param absolutePath Absolute path of the image that was located.
 * @param relativePath Path of the same image relative to the working directory.
 * @returns Paths of the full-size sibling if found, otherwise the input paths.
 */
async function preferFullImageIfThumb(
  absolutePath: string,
  relativePath: string,
): Promise<{ absolutePath: string; relativePath: string }> {
  const unchanged = { absolutePath, relativePath };

  const thumbParts = /^(.*)-thumb\.webp$/i.exec(basename(relativePath));
  if (thumbParts === null) {
    return unchanged;
  }

  const stem = thumbParts[1];
  const absoluteDir = dirname(absolutePath);
  const relativeDir = dirname(relativePath).replace(/\\/g, "/");

  // Probe order matters: the first extension with an existing file is used.
  const fullSizeExtensions = [".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".tif", ".avif"];

  for (const extension of fullSizeExtensions) {
    const siblingName = `${stem}${extension}`;
    const siblingAbsolutePath = join(absoluteDir, siblingName);
    const siblingStats = await stat(siblingAbsolutePath).catch(() => null);

    // The isImagePath check also filters an empty stem (e.g. ".png"), which has no extension.
    const usable = siblingStats?.isFile() === true && isImagePath(siblingName);
    if (!usable) continue;

    return {
      absolutePath: siblingAbsolutePath,
      relativePath: relativeDir === "." ? siblingName : `${relativeDir}/${siblingName}`,
    };
  }

  return unchanged;
}