// src/media/videoAnalysis.ts

/**
 * @file Video analysis tool — extracts evenly-spaced frames using ffmpeg (absolute path).
 *
 * Returns an array of low-resolution base64 frames for vision model analysis.
 * Requires ffmpeg installed on the system (brew install ffmpeg on macOS).
 */

import { tool, type Tool, type ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { mkdir, readFile, rm, stat } from "fs/promises";
import { extname, isAbsolute, join, resolve } from "path";
import { tmpdir } from "os";
import { randomBytes } from "crypto";
import { execFile } from "child_process";
import { promisify } from "util";
import { getFfmpegPath, getFfprobePath } from "./ffmpegPath";

/** Promise-returning variant of `child_process.execFile`, used to run external binaries. */
const execFileAsync = promisify(execFile);

/**
 * File extensions (leading dot included, lower case) that the tool accepts.
 * Insertion order matters: it is the order shown in the "unsupported format" error.
 */
const SUPPORTED_VIDEO_EXTENSIONS = new Set<string>([
  ".mp4",
  ".mov",
  ".avi",
  ".mkv",
  ".webm",
  ".m4v",
]);

/** Directory roots that the tool refuses to read video files from. */
const BLOCKED_PREFIXES = ["/etc", "/var", "/usr", "/System", "/Library", "/private"];

/**
 * Reports whether a path lies outside every directory in `BLOCKED_PREFIXES`.
 *
 * The path is normalized first so that `..` segments cannot be used to step
 * into a blocked directory (e.g. `/tmp/../etc/x.mp4`), and prefixes are matched
 * on whole path segments so that unrelated siblings such as `/etcetera` are
 * not rejected by accident.
 *
 * NOTE(review): symbolic links are not resolved here, so a link that points
 * into a blocked directory is still treated as safe.
 *
 * @param p - Path to check; relative paths are resolved against the current working directory.
 * @returns `true` when the path is not inside a blocked directory.
 */
function isSafePath(p: string): boolean {
  const normalized = resolve(p);
  return !BLOCKED_PREFIXES.some(
    prefix => normalized === prefix || normalized.startsWith(`${prefix}/`),
  );
}

/** One extracted frame as returned to the caller. */
interface ExtractedFrame {
  index: number;
  timestamp_s: number;
  data_uri: string;
  bytes: number;
}

/**
 * Budget for the combined size of all returned frames, measured on the raw
 * JPEG bytes (the base64 data URIs are roughly one third larger).
 */
const MAX_TOTAL_FRAME_BYTES = 600_000;

/** Turns any thrown value into a printable message, preferring its `message` property. */
function describeError(err: unknown): string {
  const message = (err as { message?: unknown } | null | undefined)?.message;
  return typeof message === "string" && message ? message : String(err);
}

/** Coerces a requested frame count to an integer of at least 1. */
function normalizeFrameCount(requested: number): number {
  return Number.isFinite(requested) ? Math.max(1, Math.floor(requested)) : 1;
}

/** Runs ffprobe on the video and returns the container duration in seconds (may be NaN). */
async function probeDurationSeconds(ffprobe: string, videoPath: string): Promise<number> {
  const { stdout } = await execFileAsync(ffprobe, [
    "-v", "quiet",
    "-print_format", "json",
    "-show_format",
    videoPath,
  ]);
  const probeData = JSON.parse(stdout) as { format?: { duration?: unknown } } | null;
  return parseFloat(String(probeData?.format?.duration ?? "0"));
}

/**
 * Extracts one scaled JPEG frame per timestamp (in parallel) and returns the
 * frames that succeeded; failures are reported through `warn` and skipped.
 */
async function extractFrames(
  ffmpeg: string,
  videoPath: string,
  timestamps: readonly number[],
  options: { outDir: string; filePrefix: string; maxDim: number; quality: number; action: string },
  warn: (message: string) => void,
): Promise<ExtractedFrame[]> {
  const { outDir, filePrefix, maxDim, quality, action } = options;
  const results = await Promise.all(
    timestamps.map(async (t, i): Promise<ExtractedFrame | null> => {
      const outputPath = join(outDir, `${filePrefix}${String(i).padStart(3, "0")}.jpg`);
      try {
        await execFileAsync(ffmpeg, [
          // Placing -ss before -i makes ffmpeg seek in the input rather than decode up to t.
          "-ss", String(t),
          "-i", videoPath,
          "-frames:v", "1",
          // Never upscale: each side is capped at min(maxDim, source size), aspect ratio kept.
          "-vf", `scale='min(${maxDim},iw)':'min(${maxDim},ih)':force_original_aspect_ratio=decrease`,
          "-q:v", String(quality),
          "-y",
          outputPath,
        ]);
        const buffer = await readFile(outputPath);
        return {
          index: i,
          timestamp_s: Math.round(t * 10) / 10,
          data_uri: `data:image/jpeg;base64,${buffer.toString("base64")}`,
          bytes: buffer.byteLength,
        };
      } catch (err: unknown) {
        warn(`Failed to ${action} frame at ${t.toFixed(1)}s: ${describeError(err)}`);
        return null;
      }
    }),
  );
  return results.filter((f): f is ExtractedFrame => f !== null);
}

/**
 * Builds the `analyze_video` tool, which samples evenly-spaced frames from a
 * local video with ffmpeg and returns them as base64 JPEG data URIs.
 *
 * The implementation never throws for expected failures; it returns an object
 * with an `error` string instead (blocked path, unsupported extension, missing
 * file, probe failure, no frames extracted). All intermediate files live in a
 * per-call temporary directory that is removed before returning.
 *
 * @param ctl - Tools provider controller supplied by the host (not used by this tool).
 * @param configFrameCount - Frame count used when the call omits `frame_count`.
 * @param configMaxDim - Max frame dimension used when the call omits `max_dimension` (capped at 768).
 * @returns The configured tool.
 */
export function createVideoAnalysisTool(ctl: ToolsProviderController, configFrameCount: number = 4, configMaxDim: number = 384): Tool {
  return tool({
    name: "analyze_video",
    description:
      "Extract evenly-spaced frames from a local video file and return them as low-resolution base64 images. " +
      "Supports MP4, MOV, AVI, MKV, WebM, M4V. " +
      "Use this to describe video content, detect scenes, or analyze screen recordings.",
    parameters: {
      file_path: z.string().describe("Absolute path to the video file."),
      frame_count: z
        .number()
        .int()
        .min(1)
        .max(10)
        .optional()
        .describe("Number of evenly-spaced frames to extract. Default: 4."),
      max_dimension: z
        .number()
        .int()
        .min(128)
        .max(768)
        .optional()
        .describe("Max width or height of each frame in pixels. Default: 384."),
    },
    implementation: async (
      { file_path, frame_count, max_dimension }: { file_path: string; frame_count?: number; max_dimension?: number },
      { status, warn },
    ) => {
      // The schema validates frame_count, but the configured default is not validated anywhere,
      // so coerce the final value to a usable integer.
      const numFrames = normalizeFrameCount(frame_count ?? configFrameCount);
      const maxDim = Math.min(max_dimension ?? configMaxDim, 768);

      const resolvedPath = isAbsolute(file_path) ? file_path : resolve(file_path);

      if (!isSafePath(resolvedPath)) {
        return { error: `Access denied: '${resolvedPath}' is in a protected system directory.` };
      }

      const ext = extname(resolvedPath).toLowerCase();
      if (!SUPPORTED_VIDEO_EXTENSIONS.has(ext)) {
        return {
          error: `Unsupported video format '${ext}'. Supported: ${[...SUPPORTED_VIDEO_EXTENSIONS].join(", ")}`,
        };
      }

      try {
        const fileStat = await stat(resolvedPath);
        if (!fileStat.isFile()) {
          return { error: `Not a file: ${resolvedPath}` };
        }
      } catch {
        return { error: `File not found: ${resolvedPath}` };
      }

      const tmpDir = join(tmpdir(), `maestro-video-${randomBytes(6).toString("hex")}`);

      try {
        // Created inside the try so a failure is reported as an error result, not thrown.
        await mkdir(tmpDir, { recursive: true });

        const ffmpeg = await getFfmpegPath();
        const ffprobe = await getFfprobePath();

        // Step 1: Get video duration
        status("Probing video metadata...");
        let duration: number;
        try {
          duration = await probeDurationSeconds(ffprobe, resolvedPath);
        } catch (err: unknown) {
          return {
            error: `Could not probe video: ${describeError(err)}`,
          };
        }

        // parseFloat yields NaN for non-numeric output, and NaN <= 0 is false, so check explicitly.
        if (!Number.isFinite(duration) || duration <= 0) {
          return { error: "Could not determine video duration." };
        }

        // Step 2: Calculate timestamps — interior points only, so neither the very
        // first nor the very last instant of the video is sampled.
        const timestamps = Array.from({ length: numFrames }, (_, i) =>
          (duration / (numFrames + 1)) * (i + 1),
        );

        // Step 3: Extract and resize frames directly with ffmpeg
        status(`Extracting ${numFrames} frames from ${duration.toFixed(1)}s video...`);

        let validFrames = await extractFrames(
          ffmpeg,
          resolvedPath,
          timestamps,
          { outDir: tmpDir, filePrefix: "frame_", maxDim, quality: 8, action: "extract" },
          warn,
        );
        // Dimension the returned frames were actually produced at.
        let effectiveDim = maxDim;

        if (validFrames.length === 0) {
          return { error: "Failed to extract any frames from the video." };
        }

        // Step 4: Enforce the total byte budget with one smaller, lower-quality pass.
        const totalBytes = validFrames.reduce((sum, f) => sum + f.bytes, 0);
        if (totalBytes > MAX_TOTAL_FRAME_BYTES) {
          const smallerDim = Math.round(maxDim * 0.6);
          status(`Frames too large (${(totalBytes / 1024).toFixed(0)}KB), re-compressing at ${smallerDim}px...`);
          const recompressed = await extractFrames(
            ffmpeg,
            resolvedPath,
            timestamps,
            { outDir: tmpDir, filePrefix: "frame_small_", maxDim: smallerDim, quality: 12, action: "re-compress" },
            warn,
          );
          // Keep the first-pass frames if the second pass produced nothing at all.
          if (recompressed.length > 0) {
            validFrames = recompressed;
            effectiveDim = smallerDim;
          }
        }

        status(`Done — ${validFrames.length} frames extracted`);
        return {
          file_path: resolvedPath,
          duration_s: Math.round(duration * 10) / 10,
          frame_count: validFrames.length,
          max_dimension: effectiveDim,
          frames: validFrames,
        };
      } catch (err: unknown) {
        return {
          error: `Failed to process video: ${describeError(err)}`,
          file_path: resolvedPath,
        };
      } finally {
        // Best-effort cleanup; a failure to delete temp files must not mask the result.
        await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
      }
    },
  });
}