// src/ripgrep.ts

import { existsSync } from "fs"
import * as path from "path"
import * as childProcess from "child_process"
import * as readline from "readline"

// ripgrep ships as "rg.exe" on Windows platforms and as plain "rg" everywhere else.
const isWindows = process.platform.startsWith("win")
const binName = isWindows ? "rg.exe" : "rg"

/** One ripgrep match together with the context lines collected around it. */
interface SearchResult {
  /** Path of the file containing the match, as reported by ripgrep (`data.path.text`). */
  filePath: string
  /** Line number of the matching line, as reported by ripgrep (`data.line_number`). */
  line: number
  /** Start offset of the first submatch, as reported by ripgrep (`data.submatches[0].start`). */
  column: number
  /** Text of the matching line, as reported by ripgrep (`data.lines.text`). */
  match: string
  /** Context lines shown before the match. */
  beforeContext: string[]
  /** Context lines shown after the match. */
  afterContext: string[]
}

// Maximum number of results included in the formatted output; execRipgrep also
// derives its cap on collected stdout lines from this value (MAX_RESULTS * 5).
const MAX_RESULTS = 300

/**
 * Locates the ripgrep binary bundled under `cwd`.
 *
 * Checks the `@vscode/ripgrep` package folder first, then the older
 * `vscode-ripgrep` one, and returns the first binary path that exists on disk.
 *
 * @param cwd Directory whose `node_modules` folder is searched.
 * @returns Full path to the binary, or `undefined` when neither location has it.
 */
export async function getBinPath(cwd: string): Promise<string | undefined> {
  const candidateFolders = ["node_modules/@vscode/ripgrep/bin/", "node_modules/vscode-ripgrep/bin"]
  for (const folder of candidateFolders) {
    const candidate = path.join(cwd, folder, binName)
    if (existsSync(candidate)) {
      return candidate
    }
  }
  return undefined
}

/**
 * Spawns ripgrep and collects its stdout line by line.
 *
 * At most `MAX_RESULTS * 5` lines are kept; when a further line arrives the
 * line reader is closed and the child process is killed.
 *
 * @param bin Path of the ripgrep executable to spawn.
 * @param args Command-line arguments passed to ripgrep.
 * @returns The collected stdout, each kept line terminated by "\n".
 *   The promise rejects if anything was written to stderr by the time the
 *   line reader closes, or if the child process emits an "error" event.
 */
async function execRipgrep(bin: string, args: string[]): Promise<string> {
  const lineLimit = MAX_RESULTS * 5

  return new Promise<string>((resolve, reject) => {
    const child = childProcess.spawn(bin, args)
    const lineReader = readline.createInterface({ input: child.stdout, crlfDelay: Infinity })
    let collected = ""
    let linesSeen = 0
    let stderrText = ""

    lineReader.on("line", (text) => {
      if (linesSeen >= lineLimit) {
        // Enough output gathered: stop reading and terminate ripgrep.
        lineReader.close()
        child.kill()
        return
      }
      collected += `${text}\n`
      linesSeen += 1
    })

    child.stderr.on("data", (chunk) => {
      stderrText += chunk.toString()
    })

    lineReader.on("close", () => {
      // Any stderr output is treated as a failure of the whole search.
      if (!stderrText) {
        resolve(collected)
        return
      }
      reject(new Error(`ripgrep process error: ${stderrText}`))
    })

    child.on("error", (error) => {
      reject(new Error(`ripgrep process error: ${error.message}`))
    })
  })
}

// Number of context lines requested from ripgrep on each side of a match.
const RIPGREP_CONTEXT_LINES = 1

/**
 * Converts ripgrep `--json` output (one JSON message per line) into search results.
 *
 * ripgrep emits messages in file order, so the context lines that precede a
 * match arrive BEFORE the match message itself. They are therefore buffered
 * and attached as `beforeContext` once the match shows up. A context line that
 * directly follows the current match (same file, within the requested context
 * distance) is attached to that match as `afterContext` instead; it is not
 * duplicated into the next match's `beforeContext`.
 *
 * Lines that fail to parse are logged and skipped.
 */
function parseRipgrepOutput(output: string): SearchResult[] {
  const results: SearchResult[] = []
  let current: SearchResult | undefined
  // Context lines seen for `pendingPath` that belong to a match not yet emitted.
  let pending: string[] = []
  let pendingPath: string | undefined

  for (const line of output.split("\n")) {
    if (!line) continue
    try {
      const parsed = JSON.parse(line)
      if (parsed.type !== "match" && parsed.type !== "context") continue

      const data = parsed.data
      const filePath = data.path.text
      const lineNumber = data.line_number
      const text = data.lines.text

      if (parsed.type === "match") {
        // Build the new result completely before touching shared state, so a
        // malformed message cannot cause the previous result to be pushed twice.
        const next: SearchResult = {
          filePath,
          line: lineNumber,
          column: data.submatches?.[0]?.start ?? 0,
          match: text,
          beforeContext: pendingPath === filePath ? pending : [],
          afterContext: [],
        }
        if (current) results.push(current)
        current = next
        pending = []
        pendingPath = undefined
      } else if (
        current &&
        current.filePath === filePath &&
        lineNumber > current.line &&
        lineNumber - current.line <= RIPGREP_CONTEXT_LINES
      ) {
        current.afterContext.push(text)
      } else {
        // Leading context of an upcoming match; never mix lines from different files.
        if (pendingPath !== filePath) {
          pending = []
          pendingPath = filePath
        }
        pending.push(text)
      }
    } catch (error) {
      console.error("Error parsing ripgrep output:", error)
    }
  }

  if (current) results.push(current)
  return results
}

/**
 * Searches `directoryPath` with ripgrep and returns a formatted, human-readable report.
 *
 * @param cwd Directory used to locate the ripgrep binary and to relativize result paths.
 * @param directoryPath Directory handed to ripgrep as the search root.
 * @param regex Pattern passed to ripgrep via `-e`.
 * @param filePattern Optional glob restricting the searched files; defaults to "*".
 * @returns The formatted results, or "No results found" when running ripgrep fails.
 * @throws Error when no ripgrep binary can be found under `cwd`.
 */
export async function regexSearchFiles(
  cwd: string,
  directoryPath: string,
  regex: string,
  filePattern?: string,
): Promise<string> {
  const rgPath = await getBinPath(cwd)
  if (!rgPath) throw new Error("Could not find ripgrep binary")

  const args = [
    "--json",
    "-e",
    regex,
    "--glob",
    filePattern || "*",
    "--context",
    String(RIPGREP_CONTEXT_LINES),
    directoryPath,
  ]

  let output: string
  try {
    output = await execRipgrep(rgPath, args)
  } catch {
    // Deliberate best-effort: any ripgrep failure is reported to the caller as an empty search.
    return "No results found"
  }

  return formatResults(parseRipgrepOutput(output), cwd)
}

// Size budget for the formatted search output, in megabytes and in bytes.
const MAX_RIPGREP_MB = 0.25
const MAX_BYTE_SIZE = MAX_RIPGREP_MB * 1024 ** 2

/**
 * Renders search results as a text report grouped by file.
 *
 * The report starts with a summary line, followed by one section per file
 * (path relative to `cwd`, forward slashes) in first-seen order. Each result
 * lists its before-context, matching line and after-context, prefixed with "│"
 * and separated by "│----" rules. At most `MAX_RESULTS` results are rendered.
 *
 * Output is built chunk by chunk and stops as soon as the next chunk would
 * bring the total to `MAX_BYTE_SIZE` bytes or more; a result is either written
 * completely or not at all. When output was cut short, a truncation notice is
 * appended if it still fits within the limit.
 *
 * @param results Parsed search results, in the order ripgrep reported them.
 * @param cwd Base directory used to relativize file paths.
 * @returns The trimmed report text.
 */
function formatResults(results: SearchResult[], cwd: string): string {
  let output =
    results.length >= MAX_RESULTS
      ? `Showing first ${MAX_RESULTS} of ${MAX_RESULTS}+ results. Use a more specific search if necessary.\n\n`
      : `Found ${results.length === 1 ? "1 result" : `${results.length.toLocaleString()} results`}.\n\n`

  // A Map (not a plain object) keeps insertion order for every key and is safe
  // for file names such as "constructor", "toString" or "__proto__".
  const groupedResults = new Map<string, SearchResult[]>()
  for (const result of results.slice(0, MAX_RESULTS)) {
    const relativeFilePath = path.relative(cwd, result.filePath)
    const bucket = groupedResults.get(relativeFilePath)
    if (bucket) bucket.push(result)
    else groupedResults.set(relativeFilePath, [result])
  }

  let byteSize = Buffer.byteLength(output, "utf8")

  // Appends `chunk` only if the total stays strictly below the byte limit.
  const tryAppend = (chunk: string): boolean => {
    const chunkBytes = Buffer.byteLength(chunk, "utf8")
    if (byteSize + chunkBytes >= MAX_BYTE_SIZE) return false
    output += chunk
    byteSize += chunkBytes
    return true
  }

  // Writes every file section; returns false as soon as a chunk does not fit.
  const appendAllSections = (): boolean => {
    for (const [filePath, fileResults] of Array.from(groupedResults)) {
      if (!tryAppend(`${filePath.replace(/\\/g, "/")}\n│----\n`)) return false

      for (let resultIndex = 0; resultIndex < fileResults.length; resultIndex++) {
        const result = fileResults[resultIndex]
        const resultText = [...result.beforeContext, result.match, ...result.afterContext]
          .map((line) => `│${line?.trimEnd() ?? ""}\n`)
          .join("")
        if (!tryAppend(resultText)) return false

        // Separator between consecutive results of the same file.
        const isLastResult = resultIndex === fileResults.length - 1
        if (!isLastResult && !tryAppend("│----\n")) return false
      }

      if (!tryAppend("│----\n\n")) return false
    }
    return true
  }

  const wasLimitReached = !appendAllSections()

  if (wasLimitReached) {
    const msg = `\n[Results truncated due to exceeding the ${MAX_RIPGREP_MB}MB size limit. Please use a more specific search pattern.]`
    // NOTE: the notice itself is subject to the limit and is omitted when it does not fit.
    if (byteSize + Buffer.byteLength(msg, "utf8") < MAX_BYTE_SIZE) output += msg
  }

  return output.trim()
}