// src/promptPreprocessor.ts

import {
  text,
  type ChatMessage,
  type FileHandle,
  type PromptPreprocessorController,
} from "@lmstudio/sdk";
import { configSchematics } from "./config";
import { rerank } from "./services/rag/reranker";

/**
 * Prompt preprocessor: augments the user's message with RAG context pulled
 * from any non-image files attached to the conversation.
 *
 * Pipeline:
 *  1. Vector retrieval over all attached files (over-fetches 5x candidates
 *     when reranking is enabled).
 *  2. Optional second-pass reranking of the retrieved segments; falls back
 *     to the raw vector ordering if the reranker fails.
 *  3. Injects the top excerpts ahead of the original query by rewriting the
 *     user message text in place.
 *
 * @param ctl - Plugin controller (config access, status UI, abort signal).
 * @param userMessage - The incoming user message; mutated in place when
 *   relevant context is found.
 * @returns The (possibly rewritten) user message.
 */
export async function preprocess(
  ctl: PromptPreprocessorController,
  userMessage: ChatMessage,
): Promise<ChatMessage> {
  const userPrompt = userMessage.getText();
  const history = await ctl.pullHistory();

  // Consider files from both the prior history and the current message;
  // images are excluded — they go to the model directly, not through RAG.
  const files = [
    ...history.getAllFiles(ctl.client),
    ...userMessage.getFiles(ctl.client),
  ].filter((f) => f.type !== "image");

  // Nothing to retrieve from — pass the message through untouched.
  if (files.length === 0) {
    return userMessage;
  }

  const pluginConfig = ctl.getPluginConfig(configSchematics);
  const retrievalLimit = pluginConfig.get("retrievalLimit");
  const rerankerEnabled = pluginConfig.get("rerankerEnabled");

  const status = ctl.createStatus({
    status: "loading",
    text: "OpenBook: Analyzing documents...",
  });

  try {
    // 1. Embedding / Retrieval (First Pass)
    const embeddingModel = await ctl.client.embedding.model(
      "nomic-ai/nomic-embed-text-v1.5-GGUF",
      { signal: ctl.abortSignal },
    );

    // Over-fetch 5x candidates when reranking so the second pass has a
    // meaningful pool to reorder; otherwise fetch exactly the limit.
    const fetchLimit = rerankerEnabled ? retrievalLimit * 5 : retrievalLimit;

    status.setState({ status: "loading", text: `Retrieving top ${fetchLimit} candidates...` });

    const retrievalResult = await ctl.client.files.retrieve(userPrompt, files, {
      embeddingModel,
      limit: fetchLimit,
      signal: ctl.abortSignal,
    });

    let entries = retrievalResult.entries;

    // 2. Reranking (Second Pass)
    if (rerankerEnabled && entries.length > 0) {
      status.setState({ status: "loading", text: `Reranking ${entries.length} segments...` });
      try {
        const documents = entries.map((e) => e.content);
        const scores = await rerank(userPrompt, documents);

        // Map the reranker's scores back onto the entries, then sort
        // descending by the new score. The sort is essential: without it,
        // slice() would keep whatever order `rerank` happened to return,
        // not the top-scoring segments.
        entries = scores
          .map((s) => ({ ...entries[s.index], score: s.score }))
          .sort((a, b) => b.score - a.score)
          .slice(0, retrievalLimit);
      } catch (e) {
        // Best-effort: a reranker failure degrades to vector-order results
        // rather than aborting the whole preprocessing pass.
        ctl.debug(`Reranking failed: ${e}. Using raw vector results.`);
        entries = entries.slice(0, retrievalLimit);
      }
    }

    // 3. Injection
    if (entries.length > 0) {
      status.setState({ status: "done", text: `Found ${entries.length} relevant excerpts.` });

      let contextBlock = "### Retrieved Context (OpenBook)\n\n";
      entries.forEach((entry, idx) => {
        contextBlock += `**Source ${idx + 1}** (Score: ${entry.score.toFixed(3)}):\n> ${entry.content}\n\n`;
      });

      // Prepend context before the original query — earlier position in the
      // prompt generally gets better model attention than appending.
      const newText = `${contextBlock}\n\n### User Query\n${userPrompt}`;
      userMessage.replaceText(newText);

      // Add citations visually if SDK allows (it does via ctl.addCitations)
      // We map our re-ordered entries back to the shape expected if needed,
      // or just rely on the text injection.
      // ctl.addCitations({ entries }); // Uncomment if you want the UI citation bubble
    } else {
      status.setState({ status: "done", text: "No relevant content found in documents." });
    }
  } catch (e: unknown) {
    // Narrow before reading .message — catch variables are `unknown` under
    // strict settings, and non-Error values can be thrown.
    const message = e instanceof Error ? e.message : String(e);
    status.setState({ status: "error", text: `Error: ${message}` });
  }

  return userMessage;
}