// dist/promptPreprocessor.js

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.preprocess = preprocess;
const config_1 = require("./config");
const reranker_1 = require("./services/rag/reranker");
/** Identifier of the embedding model used for the first-pass vector retrieval. */
const EMBEDDING_MODEL_ID = "nomic-ai/nomic-embed-text-v1.5-GGUF";
/** With reranking enabled, this many times `retrievalLimit` candidates are fetched for the reranker. */
const RERANK_CANDIDATE_MULTIPLIER = 5;
/**
 * Merges reranker scores into the retrieved entries and returns the best ones.
 *
 * Results that point at a non-existent entry or carry a non-finite score are
 * dropped instead of producing content-less entries. The remainder is sorted
 * explicitly by descending score so the outcome does not depend on the order
 * in which the reranker happens to return its results.
 *
 * @param {Array<object>} entries Retrieved entries, addressed by position.
 * @param {Array<{index: number, score: number}>} scores Reranker results.
 * @param {number} limit Maximum number of entries to keep.
 * @returns {Array<object>} Copies of the selected entries with `score` replaced.
 */
function applyRerankScores(entries, scores, limit) {
    return scores
        .filter(s => entries[s.index] !== undefined && Number.isFinite(s.score))
        .map(s => ({ ...entries[s.index], score: s.score }))
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);
}
/**
 * Renders the entries as the markdown context block that is prepended to the user query.
 *
 * @param {Array<{content: string, score: number}>} entries Entries to render, in display order.
 * @returns {string} Markdown text starting with the "Retrieved Context" heading.
 */
function buildContextBlock(entries) {
    const sections = entries.map((entry, idx) => {
        // A non-numeric score must not abort the whole injection.
        const score = Number.isFinite(entry.score) ? entry.score.toFixed(3) : "n/a";
        return `**Source ${idx + 1}** (Score: ${score}):\n> ${entry.content}\n\n`;
    });
    return `### Retrieved Context (OpenBook)\n\n${sections.join("")}`;
}
/**
 * Prompt preprocessor: retrieves excerpts from the non-image files attached to
 * the conversation and prepends them to the user's message.
 *
 * Flow: collect files from the history and the current message, run an
 * embedding-based retrieval, optionally rerank the candidates, then rewrite
 * the message text as "<context block> + ### User Query + <original prompt>".
 * Failures are reported through the status indicator and `ctl.debug`; the
 * message is then returned without injected context.
 *
 * @param {object} ctl Controller used here for `pullHistory`, `getPluginConfig`,
 *   `createStatus`, `debug`, `client` and `abortSignal`.
 * @param {object} userMessage Message used here via `getText`, `getFiles` and `replaceText`.
 * @returns {Promise<object>} The same `userMessage` object; its text is replaced
 *   in place when relevant excerpts were found.
 */
async function preprocess(ctl, userMessage) {
    const userPrompt = userMessage.getText();
    const history = await ctl.pullHistory();
    // Files of the current message are added explicitly alongside those from the pulled history.
    const files = [...history.getAllFiles(ctl.client), ...userMessage.getFiles(ctl.client)].filter(f => f.type !== "image");
    if (files.length === 0) {
        return userMessage;
    }
    const pluginConfig = ctl.getPluginConfig(config_1.configSchematics);
    const retrievalLimit = pluginConfig.get("retrievalLimit");
    const rerankerEnabled = pluginConfig.get("rerankerEnabled");
    const status = ctl.createStatus({
        status: "loading",
        text: "OpenBook: Analyzing documents..."
    });
    try {
        // 1. Embedding / Retrieval (first pass)
        const embeddingModel = await ctl.client.embedding.model(EMBEDDING_MODEL_ID, {
            signal: ctl.abortSignal
        });
        // Over-fetch when reranking so the second pass has candidates to choose from.
        const fetchLimit = rerankerEnabled ? retrievalLimit * RERANK_CANDIDATE_MULTIPLIER : retrievalLimit;
        status.setState({ status: "loading", text: `Retrieving top ${fetchLimit} candidates...` });
        const retrievalResult = await ctl.client.files.retrieve(userPrompt, files, {
            embeddingModel,
            limit: fetchLimit,
            signal: ctl.abortSignal
        });
        let entries = retrievalResult.entries;
        // 2. Reranking (second pass)
        if (rerankerEnabled && entries.length > 0) {
            status.setState({ status: "loading", text: `Reranking ${entries.length} segments...` });
            try {
                const documents = entries.map(e => e.content);
                const scores = await (0, reranker_1.rerank)(userPrompt, documents);
                entries = applyRerankScores(entries, scores, retrievalLimit);
            }
            catch (e) {
                // Best effort: a reranker failure degrades to the raw vector ranking.
                ctl.debug(`Reranking failed: ${e}. Using raw vector results.`);
                entries = entries.slice(0, retrievalLimit);
            }
        }
        // 3. Injection
        if (entries.length > 0) {
            status.setState({ status: "done", text: `Found ${entries.length} relevant excerpts.` });
            // The context is prepended (rather than appended) to the user's query.
            const newText = `${buildContextBlock(entries)}\n\n### User Query\n${userPrompt}`;
            userMessage.replaceText(newText);
            // NOTE(review): UI citations (ctl.addCitations) are intentionally not emitted here;
            // only the text injection is performed.
        }
        else {
            status.setState({ status: "done", text: "No relevant content found in documents." });
        }
    }
    catch (e) {
        // Thrown values are not guaranteed to be Error instances.
        const reason = e && typeof e.message === "string" ? e.message : String(e);
        ctl.debug(`OpenBook preprocessing failed: ${reason}`);
        status.setState({ status: "error", text: `Error: ${reason}` });
    }
    return userMessage;
}