dist/generator.js

import { LMStudioClient } from "@lmstudio/sdk";
import { configSchematics, globalConfigSchematics } from "./config.js";
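
// For reference, "./config.js" is expected to export schematics built with the
// SDK's createConfigSchematics(). Below is a minimal sketch of what that file
// might look like, assuming the documented .field(key, type, options, default)
// pattern; the display names, defaults, and field types here are illustrative
// guesses, not the plugin's actual values:
//
//   import { createConfigSchematics } from "@lmstudio/sdk";
//
//   export const configSchematics = createConfigSchematics()
//       .field("model", "string", { displayName: "Model" }, "")
//       .field("temperature", "numeric", { displayName: "Temperature" }, 0.7)
//       .field("topP", "numeric", { displayName: "Top P" }, 0.95)
//       .field("topK", "numeric", { displayName: "Top K" }, 40)
//       .field("maxTokens", "numeric", { displayName: "Max Tokens" }, 0)
//       .field("contextOverflowPolicy", "string",
//           { displayName: "Context Overflow Policy" }, "truncateMiddle")
//       .build();
//
//   export const globalConfigSchematics = createConfigSchematics()
//       .field("baseUrl", "string", { displayName: "Remote Base URL" }, "ws://127.0.0.1:1234")
//       .field("gpuOffloadRatio", "numeric", { displayName: "GPU Offload Ratio" }, 1)
//       .field("clientIdentifier", "string", { displayName: "Client Identifier" }, "")
//       .field("clientPasskey", "string", { displayName: "Client Passkey" }, "")
//       .build();
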
/**
 * Generator function that connects to a remote LM Studio server
 * and streams the generated output back to the local LM Studio instance.
 *
 * @param ctl - Generator controller provided by LM Studio.
 * @param history - Chat history to generate a response for.
 */
export async function generator(ctl, history) {
    // Resolve the per-chat and global plugin configs (values are read via .get())
    const config = ctl.getPluginConfig(configSchematics);
    const globalConfig = ctl.getGlobalPluginConfig(globalConfigSchematics);
    // Extract config
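    // Default to LM Studio's standard local server address (port 1234) when unset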
    const baseUrl = globalConfig.get("baseUrl") || "ws://127.0.0.1:1234";
    const model = config.get("model");
    const temperature = config.get("temperature");
    const topP = config.get("topP");
    const topK = config.get("topK");
    const maxTokens = config.get("maxTokens");
    const contextOverflowPolicy = config.get("contextOverflowPolicy");
    const gpuOffloadRatio = globalConfig.get("gpuOffloadRatio");
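    // Normalize empty strings to undefined so the optional client fields are omitted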
    const clientIdentifier = globalConfig.get("clientIdentifier") || undefined;
    const clientPasskey = globalConfig.get("clientPasskey") || undefined;
    // Connect to remote LM Studio
    const client = new LMStudioClient({
        baseUrl,
        clientIdentifier,
        clientPasskey,
    });
    // Load the model on the remote server if it is not already loaded
    const loadedModels = await client.llm.listLoaded();
    const isLoaded = loadedModels.some((m) => m.identifier === model);
    if (!isLoaded) {
        // Load with the configured GPU offload ratio
        await client.llm.load(model, {
            config: {
                gpu: {
                    ratio: gpuOffloadRatio,
                },
            },
        });
    }
    // Get a handle to the (now loaded) model
    const llm = await client.llm.model(model);
    // Build prediction options, passing only values that are meaningfully set;
    // anything left out falls back to the remote model's defaults
    const opts = {};
    if (temperature !== undefined && temperature > 0) {
        opts.temperature = temperature;
    }
    if (topP !== undefined && topP > 0 && topP < 1) {
        opts.topPSampling = topP;
    }
    if (topK !== undefined && topK > 0) {
        opts.topKSampling = topK;
    }
    if (maxTokens !== undefined && maxTokens > 0) {
        opts.maxTokens = maxTokens;
    }
    if (contextOverflowPolicy) {
        opts.contextOverflowPolicy = contextOverflowPolicy;
    }
    // Get tool definitions and pass them to the remote model
    const tools = ctl.getToolDefinitions();
    // Stream the generation from the remote model
    const prediction = llm.respond(history, {
        ...opts,
        tools: tools.length > 0 ? tools : undefined,
    });
    // Forward each streamed text fragment to the local LM Studio instance.
    // Errors (connection, load, or prediction) propagate to LM Studio to handle.
    for await (const fragment of prediction) {
        ctl.fragmentGenerated(fragment.content);
    }
}
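
// For completeness, a minimal sketch of how this generator might be wired up in
// the plugin's entry point, assuming the standard LM Studio plugin main(context)
// shape. The file path and registration calls below are assumptions for
// illustration, not part of this module:
//
//   // dist/index.js
//   import { configSchematics, globalConfigSchematics } from "./config.js";
//   import { generator } from "./generator.js";
//
//   export async function main(context) {
//       context.withConfigSchematics(configSchematics);
//       context.withGlobalConfigSchematics(globalConfigSchematics);
//       context.withGenerator(generator);
//   }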