// src/generator.ts

import { GeneratorController, Chat, LMStudioClient } from "@lmstudio/sdk";
import { configSchematics, globalConfigSchematics } from "./config.js";

/**
 * Streams a response from a model hosted on a remote LM Studio server,
 * forwarding each generated fragment to the local LM Studio instance.
 *
 * Flow: read plugin config -> connect to the remote server -> ensure the
 * requested model is loaded -> stream `respond()` fragments back via `ctl`.
 *
 * @param ctl - Generator controller used to read plugin config, expose tool
 *   definitions, and report generated fragments.
 * @param history - Chat history to send to the remote model.
 */
export async function generator(
  ctl: GeneratorController,
  history: Chat
): Promise<void> {
  // Per-plugin and global plugin configuration.
  const config = ctl.getPluginConfig(configSchematics);
  const globalConfig = ctl.getGlobalPluginConfig(globalConfigSchematics);

  // `||` (not `??`) is deliberate for the string fields below: an empty
  // string left in the config UI should fall back to the default/undefined.
  const baseUrl = globalConfig.get("baseUrl") || "ws://127.0.0.1:1234";
  const model = config.get("model");
  const temperature = config.get("temperature");
  const topP = config.get("topP");
  const topK = config.get("topK");
  const maxTokens = config.get("maxTokens");
  // NOTE(review): this cast assumes the schematic only ever stores one of
  // these three literals — confirm against config.ts; ideally the schematic
  // itself should carry the union type so the cast can be dropped.
  const contextOverflowPolicy = config.get("contextOverflowPolicy") as
    | "stopAtLimit"
    | "truncateMiddle"
    | "rollingWindow";
  const gpuOffloadRatio = globalConfig.get("gpuOffloadRatio");
  const clientIdentifier = globalConfig.get("clientIdentifier") || undefined;
  const clientPasskey = globalConfig.get("clientPasskey") || undefined;

  // Connect to the remote LM Studio server.
  const client = new LMStudioClient({
    baseUrl,
    clientIdentifier,
    clientPasskey,
  });

  // Load the model on the remote server only if it is not already resident.
  const loadedModels = await client.llm.listLoaded();
  const isLoaded = loadedModels.some((m) => m.identifier === model);
  if (!isLoaded) {
    await client.llm.load(model, {
      config: {
        gpu: {
          ratio: gpuOffloadRatio,
        },
      },
    });
  }

  // Get a handle to the (now loaded) model.
  const llm = await client.llm.model(model);

  // Build generation options, forwarding only values the user actually set.
  // NOTE(review): `temperature > 0` treats 0 as "unset", so an explicit
  // temperature of 0 (greedy decoding) is silently dropped — confirm this
  // sentinel convention against the config schematic defaults.
  const opts: Record<string, unknown> = {};
  if (temperature !== undefined && temperature > 0) {
    opts.temperature = temperature;
  }
  if (topP !== undefined && topP > 0 && topP < 1) {
    opts.topPSamplingConfig = { topP };
  }
  if (topK !== undefined && topK > 0) {
    opts.topKSamplingConfig = { topK };
  }
  if (maxTokens !== undefined && maxTokens > 0) {
    opts.maxTokens = maxTokens;
  }
  if (contextOverflowPolicy) {
    opts.contextOverflowPolicy = contextOverflowPolicy;
  }

  // Expose the locally-registered tool definitions to the remote model,
  // omitting the field entirely when there are none.
  const tools = ctl.getToolDefinitions();

  // Stream the prediction and forward each fragment to the local instance.
  // (A previous try/catch here only rethrew the error verbatim — a no-op —
  // and has been removed; rejections still propagate to the caller.)
  const prediction = llm.respond(history, {
    ...opts,
    tools: tools.length > 0 ? tools : undefined,
  });
  for await (const fragment of prediction) {
    ctl.fragmentGenerated(fragment.content);
  }
}