import z from 'zod';
import BaseLLM from '../../base/llm';
import {
  GenerateObjectInput,
  GenerateOptions,
  GenerateTextInput,
  GenerateTextOutput,
  StreamTextOutput,
} from '../../types';
import { Ollama } from 'ollama';
import { parse } from 'partial-json';

type OllamaConfig = {
  baseURL: string;
  model: string;
  options?: GenerateOptions;
};

// Models that emit a separate "thinking" channel; thinking is disabled for
// structured-output calls so the message body stays valid JSON.
const reasoningModels = [
  'gpt-oss',
  'deepseek-r1',
  'qwen3',
  'deepseek-v3.1',
  'magistral',
];

class OllamaLLM extends BaseLLM {
  ollamaClient: Ollama;

  constructor(protected config: OllamaConfig) {
    super(config);
    this.ollamaClient = new Ollama({
      host: this.config.baseURL || 'http://localhost:11434',
    });
  }

  async generateText(input: GenerateTextInput): Promise<GenerateTextOutput> {
    const res = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        num_ctx: 32000,
        frequency_penalty:
          input.options?.frequencyPenalty ?? this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ?? this.config.options?.presencePenalty,
        stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    return {
      content: res.message.content,
      additionalInfo: {
        reasoning: res.message.thinking,
      },
    };
  }

  async *streamText(
    input: GenerateTextInput,
  ): AsyncGenerator<StreamTextOutput> {
    const stream = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      stream: true,
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_ctx: 32000,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        frequency_penalty:
          input.options?.frequencyPenalty ?? this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ?? this.config.options?.presencePenalty,
        stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    for await (const chunk of stream) {
      yield {
        contentChunk: chunk.message.content,
        done: chunk.done,
        additionalInfo: {
          reasoning: chunk.message.thinking,
        },
      };
    }
  }

  async generateObject<T>(input: GenerateObjectInput): Promise<T> {
    const response = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      // Constrain the output to the Zod schema, converted to JSON Schema.
      format: z.toJSONSchema(input.schema),
      // Disable thinking on reasoning models so the content is pure JSON.
      ...(reasoningModels.some((m) => this.config.model.includes(m))
        ? { think: false }
        : {}),
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        frequency_penalty:
          input.options?.frequencyPenalty ?? this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ?? this.config.options?.presencePenalty,
        stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    try {
      return input.schema.parse(JSON.parse(response.message.content)) as T;
    } catch (err) {
      throw new Error(`Error parsing response from Ollama: ${err}`);
    }
  }

  async *streamObject<T>(input: GenerateObjectInput): AsyncGenerator<T> {
    let receivedObj: string = '';

    const stream = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      format: z.toJSONSchema(input.schema),
      stream: true,
      ...(reasoningModels.some((m) => this.config.model.includes(m))
        ? { think: false }
        : {}),
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        frequency_penalty:
          input.options?.frequencyPenalty ?? this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ?? this.config.options?.presencePenalty,
        stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    for await (const chunk of stream) {
      // Accumulate the raw text and re-parse it with partial-json on every
      // chunk so callers receive progressively more complete objects.
      receivedObj += chunk.message.content;
      try {
        yield parse(receivedObj) as T;
      } catch (err) {
        console.log('Error parsing partial object from Ollama:', err);
        yield {} as T;
      }
    }
  }
}

export default OllamaLLM;
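
// Usage sketch (illustrative only): OllamaConfig comes from this module, but the
// model name, the message shape, and the example schema below are assumptions
// about the BaseLLM input types, not guarantees of their exact API.
//
// const llm = new OllamaLLM({ baseURL: 'http://localhost:11434', model: 'qwen3' });
//
// const { content } = await llm.generateText({
//   messages: [{ role: 'user', content: 'Say hello.' }],
// });
//
// const person = await llm.generateObject<{ name: string }>({
//   messages: [{ role: 'user', content: 'Return a JSON object with a name field.' }],
//   schema: z.object({ name: z.string() }),
// });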