// Mirror of https://github.com/ItzCrazyKns/Perplexica.git (synced 2025-12-03 02:08:17 +00:00)

import z from 'zod';

import BaseLLM from '../../base/llm';
import {
  GenerateObjectInput,
  GenerateOptions,
  GenerateTextInput,
  GenerateTextOutput,
  StreamTextOutput,
} from '../../types';
import { Ollama } from 'ollama';
import { parse } from 'partial-json';

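// Connection settings for a single Ollama model: the server base URL, the
// model name, and optional default generation options applied to every call.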
type OllamaConfig = {
  baseURL: string;
  model: string;
  options?: GenerateOptions;
};

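// Model families that emit a separate "thinking" trace. For structured output
// (generateObject/streamObject) thinking is disabled via `think: false` so
// only the JSON answer is returned.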
const reasoningModels = [
  'gpt-oss',
  'deepseek-r1',
  'qwen3',
  'deepseek-v3.1',
  'magistral',
];

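/**
 * LLM provider backed by a local or remote Ollama server. Implements text
 * generation, streaming, and schema-constrained object generation on top of
 * the BaseLLM interface.
 */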
class OllamaLLM extends BaseLLM<OllamaConfig> {
  ollamaClient: Ollama;

  constructor(protected config: OllamaConfig) {
    super(config);

    this.ollamaClient = new Ollama({
      host: this.config.baseURL || 'http://localhost:11434',
    });
  }

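  /**
   * Runs a single, non-streaming chat completion and returns the full
   * response along with any reasoning trace the model produced.
   */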
  async generateText(input: GenerateTextInput): Promise<GenerateTextOutput> {
    const res = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        num_ctx: 32000,
        frequency_penalty:
          input.options?.frequencyPenalty ??
          this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ??
          this.config.options?.presencePenalty,
        stop:
          input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    return {
      content: res.message.content,
      additionalInfo: {
        reasoning: res.message.thinking,
      },
    };
  }

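  /**
   * Streams a chat completion, yielding content chunks (and any reasoning
   * trace) as they arrive from the server.
   */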
  async *streamText(
    input: GenerateTextInput,
  ): AsyncGenerator<StreamTextOutput> {
    const stream = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      stream: true,
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_ctx: 32000,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        frequency_penalty:
          input.options?.frequencyPenalty ??
          this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ??
          this.config.options?.presencePenalty,
        stop:
          input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    for await (const chunk of stream) {
      yield {
        contentChunk: chunk.message.content,
        done: chunk.done,
        additionalInfo: {
          reasoning: chunk.message.thinking,
        },
      };
    }
  }

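  /**
   * Generates a JSON object conforming to the given Zod schema by passing the
   * schema to Ollama as a structured-output format, then validating the
   * parsed response. Thinking is disabled for known reasoning models.
   */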
  async generateObject<T>(input: GenerateObjectInput): Promise<T> {
    const response = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      format: z.toJSONSchema(input.schema),
      ...(reasoningModels.find((m) => this.config.model.includes(m))
        ? { think: false }
        : {}),
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        frequency_penalty:
          input.options?.frequencyPenalty ??
          this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ??
          this.config.options?.presencePenalty,
        stop:
          input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    try {
      return input.schema.parse(JSON.parse(response.message.content)) as T;
    } catch (err) {
      throw new Error(`Error parsing response from Ollama: ${err}`);
    }
  }

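  /**
   * Streams a schema-constrained object, accumulating the raw JSON text and
   * yielding a best-effort partial parse after every chunk.
   */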
  async *streamObject<T>(input: GenerateObjectInput): AsyncGenerator<T> {
    let receivedObj: string = '';

    const stream = await this.ollamaClient.chat({
      model: this.config.model,
      messages: input.messages,
      format: z.toJSONSchema(input.schema),
      stream: true,
      ...(reasoningModels.find((m) => this.config.model.includes(m))
        ? { think: false }
        : {}),
      options: {
        top_p: input.options?.topP ?? this.config.options?.topP,
        temperature:
          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
        frequency_penalty:
          input.options?.frequencyPenalty ??
          this.config.options?.frequencyPenalty,
        presence_penalty:
          input.options?.presencePenalty ??
          this.config.options?.presencePenalty,
        stop:
          input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
    });

    for await (const chunk of stream) {
      receivedObj += chunk.message.content;

      try {
        yield parse(receivedObj) as T;
      } catch (err) {
        console.log('Error parsing partial object from Ollama:', err);
        yield {} as T;
      }
    }
  }
}

export default OllamaLLM;
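
// Usage sketch (assumption: GenerateTextInput carries Ollama-style chat
// messages plus the options shape used above; the model name is illustrative):
//
//   const llm = new OllamaLLM({
//     baseURL: 'http://localhost:11434',
//     model: 'llama3.1',
//     options: { temperature: 0.7 },
//   });
//
//   const { content } = await llm.generateText({
//     messages: [{ role: 'user', content: 'Hello!' }],
//   });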