Mirror of https://github.com/ItzCrazyKns/Perplexica.git (synced 2025-12-03 10:18:15 +00:00)
refac(llm): rework LLM option handling for per-request overrides
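The commit drops the mutating `withOptions()` path and resolves sampling options per request: each call now reads `input.options` first, falls back to the instance-level `config.options`, and finally to a hard-coded default, using nullish coalescing so that explicit zeros still count as overrides. A minimal sketch of that precedence follows; the `GenerateOptions` field names are read off the diff, while the helper name is hypothetical (the commit inlines this logic at each call site rather than extracting it):

```ts
// Field names taken from the diff; the repository's real type may differ.
type GenerateOptions = {
  temperature?: number;
  topP?: number;
  maxTokens?: number;
  frequencyPenalty?: number;
  presencePenalty?: number;
  stopSequences?: string[];
};

// Hypothetical helper illustrating the precedence the diff repeats inline:
// per-request value, then instance config, then a provider default.
const resolveTemperature = (
  requestOptions: GenerateOptions | undefined,
  configOptions: GenerateOptions | undefined,
  fallback: number,
): number =>
  requestOptions?.temperature ?? configOptions?.temperature ?? fallback;

// Request wins, config is the fallback, and 0 is respected as a real value.
resolveTemperature({ temperature: 0 }, { temperature: 0.3 }, 0.7); // -> 0
resolveTemperature(undefined, { temperature: 0.3 }, 0.7); // -> 0.3
resolveTemperature(undefined, undefined, 0.7); // -> 0.7
```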
@@ -8,7 +8,6 @@ import {
 
 abstract class BaseLLM<CONFIG> {
   constructor(protected config: CONFIG) {}
-  abstract withOptions(options: GenerateOptions): this;
   abstract generateText(input: GenerateTextInput): Promise<GenerateTextOutput>;
   abstract streamText(
     input: GenerateTextInput,
@@ -35,28 +35,24 @@ class OllamaLLM extends BaseLLM<OllamaConfig> {
     });
   }
 
-  withOptions(options: GenerateOptions) {
-    this.config.options = {
-      ...this.config.options,
-      ...options,
-    };
-    return this;
-  }
-
   async generateText(input: GenerateTextInput): Promise<GenerateTextOutput> {
-    this.withOptions(input.options || {});
-
     const res = await this.ollamaClient.chat({
       model: this.config.model,
       messages: input.messages,
       options: {
-        top_p: this.config.options?.topP,
-        temperature: this.config.options?.temperature,
-        num_predict: this.config.options?.maxTokens,
+        top_p: input.options?.topP ?? this.config.options?.topP,
+        temperature:
+          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
+        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
         num_ctx: 32000,
-        frequency_penalty: this.config.options?.frequencyPenalty,
-        presence_penalty: this.config.options?.presencePenalty,
-        stop: this.config.options?.stopSequences,
+        frequency_penalty:
+          input.options?.frequencyPenalty ??
+          this.config.options?.frequencyPenalty,
+        presence_penalty:
+          input.options?.presencePenalty ??
+          this.config.options?.presencePenalty,
+        stop:
+          input.options?.stopSequences ?? this.config.options?.stopSequences,
       },
     });
 
@@ -71,20 +67,24 @@ class OllamaLLM extends BaseLLM<OllamaConfig> {
   async *streamText(
     input: GenerateTextInput,
   ): AsyncGenerator<StreamTextOutput> {
-    this.withOptions(input.options || {});
-
     const stream = await this.ollamaClient.chat({
       model: this.config.model,
       messages: input.messages,
       stream: true,
       options: {
-        top_p: this.config.options?.topP,
-        temperature: this.config.options?.temperature,
+        top_p: input.options?.topP ?? this.config.options?.topP,
+        temperature:
+          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
         num_ctx: 32000,
-        num_predict: this.config.options?.maxTokens,
-        frequency_penalty: this.config.options?.frequencyPenalty,
-        presence_penalty: this.config.options?.presencePenalty,
-        stop: this.config.options?.stopSequences,
+        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
+        frequency_penalty:
+          input.options?.frequencyPenalty ??
+          this.config.options?.frequencyPenalty,
+        presence_penalty:
+          input.options?.presencePenalty ??
+          this.config.options?.presencePenalty,
+        stop:
+          input.options?.stopSequences ?? this.config.options?.stopSequences,
       },
     });
 
@@ -100,8 +100,6 @@ class OllamaLLM extends BaseLLM<OllamaConfig> {
   }
 
   async generateObject<T>(input: GenerateObjectInput): Promise<T> {
-    this.withOptions(input.options || {});
-
     const response = await this.ollamaClient.chat({
       model: this.config.model,
       messages: input.messages,
@@ -110,12 +108,18 @@ class OllamaLLM extends BaseLLM<OllamaConfig> {
         ? { think: false }
         : {}),
       options: {
-        top_p: this.config.options?.topP,
-        temperature: 0.7,
-        num_predict: this.config.options?.maxTokens,
-        frequency_penalty: this.config.options?.frequencyPenalty,
-        presence_penalty: this.config.options?.presencePenalty,
-        stop: this.config.options?.stopSequences,
+        top_p: input.options?.topP ?? this.config.options?.topP,
+        temperature:
+          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
+        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
+        frequency_penalty:
+          input.options?.frequencyPenalty ??
+          this.config.options?.frequencyPenalty,
+        presence_penalty:
+          input.options?.presencePenalty ??
+          this.config.options?.presencePenalty,
+        stop:
+          input.options?.stopSequences ?? this.config.options?.stopSequences,
       },
     });
 
@@ -129,8 +133,6 @@ class OllamaLLM extends BaseLLM<OllamaConfig> {
   async *streamObject<T>(input: GenerateObjectInput): AsyncGenerator<T> {
     let recievedObj: string = '';
 
-    this.withOptions(input.options || {});
-
     const stream = await this.ollamaClient.chat({
       model: this.config.model,
       messages: input.messages,
@@ -140,12 +142,18 @@ class OllamaLLM extends BaseLLM<OllamaConfig> {
         ? { think: false }
         : {}),
       options: {
-        top_p: this.config.options?.topP,
-        temperature: 0.7,
-        num_predict: this.config.options?.maxTokens,
-        frequency_penalty: this.config.options?.frequencyPenalty,
-        presence_penalty: this.config.options?.presencePenalty,
-        stop: this.config.options?.stopSequences,
+        top_p: input.options?.topP ?? this.config.options?.topP,
+        temperature:
+          input.options?.temperature ?? this.config.options?.temperature ?? 0.7,
+        num_predict: input.options?.maxTokens ?? this.config.options?.maxTokens,
+        frequency_penalty:
+          input.options?.frequencyPenalty ??
+          this.config.options?.frequencyPenalty,
+        presence_penalty:
+          input.options?.presencePenalty ??
+          this.config.options?.presencePenalty,
+        stop:
+          input.options?.stopSequences ?? this.config.options?.stopSequences,
      },
     });
 
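Across all four Ollama methods the hunks above repeat the same name mapping from the provider-agnostic option fields to Ollama's request `options`. Summarized as a sketch, the mapping itself is read off the diff; the object form is only illustrative:

```ts
// How GenerateOptions fields land in Ollama's `options` payload, per the hunks
// above. num_ctx stays hard-coded at 32000; temperature falls back to 0.7 when
// neither the request nor the config sets it.
const ollamaOptionMapping = {
  topP: 'top_p',
  temperature: 'temperature',
  maxTokens: 'num_predict',
  frequencyPenalty: 'frequency_penalty',
  presencePenalty: 'presence_penalty',
  stopSequences: 'stop',
} as const;
```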
@@ -29,27 +29,21 @@ class OpenAILLM extends BaseLLM<OpenAIConfig> {
     });
   }
 
-  withOptions(options: GenerateOptions) {
-    this.config.options = {
-      ...this.config.options,
-      ...options,
-    };
-
-    return this;
-  }
-
   async generateText(input: GenerateTextInput): Promise<GenerateTextOutput> {
-    this.withOptions(input.options || {});
-
     const response = await this.openAIClient.chat.completions.create({
       model: this.config.model,
       messages: input.messages,
-      temperature: this.config.options?.temperature || 1.0,
-      top_p: this.config.options?.topP,
-      max_completion_tokens: this.config.options?.maxTokens,
-      stop: this.config.options?.stopSequences,
-      frequency_penalty: this.config.options?.frequencyPenalty,
-      presence_penalty: this.config.options?.presencePenalty,
+      temperature:
+        input.options?.temperature ?? this.config.options?.temperature ?? 1.0,
+      top_p: input.options?.topP ?? this.config.options?.topP,
+      max_completion_tokens:
+        input.options?.maxTokens ?? this.config.options?.maxTokens,
+      stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
+      frequency_penalty:
+        input.options?.frequencyPenalty ??
+        this.config.options?.frequencyPenalty,
+      presence_penalty:
+        input.options?.presencePenalty ?? this.config.options?.presencePenalty,
     });
 
     if (response.choices && response.choices.length > 0) {
@@ -67,17 +61,20 @@ class OpenAILLM extends BaseLLM<OpenAIConfig> {
   async *streamText(
     input: GenerateTextInput,
   ): AsyncGenerator<StreamTextOutput> {
-    this.withOptions(input.options || {});
-
     const stream = await this.openAIClient.chat.completions.create({
       model: this.config.model,
       messages: input.messages,
-      temperature: this.config.options?.temperature || 1.0,
-      top_p: this.config.options?.topP,
-      max_completion_tokens: this.config.options?.maxTokens,
-      stop: this.config.options?.stopSequences,
-      frequency_penalty: this.config.options?.frequencyPenalty,
-      presence_penalty: this.config.options?.presencePenalty,
+      temperature:
+        input.options?.temperature ?? this.config.options?.temperature ?? 1.0,
+      top_p: input.options?.topP ?? this.config.options?.topP,
+      max_completion_tokens:
+        input.options?.maxTokens ?? this.config.options?.maxTokens,
+      stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
+      frequency_penalty:
+        input.options?.frequencyPenalty ??
+        this.config.options?.frequencyPenalty,
+      presence_penalty:
+        input.options?.presencePenalty ?? this.config.options?.presencePenalty,
       stream: true,
     });
 
@@ -95,17 +92,20 @@ class OpenAILLM extends BaseLLM<OpenAIConfig> {
   }
 
   async generateObject<T>(input: GenerateObjectInput): Promise<T> {
-    this.withOptions(input.options || {});
-
     const response = await this.openAIClient.chat.completions.parse({
       messages: input.messages,
       model: this.config.model,
-      temperature: this.config.options?.temperature || 1.0,
-      top_p: this.config.options?.topP,
-      max_completion_tokens: this.config.options?.maxTokens,
-      stop: this.config.options?.stopSequences,
-      frequency_penalty: this.config.options?.frequencyPenalty,
-      presence_penalty: this.config.options?.presencePenalty,
+      temperature:
+        input.options?.temperature ?? this.config.options?.temperature ?? 1.0,
+      top_p: input.options?.topP ?? this.config.options?.topP,
+      max_completion_tokens:
+        input.options?.maxTokens ?? this.config.options?.maxTokens,
+      stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
+      frequency_penalty:
+        input.options?.frequencyPenalty ??
+        this.config.options?.frequencyPenalty,
+      presence_penalty:
+        input.options?.presencePenalty ?? this.config.options?.presencePenalty,
       response_format: zodResponseFormat(input.schema, 'object'),
     });
 
@@ -123,17 +123,20 @@ class OpenAILLM extends BaseLLM<OpenAIConfig> {
   async *streamObject<T>(input: GenerateObjectInput): AsyncGenerator<T> {
     let recievedObj: string = '';
 
-    this.withOptions(input.options || {});
-
     const stream = this.openAIClient.responses.stream({
       model: this.config.model,
       input: input.messages,
-      temperature: this.config.options?.temperature || 1.0,
-      top_p: this.config.options?.topP,
-      max_completion_tokens: this.config.options?.maxTokens,
-      stop: this.config.options?.stopSequences,
-      frequency_penalty: this.config.options?.frequencyPenalty,
-      presence_penalty: this.config.options?.presencePenalty,
+      temperature:
+        input.options?.temperature ?? this.config.options?.temperature ?? 1.0,
+      top_p: input.options?.topP ?? this.config.options?.topP,
+      max_completion_tokens:
+        input.options?.maxTokens ?? this.config.options?.maxTokens,
+      stop: input.options?.stopSequences ?? this.config.options?.stopSequences,
+      frequency_penalty:
+        input.options?.frequencyPenalty ??
+        this.config.options?.frequencyPenalty,
+      presence_penalty:
+        input.options?.presencePenalty ?? this.config.options?.presencePenalty,
       text: {
         format: zodTextFormat(input.schema, 'object'),
       },
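For context, a hedged usage sketch of what the change enables: because nothing mutates `this.config.options` anymore, a per-request override applies only to that call and cannot leak into later requests on the same instance. The constructor shape, config fields, import path, and message format below are assumptions for illustration, not taken from the repository:

```ts
// import { OpenAILLM } from '...' — actual module path not shown in the diff.

// Instance-wide defaults live in the config (assumed constructor shape).
const llm = new OpenAILLM({
  model: 'gpt-4o-mini',
  options: { temperature: 0.3, maxTokens: 1024 },
});

// One-off creative request: the override applies to this call only.
const creative = await llm.generateText({
  messages: [{ role: 'user', content: 'Write a haiku about search engines.' }],
  options: { temperature: 0.9 },
});

// Later calls still resolve to temperature 0.3 — the config was never mutated,
// unlike the old withOptions() spread into this.config.options.
const factual = await llm.generateText({
  messages: [{ role: 'user', content: 'What is Perplexica?' }],
});
```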