mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-09-18 15:21:33 +00:00
Implemented the configurable stream delay feature for
the reasoning models using ReasoningChatModel Custom Class. 1. Added the STREAM_DELAY parameter to the sample.config.toml file: [MODELS.DEEPSEEK] API_KEY = "" STREAM_DELAY = 20 # Milliseconds between token emissions for reasoning models (higher = slower, 0 = no delay) 2. Updated the Config interface in src/config.ts to include the new parameter: DEEPSEEK: { API_KEY: string; STREAM_DELAY: number; }; 3. Added a getter function in src/config.ts to retrieve the configured value: export const getDeepseekStreamDelay = () => loadConfig().MODELS.DEEPSEEK.STREAM_DELAY || 20; // Default to 20ms if not specified Updated the deepseek.ts provider to use the configured stream delay: const streamDelay = getDeepseekStreamDelay(); logger.debug(`Using stream delay of ${streamDelay}ms for ${model.id}`); // Then using it in the model configuration model: new ReasoningChatModel({ // ...other params streamDelay }), 4. This implementation provides several benefits: -User-Configurable: Users can now adjust the stream delay without modifying code -Descriptive Naming: The parameter name "STREAM_DELAY" clearly indicates its purpose -Documented: The comment in the config file explains what the parameter does -Fallback Default: If not specified, it defaults to 20ms -Logging: Added debug logging to show the configured value when loading models To adjust the stream delay, users can simply modify the STREAM_DELAY value in their config.toml file. Higher values will slow down token generation (making it easier to read in real-time), while lower values will speed it up. Setting it to 0 will disable the delay entirely.
This commit is contained in:
@@ -37,7 +37,6 @@ services:
|
|||||||
args:
|
args:
|
||||||
- NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
|
- NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
|
||||||
- NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
|
- NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
|
||||||
network: host
|
|
||||||
image: itzcrazykns1337/perplexica-frontend:main
|
image: itzcrazykns1337/perplexica-frontend:main
|
||||||
depends_on:
|
depends_on:
|
||||||
- perplexica-backend
|
- perplexica-backend
|
||||||
|
@@ -17,6 +17,7 @@ API_KEY = ""
|
|||||||
|
|
||||||
[MODELS.DEEPSEEK]
|
[MODELS.DEEPSEEK]
|
||||||
API_KEY = ""
|
API_KEY = ""
|
||||||
|
STREAM_DELAY = 5 # Milliseconds between token emissions for reasoning models (higher = slower, 0 = no delay)
|
||||||
|
|
||||||
[MODELS.OLLAMA]
|
[MODELS.OLLAMA]
|
||||||
API_URL = "" # Ollama API URL - http://host.docker.internal:11434
|
API_URL = "" # Ollama API URL - http://host.docker.internal:11434
|
||||||
|
@@ -25,6 +25,7 @@ interface Config {
|
|||||||
};
|
};
|
||||||
DEEPSEEK: {
|
DEEPSEEK: {
|
||||||
API_KEY: string;
|
API_KEY: string;
|
||||||
|
STREAM_DELAY: number;
|
||||||
};
|
};
|
||||||
OLLAMA: {
|
OLLAMA: {
|
||||||
API_URL: string;
|
API_URL: string;
|
||||||
@@ -69,6 +70,9 @@ export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY;
|
|||||||
|
|
||||||
export const getDeepseekApiKey = () => loadConfig().MODELS.DEEPSEEK.API_KEY;
|
export const getDeepseekApiKey = () => loadConfig().MODELS.DEEPSEEK.API_KEY;
|
||||||
|
|
||||||
|
export const getDeepseekStreamDelay = () =>
|
||||||
|
loadConfig().MODELS.DEEPSEEK.STREAM_DELAY || 20; // Default to 20ms if not specified
|
||||||
|
|
||||||
export const getSearxngApiEndpoint = () =>
|
export const getSearxngApiEndpoint = () =>
|
||||||
process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG;
|
process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG;
|
||||||
|
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
import { ReasoningChatModel } from '../reasoningChatModel';
|
import { ReasoningChatModel } from '../reasoningChatModel';
|
||||||
import { ChatOpenAI } from '@langchain/openai';
|
import { ChatOpenAI } from '@langchain/openai';
|
||||||
import logger from '../../utils/logger';
|
import logger from '../../utils/logger';
|
||||||
import { getDeepseekApiKey } from '../../config';
|
import { getDeepseekApiKey, getDeepseekStreamDelay } from '../../config';
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
|
|
||||||
interface DeepSeekModel {
|
interface DeepSeekModel {
|
||||||
@@ -54,6 +54,9 @@ export const loadDeepSeekChatModels = async (): Promise<Record<string, ChatModel
|
|||||||
if (model.id in MODEL_DISPLAY_NAMES) {
|
if (model.id in MODEL_DISPLAY_NAMES) {
|
||||||
// Use ReasoningChatModel for models that need reasoning capabilities
|
// Use ReasoningChatModel for models that need reasoning capabilities
|
||||||
if (REASONING_MODELS.includes(model.id)) {
|
if (REASONING_MODELS.includes(model.id)) {
|
||||||
|
const streamDelay = getDeepseekStreamDelay();
|
||||||
|
logger.debug(`Using stream delay of ${streamDelay}ms for ${model.id}`);
|
||||||
|
|
||||||
acc[model.id] = {
|
acc[model.id] = {
|
||||||
displayName: MODEL_DISPLAY_NAMES[model.id],
|
displayName: MODEL_DISPLAY_NAMES[model.id],
|
||||||
model: new ReasoningChatModel({
|
model: new ReasoningChatModel({
|
||||||
@@ -61,7 +64,7 @@ export const loadDeepSeekChatModels = async (): Promise<Record<string, ChatModel
|
|||||||
baseURL: deepSeekEndpoint,
|
baseURL: deepSeekEndpoint,
|
||||||
modelName: model.id,
|
modelName: model.id,
|
||||||
temperature: 0.7,
|
temperature: 0.7,
|
||||||
streamDelay: 20 // Add a small delay to control streaming speed
|
streamDelay // Use configured stream delay from config
|
||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
|
Reference in New Issue
Block a user