Compare commits


5 Commits

Author         SHA1        Message                                                                          Date
OTYAK          cea3269fc4  Merge 18533d58c2 into 41b258e4d8                                                 2025-04-09 23:57:42 +08:00
sjiampojamarn  41b258e4d8  Set speech message before return                                                 2025-04-08 23:17:52 -07:00
OTYAK          18533d58c2  Merge branch 'ItzCrazyKns:master' into master                                    2025-04-08 10:41:33 +01:00
OTYAK          54c71e33e0  feat(Tavily): update sample configuration for Tavily integration                2025-04-08 10:41:00 +01:00
OTYAK          2c56aa3cb3  feat(tavily): integrate Tavily search engine with configuration and UI support  2025-04-07 16:41:54 +01:00
13 changed files with 462 additions and 381 deletions

View File

@@ -27,3 +27,7 @@ API_KEY = ""
[API_ENDPOINTS]
SEARXNG = "" # SearxNG API URL - http://localhost:32768
TAVILY = "" # Tavily API key
[SEARCH]
ENGINE = "searxng" # "searxng" or "tavily"
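For reference, a filled-in version of these new sample keys might look as follows (values are placeholders; note that the Tavily API key lives under [API_ENDPOINTS] alongside the SearxNG URL rather than in a separate credentials section):

[API_ENDPOINTS]
SEARXNG = "http://localhost:32768" # SearxNG API URL
TAVILY = "tvly-xxxxxxxx"           # Tavily API key (placeholder)

[SEARCH]
ENGINE = "tavily" # "searxng" or "tavily"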

View File

@@ -8,6 +8,8 @@ import {
getOllamaApiEndpoint,
getOpenaiApiKey,
getDeepseekApiKey,
getSearchEngine,
getTavilyApiKey,
updateConfig,
} from '@/lib/config';
import {
@@ -58,6 +60,8 @@ export const GET = async (req: Request) => {
config['customOpenaiApiUrl'] = getCustomOpenaiApiUrl();
config['customOpenaiApiKey'] = getCustomOpenaiApiKey();
config['customOpenaiModelName'] = getCustomOpenaiModelName();
config['searchEngine'] = getSearchEngine();
config['tavilyApiKey'] = getTavilyApiKey();
return Response.json({ ...config }, { status: 200 });
} catch (err) {
@@ -99,6 +103,12 @@ export const POST = async (req: Request) => {
MODEL_NAME: config.customOpenaiModelName,
},
},
SEARCH: {
ENGINE: config.searchEngine,
},
API_ENDPOINTS: {
TAVILY: config.tavilyApiKey || '',
},
};
updateConfig(updatedConfig);
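A minimal client-side sketch against this POST handler, assuming it is mounted at /api/config (the route path is not shown in the diff); the two new field names come from the hunk above, and the remaining settings fields the handler reads are omitted for brevity:

// Hedged sketch: '/api/config' is an assumed route path.
await fetch('/api/config', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    searchEngine: 'tavily', // persisted as SEARCH.ENGINE
    tavilyApiKey: 'tvly-xxxxxxxx', // persisted as API_ENDPOINTS.TAVILY
  }),
});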

View File

@@ -1,4 +1,4 @@
import { searchSearxng } from '@/lib/searxng';
import { searchSearxng } from '../../../lib/searchEngines/searxng';
const articleWebsites = [
'yahoo.com',

View File

@@ -24,6 +24,8 @@ interface SettingsType {
customOpenaiApiKey: string;
customOpenaiApiUrl: string;
customOpenaiModelName: string;
searchEngine: string;
tavilyApiKey?: string;
}
interface InputProps extends React.InputHTMLAttributes<HTMLInputElement> {
@@ -145,6 +147,7 @@ const Page = () => {
const [automaticImageSearch, setAutomaticImageSearch] = useState(false);
const [automaticVideoSearch, setAutomaticVideoSearch] = useState(false);
const [systemInstructions, setSystemInstructions] = useState<string>('');
const [searchEngine, setSearchEngine] = useState<string>('searxng');
const [savingStates, setSavingStates] = useState<Record<string, boolean>>({});
useEffect(() => {
@@ -207,6 +210,7 @@ const Page = () => {
);
setSystemInstructions(localStorage.getItem('systemInstructions')!);
setSearchEngine(localStorage.getItem('searchEngine') || 'searxng');
setIsLoading(false);
};
@@ -366,6 +370,10 @@ const Page = () => {
localStorage.setItem('embeddingModel', value);
} else if (key === 'systemInstructions') {
localStorage.setItem('systemInstructions', value);
} else if (key === 'searchEngine') {
localStorage.setItem('searchEngine', value);
} else if (key === 'tavilyApiKey') {
localStorage.setItem('tavilyApiKey', value);
}
} catch (err) {
console.error('Failed to save:', err);
@@ -508,6 +516,32 @@ const Page = () => {
/>
</Switch>
</div>
<div className="flex flex-col space-y-1 mt-2">
<p className="text-black/70 dark:text-white/70 text-sm">
Search Engine
</p>
<Select
value={searchEngine}
onChange={(e) => {
const value = e.target.value;
setSearchEngine(value);
saveConfig('searchEngine', value);
}}
options={[
{ value: 'searxng', label: 'SearxNG' },
...(config.tavilyApiKey ? [{ value: 'tavily', label: 'Tavily' }] : []),
]}
/>
<p className="text-xs text-black/60 dark:text-white/60 mt-1">
Select which search engine to use for web searches
</p>
{searchEngine === 'tavily' && !config.tavilyApiKey && (
<p className="text-xs text-red-500 mt-1">
Tavily API key is required to use this search engine
</p>
)}
</div>
</div>
</SettingsSection>
@@ -858,6 +892,32 @@ const Page = () => {
onSave={(value) => saveConfig('deepseekApiKey', value)}
/>
</div>
<div className="flex flex-col space-y-1 mt-4 pt-4 border-t border-light-200 dark:border-dark-200">
<p className="text-black/90 dark:text-white/90 font-medium">Search Engine API Keys</p>
<p className="text-sm text-black/60 dark:text-white/60 mt-0.5">
API keys for search engines used in the application
</p>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Tavily API Key
</p>
<Input
type="text"
placeholder="Tavily API key"
value={config.tavilyApiKey || ''}
isSaving={savingStates['tavilyApiKey']}
onChange={(e) => {
setConfig((prev) => ({
...prev!,
tavilyApiKey: e.target.value,
}));
}}
onSave={(value) => saveConfig('tavilyApiKey', value)}
/>
</div>
</div>
</SettingsSection>
</div>

View File

@@ -363,6 +363,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
if (data.type === 'sources') {
sources = data.data;
if (!added) {
setMessages((prevMessages) => [
...prevMessages,
{
@@ -375,6 +376,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
},
]);
added = true;
}
setMessageAppeared(true);
}
@@ -392,8 +394,8 @@ const ChatWindow = ({ id }: { id?: string }) => {
},
]);
added = true;
setMessageAppeared(true);
} else {
}
setMessages((prev) =>
prev.map((message) => {
if (message.messageId === data.messageId) {
@@ -403,9 +405,9 @@ const ChatWindow = ({ id }: { id?: string }) => {
return message;
}),
);
}
recievedMessage += data.data;
setMessageAppeared(true);
}
if (data.type === 'messageEnd') {

View File

@@ -97,6 +97,7 @@ const MessageBox = ({
},
),
);
setSpeechMessage(message.content.replace(regex, ''));
return;
}

View File

@@ -76,11 +76,13 @@ const Optimization = ({
<PopoverButton
onClick={() => setOptimizationMode(mode.key)}
key={i}
disabled={mode.key === 'quality'}
className={cn(
'p-2 rounded-lg flex flex-col items-start justify-start text-start space-y-1 duration-200 cursor-pointer transition',
optimizationMode === mode.key
? 'bg-light-secondary dark:bg-dark-secondary'
: 'hover:bg-light-secondary dark:hover:bg-dark-secondary',
mode.key === 'quality' && 'opacity-50 cursor-not-allowed',
)}
>
<div className="flex flex-row items-center space-x-1 text-black dark:text-white">

View File

@@ -7,7 +7,7 @@ import { PromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../searxng';
import { searchSearxng } from '../searchEngines/searxng';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
const imageSearchChainPrompt = `

View File

@@ -7,7 +7,7 @@ import { PromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../searxng';
import { searchSearxng } from '../searchEngines/searxng';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
const VideoSearchChainPrompt = `

View File

@@ -36,6 +36,10 @@ interface Config {
};
API_ENDPOINTS: {
SEARXNG: string;
TAVILY: string;
};
SEARCH: {
ENGINE: string;
};
}
@@ -64,6 +68,12 @@ export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY;
export const getSearxngApiEndpoint = () =>
process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG;
export const getTavilyApiKey = () =>
process.env.TAVILY_API_KEY || loadConfig().API_ENDPOINTS.TAVILY;
export const getSearchEngine = () =>
process.env.SEARCH_ENGINE || loadConfig().SEARCH?.ENGINE || 'searxng';
export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL;
export const getDeepseekApiKey = () => loadConfig().MODELS.DEEPSEEK.API_KEY;
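Both new getters follow the file's existing precedence pattern: an environment variable (SEARCH_ENGINE, TAVILY_API_KEY) overrides the TOML file, and getSearchEngine falls back to 'searxng' when neither source is set. A small sketch of how a caller might guard the Tavily path (the '@/lib/config' alias is taken from the route file earlier in this diff):

import { getSearchEngine, getTavilyApiKey } from '@/lib/config';

// Resolution order: SEARCH_ENGINE env var, then [SEARCH].ENGINE, then 'searxng'.
const engine = getSearchEngine();
if (engine === 'tavily' && !getTavilyApiKey()) {
  // Mirrors the settings-page warning: Tavily is only usable with a key configured.
  throw new Error('Tavily selected but no Tavily API key is configured');
}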

View File

@@ -6,20 +6,26 @@ import {
MessagesPlaceholder,
PromptTemplate,
} from '@langchain/core/prompts';
import {
RunnableLambda,
RunnableMap,
RunnableSequence,
} from '@langchain/core/runnables';
import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import LineListOutputParser from '../outputParsers/listLineOutputParser';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { getDocumentsFromLinks } from '../utils/documents';
import { Document } from 'langchain/document';
import { searchSearxng, SearxngSearchResult } from '../searxng';
import { searchTavily } from '../searchEngines/tavily';
import { searchSearxng } from '../searchEngines/searxng';
import { getSearchEngine } from '../config';
import path from 'node:path';
import fs from 'node:fs';
import computeSimilarity from '../utils/computeSimilarity';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import { StreamEvent } from '@langchain/core/tracers/log_stream';
import { EventEmitter } from 'node:stream';
export interface MetaSearchAgentType {
searchAndAnswer: (
@@ -43,7 +49,7 @@ interface Config {
activeEngines: string[];
}
type SearchInput = {
type BasicChainInput = {
chat_history: BaseMessage[];
query: string;
};
@@ -56,25 +62,14 @@ class MetaSearchAgent implements MetaSearchAgentType {
this.config = config;
}
private async searchSources(
llm: BaseChatModel,
input: SearchInput,
emitter: EventEmitter,
) {
private async createSearchRetrieverChain(llm: BaseChatModel) {
(llm as unknown as ChatOpenAI).temperature = 0;
const chatPrompt = PromptTemplate.fromTemplate(
this.config.queryGeneratorPrompt,
);
const processedChatPrompt = await chatPrompt.invoke({
chat_history: formatChatHistoryAsString(input.chat_history),
query: input.query,
});
const llmRes = await llm.invoke(processedChatPrompt);
const messageStr = await this.strParser.invoke(llmRes);
return RunnableSequence.from([
PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
llm,
this.strParser,
RunnableLambda.from(async (input: string) => {
const linksOutputParser = new LineListOutputParser({
key: 'links',
});
@@ -83,10 +78,10 @@ class MetaSearchAgent implements MetaSearchAgentType {
key: 'question',
});
const links = await linksOutputParser.parse(messageStr);
const links = await linksOutputParser.parse(input);
let question = this.config.summarizer
? await questionOutputParser.parse(messageStr)
: messageStr;
? await questionOutputParser.parse(input)
: input;
if (question === 'not_needed') {
return { query: '', docs: [] };
@@ -106,7 +101,8 @@ class MetaSearchAgent implements MetaSearchAgentType {
linkDocs.map((doc) => {
const URLDocExists = docGroups.find(
(d) =>
d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
d.metadata.url === doc.metadata.url &&
d.metadata.totalDocs < 10,
);
if (!URLDocExists) {
@@ -121,7 +117,8 @@ class MetaSearchAgent implements MetaSearchAgentType {
const docIndex = docGroups.findIndex(
(d) =>
d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
d.metadata.url === doc.metadata.url &&
d.metadata.totalDocs < 10,
);
if (docIndex !== -1) {
@@ -210,185 +207,82 @@ class MetaSearchAgent implements MetaSearchAgentType {
} else {
question = question.replace(/<think>.*?<\/think>/g, '');
const res = await searchSearxng(question, {
const searchEngine = getSearchEngine();
let res;
if (searchEngine === 'tavily') {
res = await searchTavily(question, {
search_depth: 'basic',
max_results: 15,
include_images: true,
});
} else {
// Default to SearxNG
res = await searchSearxng(question, {
language: 'en',
engines: this.config.activeEngines,
});
}
const documents = res.results.map(
let documents: Document[] = [];
documents = documents.concat(
res.results.map(
(result) =>
new Document({
pageContent:
result.content ||
(this.config.activeEngines.includes('youtube')
? result.title
: '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
: ''),
metadata: {
title: result.title,
url: result.url,
...(result.img_src && { img_src: result.img_src }),
...(result.img_src ? { img_src: result.img_src } : {}),
},
}),
)
);
return { query: question, docs: documents };
}
}
private async performDeepResearch(
llm: BaseChatModel,
input: SearchInput,
emitter: EventEmitter,
) {
(llm as unknown as ChatOpenAI).temperature = 0;
const queryGenPrompt = PromptTemplate.fromTemplate(
this.config.queryGeneratorPrompt,
);
const formattedChatPrompt = await queryGenPrompt.invoke({
chat_history: formatChatHistoryAsString(input.chat_history),
query: input.query,
});
let i = 0;
let currentQuery = await this.strParser.invoke(
await llm.invoke(formattedChatPrompt),
);
const originalQuery = currentQuery;
const pastQueries: string[] = [];
const results: SearxngSearchResult[] = [];
while (i < 10) {
const res = await searchSearxng(currentQuery, {
language: 'en',
engines: this.config.activeEngines,
});
results.push(...res.results);
const reflectorPrompt = PromptTemplate.fromTemplate(`
You are an LLM that is tasked with reflecting on the results of a search query.
## Goal
You will be given the user's question, a list of search results collected from the web to answer that question, and the past queries made to collect those results. You have to analyze the results based on the user's question and do the following:
1. Identify unexplored areas, or areas covered only thinly in the results, and generate a new query that focuses on them. The new query should be more specific, and no similar query may already exist among the past queries provided to you. Make sure to include the keywords you are looking for, because the new query will be used to search the web for information on that topic. Make sure the query contains only 1 question and is not too long, to ensure it is search-engine friendly.
2. You'll have to generate a description explaining what you are doing, for example "I am looking for more information about X" or "Understanding how X works". The description should be short and concise.
## Output format
You need to output in XML format and must not generate any other text. Make sure not to include any other text in the output or start a conversation in the output. The output should be in the following format:
<query>(query)</query>
<description>(description)</description>
## Example
Say the user asked "What is Llama 4 by Meta?" and suppose the search results contain information about Llama 4 being an LLM but very little about its features. You could output:
<query>Llama 4 features</query> // Generate queries that capture searchable keywords rather than question words like "How", "What", "Why" etc.
<description>Looking for new features in Llama 4</description>
or something like
<query>How is Llama 4 better than its previous generation models</query>
<description>Understanding the difference between Llama 4 and previous generation models.</description>
## BELOW IS THE ACTUAL DATA YOU WILL BE WORKING WITH. IT IS NOT A PART OF EXAMPLES. YOU'LL HAVE TO GENERATE YOUR ANSWER BASED ON THIS DATA.
<user_question>\n{question}\n</user_question>
<search_results>\n{search_results}\n</search_results>
<past_queries>\n{past_queries}\n</past_queries>
Response:
`);
const formattedReflectorPrompt = await reflectorPrompt.invoke({
question: originalQuery,
search_results: results
.map(
(result) => `<result>${result.title} - ${result.content}</result>`,
)
.join('\n'),
past_queries: pastQueries.map((q) => `<query>${q}</query>`).join('\n'),
});
const feedback = await this.strParser.invoke(
await llm.invoke(formattedReflectorPrompt),
);
console.log(`Feedback: ${feedback}`);
const queryOutputParser = new LineOutputParser({
key: 'query',
});
const descriptionOutputParser = new LineOutputParser({
key: 'description',
});
currentQuery = await queryOutputParser.parse(feedback);
const description = await descriptionOutputParser.parse(feedback);
console.log(`Query: ${currentQuery}`);
console.log(`Description: ${description}`);
pastQueries.push(currentQuery);
++i;
}
const uniqueResults: SearxngSearchResult[] = [];
results.forEach((res) => {
const exists = uniqueResults.find((r) => r.url === res.url);
if (!exists) {
uniqueResults.push(res);
} else {
exists.content += `\n\n` + res.content;
}
});
const documents = uniqueResults /* .slice(0, 50) */
.map(
(r) =>
new Document({
pageContent: r.content || '',
metadata: {
title: r.title,
url: r.url,
...(r.img_src && { img_src: r.img_src }),
},
}),
);
return documents;
]);
}
private async streamAnswer(
private async createAnsweringChain(
llm: BaseChatModel,
fileIds: string[],
embeddings: Embeddings,
optimizationMode: 'speed' | 'balanced' | 'quality',
systemInstructions: string,
input: SearchInput,
emitter: EventEmitter,
) {
const chatPrompt = ChatPromptTemplate.fromMessages([
['system', this.config.responsePrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
]);
return RunnableSequence.from([
RunnableMap.from({
systemInstructions: () => systemInstructions,
query: (input: BasicChainInput) => input.query,
chat_history: (input: BasicChainInput) => input.chat_history,
date: () => new Date().toISOString(),
context: RunnableLambda.from(async (input: BasicChainInput) => {
const processedHistory = formatChatHistoryAsString(
input.chat_history,
);
let context = '';
if (optimizationMode === 'speed' || optimizationMode === 'balanced') {
let docs: Document[] | null = null;
let query = input.query;
if (this.config.searchWeb) {
const searchResults = await this.searchSources(llm, input, emitter);
const searchRetrieverChain =
await this.createSearchRetrieverChain(llm);
query = searchResults.query;
docs = searchResults.docs;
const searchRetrieverResult = await searchRetrieverChain.invoke({
chat_history: processedHistory,
query,
});
query = searchRetrieverResult.query;
docs = searchRetrieverResult.docs;
}
const sortedDocs = await this.rerankDocs(
@@ -399,42 +293,23 @@ class MetaSearchAgent implements MetaSearchAgentType {
optimizationMode,
);
emitter.emit(
'data',
JSON.stringify({ type: 'sources', data: sortedDocs }),
);
context = this.processDocs(sortedDocs);
} else if (optimizationMode === 'quality') {
let docs: Document[] = [];
docs = await this.performDeepResearch(llm, input, emitter);
emitter.emit('data', JSON.stringify({ type: 'sources', data: docs }));
context = this.processDocs(docs);
}
const formattedChatPrompt = await chatPrompt.invoke({
query: input.query,
chat_history: input.chat_history,
date: new Date().toISOString(),
context: context,
systemInstructions: systemInstructions,
return sortedDocs;
})
.withConfig({
runName: 'FinalSourceRetriever',
})
.pipe(this.processDocs),
}),
ChatPromptTemplate.fromMessages([
['system', this.config.responsePrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
]),
llm,
this.strParser,
]).withConfig({
runName: 'FinalResponseGenerator',
});
const llmRes = await llm.stream(formattedChatPrompt);
for await (const data of llmRes) {
const messageStr = await this.strParser.invoke(data);
emitter.emit(
'data',
JSON.stringify({ type: 'response', data: messageStr }),
);
}
emitter.emit('end');
}
private async rerankDocs(
@@ -570,13 +445,44 @@ class MetaSearchAgent implements MetaSearchAgentType {
return docs
.map(
(_, index) =>
`${index + 1}. ${docs[index].metadata.title} ${
docs[index].pageContent
}`,
`${index + 1}. ${docs[index].metadata.title} ${docs[index].pageContent}`,
)
.join('\n');
}
private async handleStream(
stream: AsyncGenerator<StreamEvent, any, any>,
emitter: eventEmitter,
) {
for await (const event of stream) {
if (
event.event === 'on_chain_end' &&
event.name === 'FinalSourceRetriever'
) {
emitter.emit(
'data',
JSON.stringify({ type: 'sources', data: event.data.output }),
);
}
if (
event.event === 'on_chain_stream' &&
event.name === 'FinalResponseGenerator'
) {
emitter.emit(
'data',
JSON.stringify({ type: 'response', data: event.data.chunk }),
);
}
if (
event.event === 'on_chain_end' &&
event.name === 'FinalResponseGenerator'
) {
emitter.emit('end');
}
}
}
async searchAndAnswer(
message: string,
history: BaseMessage[],
@@ -588,19 +494,26 @@ class MetaSearchAgent implements MetaSearchAgentType {
) {
const emitter = new eventEmitter();
this.streamAnswer(
const answeringChain = await this.createAnsweringChain(
llm,
fileIds,
embeddings,
optimizationMode,
systemInstructions,
);
const stream = answeringChain.streamEvents(
{
chat_history: history,
query: message,
},
emitter,
{
version: 'v1',
},
);
this.handleStream(stream, emitter);
return emitter;
}
}
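For context, a hedged sketch of consuming the emitter that searchAndAnswer returns. The event names and JSON payload shapes ('sources', 'response', 'end') come from handleStream above; the agent construction and the exact argument order after history are assumptions, since both are elided in this hunk:

// Sketch only: argument order past `history` is assumed, and renderSources /
// appendChunk are hypothetical UI helpers.
const emitter = await agent.searchAndAnswer(
  'What is Llama 4 by Meta?', // message
  chatHistory,                // BaseMessage[]
  llm,
  embeddings,
  'balanced',                 // optimizationMode
  [],                         // fileIds (assumed position)
  '',                         // systemInstructions (assumed position)
);
emitter.on('data', (raw: string) => {
  const event = JSON.parse(raw);
  if (event.type === 'sources') renderSources(event.data);
  if (event.type === 'response') appendChunk(event.data);
});
emitter.on('end', () => console.log('stream finished'));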

View File

@@ -1,5 +1,5 @@
import axios from 'axios';
import { getSearxngApiEndpoint } from './config';
import { getSearxngApiEndpoint } from '../config';
interface SearxngSearchOptions {
categories?: string[];
@@ -8,7 +8,7 @@ interface SearxngSearchOptions {
pageno?: number;
}
export interface SearxngSearchResult {
interface SearxngSearchResult {
title: string;
url: string;
img_src?: string;

View File

@@ -0,0 +1,79 @@
import axios from 'axios';
import { getTavilyApiKey } from '../config';
interface TavilySearchOptions {
topic?: 'general' | 'news';
search_depth?: 'basic' | 'advanced';
chunks_per_source?: number;
max_results?: number;
time_range?: 'day' | 'week' | 'month' | 'year' | 'd' | 'w' | 'm' | 'y';
days?: number;
include_answer?: boolean | 'basic' | 'advanced';
include_raw_content?: boolean;
include_images?: boolean;
include_image_descriptions?: boolean;
include_domains?: string[];
exclude_domains?: string[];
}
interface TavilySearchResult {
title: string;
url: string;
content: string;
score: number;
raw_content?: string;
}
interface TavilySearchResponse {
query: string;
answer?: string;
images?: Array<{
url: string;
description?: string;
}>;
results: TavilySearchResult[];
response_time: string;
}
export const searchTavily = async (
query: string,
opts?: TavilySearchOptions,
) => {
const tavilyApiKey = getTavilyApiKey();
if (!tavilyApiKey) {
throw new Error('Tavily API key is not configured');
}
const url = 'https://api.tavily.com/search';
const response = await axios.post<TavilySearchResponse>(
url,
{
query,
...opts,
},
{
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${tavilyApiKey}`,
},
}
);
const results = response.data.results;
// Convert Tavily results to match the format expected by the rest of the application
const formattedResults = results.map(result => ({
title: result.title,
url: result.url,
content: result.content,
img_src: undefined, // Tavily doesn't provide image URLs in the standard response
}));
return {
results: formattedResults,
suggestions: [], // Tavily doesn't provide suggestions, so return empty array
answer: response.data.answer, // Include the AI-generated answer if available
};
};
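A usage sketch for the adapter above, mirroring the options MetaSearchAgent passes earlier in this diff; the '@/lib/searchEngines/tavily' alias is assumed from the project's import style:

import { searchTavily } from '@/lib/searchEngines/tavily';

const { results, answer } = await searchTavily('open source search engines', {
  search_depth: 'basic',
  max_results: 15,
  include_images: true,
});
// results match the SearxNG result shape ({ title, url, content, img_src }),
// so the Document mapping in MetaSearchAgent works unchanged for either engine.
console.log(results.length, answer ?? '(no answer: include_answer not set)');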