From 2c56aa3cb398d7f3c916c3461d2056b348492f05 Mon Sep 17 00:00:00 2001 From: OTYAK <118303871+OmarElKadri@users.noreply.github.com> Date: Mon, 7 Apr 2025 16:41:54 +0100 Subject: [PATCH 1/2] feat(tavily): integrate Tavily search engine with configuration and UI support --- docs/TAVILY_SETUP.md | 110 ++++++++++++++++++++++++++++++ src/app/api/config/route.ts | 10 +++ src/app/settings/page.tsx | 60 ++++++++++++++++ src/lib/config.ts | 10 +++ src/lib/search/metaSearchAgent.ts | 60 +++++++++++----- src/lib/tavily.ts | 79 +++++++++++++++++++++ 6 files changed, 311 insertions(+), 18 deletions(-) create mode 100644 docs/TAVILY_SETUP.md create mode 100644 src/lib/tavily.ts diff --git a/docs/TAVILY_SETUP.md b/docs/TAVILY_SETUP.md new file mode 100644 index 0000000..2fe0d3e --- /dev/null +++ b/docs/TAVILY_SETUP.md @@ -0,0 +1,110 @@ +# Using Tavily Instead of SearxNG + +This guide explains how to configure and use Tavily as a replacement for SearxNG in the Perplexica project. + +## What is Tavily? + +[Tavily](https://tavily.com/) is an AI-native search engine that provides a powerful API for web searches. It's designed to be more focused on AI applications and provides structured data in its responses. + +## Advantages of Tavily over SearxNG + +- **AI-Optimized**: Tavily is specifically designed for AI applications and provides more relevant results for AI-powered search. +- **Structured Data**: Results come in a well-structured format that's easy to process. +- **AI-Generated Answers**: Tavily can generate concise answers to queries based on search results. +- **Advanced Search Options**: Includes features like search depth control, domain filtering, time-based filtering, and result limiting. + +## Setup Instructions + +1. **Get a Tavily API Key**: + - Sign up for an account at [Tavily](https://tavily.com/) + - Navigate to your dashboard and create an API key + +2. **Configure the API Key**: + - Open your `config.toml` file + - Add your Tavily API key to the `TAVILY` field under `[API_ENDPOINTS]`: + ```toml + [API_ENDPOINTS] + TAVILY = "your-tavily-api-key" + ``` + - Alternatively, you can set the `TAVILY_API_KEY` environment variable + +3. **Select Tavily as the Search Engine**: + - Open your `config.toml` file + - Set the `ENGINE` field under `[SEARCH]` to `"tavily"`: + ```toml + [SEARCH] + ENGINE = "tavily" + ``` + - Alternatively, you can set the `SEARCH_ENGINE` environment variable to `"tavily"` + +4. **Verify Configuration**: + - The application will now use Tavily for web searches + - You can verify this by checking the console logs, which will show "Using search engine: tavily" when a search is performed + +## Usage + +The Tavily integration is designed to be a drop-in replacement for SearxNG. All existing search functionality will continue to work as before, but with improved results from Tavily. + +## API Options + +The Tavily implementation supports the following options: + +- `topic`: 'general' or 'news' (default: 'general') +- `search_depth`: 'basic' or 'advanced' (default: 'advanced') +- `chunks_per_source`: Number of content chunks to retrieve from each source (1-3, default: 3) +- `max_results`: Maximum number of results to return (default: 10) +- `time_range`: Filter results by time range ('day', 'week', 'month', 'year', 'd', 'w', 'm', 'y') +- `days`: Number of days back from the current date to include (for news topic) +- `include_answer`: Include an AI-generated answer ('basic', 'advanced', or true) +- `include_raw_content`: Include the cleaned and parsed HTML content of each result +- `include_images`: Also perform an image search and include the results +- `include_image_descriptions`: Add descriptive text for each image +- `include_domains`: Array of domains to include in search results +- `exclude_domains`: Array of domains to exclude from search results + +## Response Format + +Tavily returns a structured response with the following fields: + +- `query`: The search query that was executed +- `answer`: An AI-generated answer to the query (if requested) +- `images`: List of query-related images (if requested) +- `results`: A list of search results, each containing: + - `title`: The title of the search result + - `url`: The URL of the search result + - `content`: A short description of the search result + - `score`: The relevance score of the search result + - `raw_content`: The cleaned and parsed HTML content (if requested) +- `response_time`: Time in seconds it took to complete the request + +## Troubleshooting + +If you encounter any issues: + +1. Verify your Tavily API key is correctly set in `config.toml` or as an environment variable +2. Check the application logs for any error messages +3. Ensure your Tavily account has sufficient credits/quota for API calls + +## Switching Between Search Engines + +You can easily switch between SearxNG and Tavily by changing the `ENGINE` setting in your `config.toml` file: + +```toml +[SEARCH] +ENGINE = "searxng" # Use SearxNG (default) +# or +ENGINE = "tavily" # Use Tavily +``` + +You can also use the `SEARCH_ENGINE` environment variable to override this setting. + +## Reverting to SearxNG + +If you need to revert to using SearxNG: + +1. Set the `ENGINE` field under `[SEARCH]` in your `config.toml` file to `"searxng"`: + ```toml + [SEARCH] + ENGINE = "searxng" + ``` +2. Ensure your `SEARXNG` entry is properly configured with a valid SearxNG instance URL \ No newline at end of file diff --git a/src/app/api/config/route.ts b/src/app/api/config/route.ts index 39c1f84..f203ea0 100644 --- a/src/app/api/config/route.ts +++ b/src/app/api/config/route.ts @@ -8,6 +8,8 @@ import { getOllamaApiEndpoint, getOpenaiApiKey, getDeepseekApiKey, + getSearchEngine, + getTavilyApiKey, updateConfig, } from '@/lib/config'; import { @@ -58,6 +60,8 @@ export const GET = async (req: Request) => { config['customOpenaiApiUrl'] = getCustomOpenaiApiUrl(); config['customOpenaiApiKey'] = getCustomOpenaiApiKey(); config['customOpenaiModelName'] = getCustomOpenaiModelName(); + config['searchEngine'] = getSearchEngine(); + config['tavilyApiKey'] = getTavilyApiKey(); return Response.json({ ...config }, { status: 200 }); } catch (err) { @@ -99,6 +103,12 @@ export const POST = async (req: Request) => { MODEL_NAME: config.customOpenaiModelName, }, }, + SEARCH: { + ENGINE: config.searchEngine, + }, + API_ENDPOINTS: { + TAVILY: config.tavilyApiKey || '', + }, }; updateConfig(updatedConfig); diff --git a/src/app/settings/page.tsx b/src/app/settings/page.tsx index 8eee9a4..06e228a 100644 --- a/src/app/settings/page.tsx +++ b/src/app/settings/page.tsx @@ -24,6 +24,8 @@ interface SettingsType { customOpenaiApiKey: string; customOpenaiApiUrl: string; customOpenaiModelName: string; + searchEngine: string; + tavilyApiKey?: string; } interface InputProps extends React.InputHTMLAttributes { @@ -145,6 +147,7 @@ const Page = () => { const [automaticImageSearch, setAutomaticImageSearch] = useState(false); const [automaticVideoSearch, setAutomaticVideoSearch] = useState(false); const [systemInstructions, setSystemInstructions] = useState(''); + const [searchEngine, setSearchEngine] = useState('searxng'); const [savingStates, setSavingStates] = useState>({}); useEffect(() => { @@ -207,6 +210,7 @@ const Page = () => { ); setSystemInstructions(localStorage.getItem('systemInstructions')!); + setSearchEngine(localStorage.getItem('searchEngine') || 'searxng'); setIsLoading(false); }; @@ -366,6 +370,10 @@ const Page = () => { localStorage.setItem('embeddingModel', value); } else if (key === 'systemInstructions') { localStorage.setItem('systemInstructions', value); + } else if (key === 'searchEngine') { + localStorage.setItem('searchEngine', value); + } else if (key === 'tavilyApiKey') { + localStorage.setItem('tavilyApiKey', value); } } catch (err) { console.error('Failed to save:', err); @@ -508,6 +516,32 @@ const Page = () => { /> + +
+

+ Search Engine +

+ { + setConfig((prev) => ({ + ...prev!, + tavilyApiKey: e.target.value, + })); + }} + onSave={(value) => saveConfig('tavilyApiKey', value)} + /> +
diff --git a/src/lib/config.ts b/src/lib/config.ts index 2831214..54d5c75 100644 --- a/src/lib/config.ts +++ b/src/lib/config.ts @@ -36,6 +36,10 @@ interface Config { }; API_ENDPOINTS: { SEARXNG: string; + TAVILY: string; + }; + SEARCH: { + ENGINE: string; }; } @@ -64,6 +68,12 @@ export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY; export const getSearxngApiEndpoint = () => process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG; +export const getTavilyApiKey = () => + process.env.TAVILY_API_KEY || loadConfig().API_ENDPOINTS.TAVILY; + +export const getSearchEngine = () => + process.env.SEARCH_ENGINE || loadConfig().SEARCH?.ENGINE || 'searxng'; + export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL; export const getDeepseekApiKey = () => loadConfig().MODELS.DEEPSEEK.API_KEY; diff --git a/src/lib/search/metaSearchAgent.ts b/src/lib/search/metaSearchAgent.ts index 67b7c58..645f59d 100644 --- a/src/lib/search/metaSearchAgent.ts +++ b/src/lib/search/metaSearchAgent.ts @@ -17,7 +17,9 @@ import LineListOutputParser from '../outputParsers/listLineOutputParser'; import LineOutputParser from '../outputParsers/lineOutputParser'; import { getDocumentsFromLinks } from '../utils/documents'; import { Document } from 'langchain/document'; +import { searchTavily } from '../tavily'; import { searchSearxng } from '../searxng'; +import { getSearchEngine } from '../config'; import path from 'node:path'; import fs from 'node:fs'; import computeSimilarity from '../utils/computeSimilarity'; @@ -205,25 +207,47 @@ class MetaSearchAgent implements MetaSearchAgentType { } else { question = question.replace(/.*?<\/think>/g, ''); - const res = await searchSearxng(question, { - language: 'en', - engines: this.config.activeEngines, - }); + const searchEngine = getSearchEngine(); + console.log(`Using search engine: ${searchEngine}`); - const documents = res.results.map( - (result) => - new Document({ - pageContent: - result.content || - (this.config.activeEngines.includes('youtube') - ? result.title - : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, - }), + let res; + + if (searchEngine === 'tavily') { + res = await searchTavily(question, { + search_depth: 'basic', + max_results: 15, + include_images: true, + }); + } else { + // Default to SearxNG + res = await searchSearxng(question, { + language: 'en', + engines: this.config.activeEngines, + }); + } + + // If we have an AI-generated answer from Tavily, create a document for it + let documents: Document[] = []; + + + + // Add the regular search results + documents = documents.concat( + res.results.map( + (result) => + new Document({ + pageContent: + result.content || + (this.config.activeEngines.includes('youtube') + ? result.title + : ''), + metadata: { + title: result.title, + url: result.url, + ...(result.img_src ? { img_src: result.img_src } : {}), + }, + }), + ) ); return { query: question, docs: documents }; diff --git a/src/lib/tavily.ts b/src/lib/tavily.ts new file mode 100644 index 0000000..a609bcc --- /dev/null +++ b/src/lib/tavily.ts @@ -0,0 +1,79 @@ +import axios from 'axios'; +import { getTavilyApiKey } from './config'; + +interface TavilySearchOptions { + topic?: 'general' | 'news'; + search_depth?: 'basic' | 'advanced'; + chunks_per_source?: number; + max_results?: number; + time_range?: 'day' | 'week' | 'month' | 'year' | 'd' | 'w' | 'm' | 'y'; + days?: number; + include_answer?: boolean | 'basic' | 'advanced'; + include_raw_content?: boolean; + include_images?: boolean; + include_image_descriptions?: boolean; + include_domains?: string[]; + exclude_domains?: string[]; +} + +interface TavilySearchResult { + title: string; + url: string; + content: string; + score: number; + raw_content?: string; +} + +interface TavilySearchResponse { + query: string; + answer?: string; + images?: Array<{ + url: string; + description?: string; + }>; + results: TavilySearchResult[]; + response_time: string; +} + +export const searchTavily = async ( + query: string, + opts?: TavilySearchOptions, +) => { + const tavilyApiKey = getTavilyApiKey(); + + if (!tavilyApiKey) { + throw new Error('Tavily API key is not configured'); + } + + const url = 'https://api.tavily.com/search'; + + const response = await axios.post( + url, + { + query, + ...opts, + }, + { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${tavilyApiKey}`, + }, + } + ); + + const results = response.data.results; + + // Convert Tavily results to match the format expected by the rest of the application + const formattedResults = results.map(result => ({ + title: result.title, + url: result.url, + content: result.content, + img_src: undefined, // Tavily doesn't provide image URLs in the standard response + })); + + return { + results: formattedResults, + suggestions: [], // Tavily doesn't provide suggestions, so return empty array + answer: response.data.answer, // Include the AI-generated answer if available + }; +}; \ No newline at end of file From 54c71e33e03d85036cd4537f529afb5d8b703708 Mon Sep 17 00:00:00 2001 From: OTYAK <118303871+OmarElKadri@users.noreply.github.com> Date: Tue, 8 Apr 2025 10:41:00 +0100 Subject: [PATCH 2/2] feat(Tavily): update sample configuration for Tavily integration --- docs/TAVILY_SETUP.md | 110 ------------------------- sample.config.toml | 6 +- src/app/api/discover/route.ts | 2 +- src/app/settings/page.tsx | 2 +- src/lib/chains/imageSearchAgent.ts | 2 +- src/lib/chains/videoSearchAgent.ts | 2 +- src/lib/search/metaSearchAgent.ts | 9 +- src/lib/{ => searchEngines}/searxng.ts | 2 +- src/lib/{ => searchEngines}/tavily.ts | 2 +- 9 files changed, 13 insertions(+), 124 deletions(-) delete mode 100644 docs/TAVILY_SETUP.md rename src/lib/{ => searchEngines}/searxng.ts (95%) rename src/lib/{ => searchEngines}/tavily.ts (97%) diff --git a/docs/TAVILY_SETUP.md b/docs/TAVILY_SETUP.md deleted file mode 100644 index 2fe0d3e..0000000 --- a/docs/TAVILY_SETUP.md +++ /dev/null @@ -1,110 +0,0 @@ -# Using Tavily Instead of SearxNG - -This guide explains how to configure and use Tavily as a replacement for SearxNG in the Perplexica project. - -## What is Tavily? - -[Tavily](https://tavily.com/) is an AI-native search engine that provides a powerful API for web searches. It's designed to be more focused on AI applications and provides structured data in its responses. - -## Advantages of Tavily over SearxNG - -- **AI-Optimized**: Tavily is specifically designed for AI applications and provides more relevant results for AI-powered search. -- **Structured Data**: Results come in a well-structured format that's easy to process. -- **AI-Generated Answers**: Tavily can generate concise answers to queries based on search results. -- **Advanced Search Options**: Includes features like search depth control, domain filtering, time-based filtering, and result limiting. - -## Setup Instructions - -1. **Get a Tavily API Key**: - - Sign up for an account at [Tavily](https://tavily.com/) - - Navigate to your dashboard and create an API key - -2. **Configure the API Key**: - - Open your `config.toml` file - - Add your Tavily API key to the `TAVILY` field under `[API_ENDPOINTS]`: - ```toml - [API_ENDPOINTS] - TAVILY = "your-tavily-api-key" - ``` - - Alternatively, you can set the `TAVILY_API_KEY` environment variable - -3. **Select Tavily as the Search Engine**: - - Open your `config.toml` file - - Set the `ENGINE` field under `[SEARCH]` to `"tavily"`: - ```toml - [SEARCH] - ENGINE = "tavily" - ``` - - Alternatively, you can set the `SEARCH_ENGINE` environment variable to `"tavily"` - -4. **Verify Configuration**: - - The application will now use Tavily for web searches - - You can verify this by checking the console logs, which will show "Using search engine: tavily" when a search is performed - -## Usage - -The Tavily integration is designed to be a drop-in replacement for SearxNG. All existing search functionality will continue to work as before, but with improved results from Tavily. - -## API Options - -The Tavily implementation supports the following options: - -- `topic`: 'general' or 'news' (default: 'general') -- `search_depth`: 'basic' or 'advanced' (default: 'advanced') -- `chunks_per_source`: Number of content chunks to retrieve from each source (1-3, default: 3) -- `max_results`: Maximum number of results to return (default: 10) -- `time_range`: Filter results by time range ('day', 'week', 'month', 'year', 'd', 'w', 'm', 'y') -- `days`: Number of days back from the current date to include (for news topic) -- `include_answer`: Include an AI-generated answer ('basic', 'advanced', or true) -- `include_raw_content`: Include the cleaned and parsed HTML content of each result -- `include_images`: Also perform an image search and include the results -- `include_image_descriptions`: Add descriptive text for each image -- `include_domains`: Array of domains to include in search results -- `exclude_domains`: Array of domains to exclude from search results - -## Response Format - -Tavily returns a structured response with the following fields: - -- `query`: The search query that was executed -- `answer`: An AI-generated answer to the query (if requested) -- `images`: List of query-related images (if requested) -- `results`: A list of search results, each containing: - - `title`: The title of the search result - - `url`: The URL of the search result - - `content`: A short description of the search result - - `score`: The relevance score of the search result - - `raw_content`: The cleaned and parsed HTML content (if requested) -- `response_time`: Time in seconds it took to complete the request - -## Troubleshooting - -If you encounter any issues: - -1. Verify your Tavily API key is correctly set in `config.toml` or as an environment variable -2. Check the application logs for any error messages -3. Ensure your Tavily account has sufficient credits/quota for API calls - -## Switching Between Search Engines - -You can easily switch between SearxNG and Tavily by changing the `ENGINE` setting in your `config.toml` file: - -```toml -[SEARCH] -ENGINE = "searxng" # Use SearxNG (default) -# or -ENGINE = "tavily" # Use Tavily -``` - -You can also use the `SEARCH_ENGINE` environment variable to override this setting. - -## Reverting to SearxNG - -If you need to revert to using SearxNG: - -1. Set the `ENGINE` field under `[SEARCH]` in your `config.toml` file to `"searxng"`: - ```toml - [SEARCH] - ENGINE = "searxng" - ``` -2. Ensure your `SEARXNG` entry is properly configured with a valid SearxNG instance URL \ No newline at end of file diff --git a/sample.config.toml b/sample.config.toml index 980e99d..e2fa352 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -26,4 +26,8 @@ API_URL = "" # Ollama API URL - http://host.docker.internal:11434 API_KEY = "" [API_ENDPOINTS] -SEARXNG = "" # SearxNG API URL - http://localhost:32768 \ No newline at end of file +SEARXNG = "" # SearxNG API URL - http://localhost:32768 +TAVILY = "" # Tavily API key + +[SEARCH] +ENGINE = "searxng" # "searxng" or "tavily" \ No newline at end of file diff --git a/src/app/api/discover/route.ts b/src/app/api/discover/route.ts index 8c1f470..502295c 100644 --- a/src/app/api/discover/route.ts +++ b/src/app/api/discover/route.ts @@ -1,4 +1,4 @@ -import { searchSearxng } from '@/lib/searxng'; +import { searchSearxng } from '../../../lib/searchEngines/searxng'; const articleWebsites = [ 'yahoo.com', diff --git a/src/app/settings/page.tsx b/src/app/settings/page.tsx index 06e228a..6988a3d 100644 --- a/src/app/settings/page.tsx +++ b/src/app/settings/page.tsx @@ -906,7 +906,7 @@ const Page = () => {

{ diff --git a/src/lib/chains/imageSearchAgent.ts b/src/lib/chains/imageSearchAgent.ts index 4fd684f..1381c3c 100644 --- a/src/lib/chains/imageSearchAgent.ts +++ b/src/lib/chains/imageSearchAgent.ts @@ -7,7 +7,7 @@ import { PromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '../utils/formatHistory'; import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; -import { searchSearxng } from '../searxng'; +import { searchSearxng } from '../searchEngines/searxng'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; const imageSearchChainPrompt = ` diff --git a/src/lib/chains/videoSearchAgent.ts b/src/lib/chains/videoSearchAgent.ts index f7cb156..0b05d57 100644 --- a/src/lib/chains/videoSearchAgent.ts +++ b/src/lib/chains/videoSearchAgent.ts @@ -7,7 +7,7 @@ import { PromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '../utils/formatHistory'; import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; -import { searchSearxng } from '../searxng'; +import { searchSearxng } from '../searchEngines/searxng'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; const VideoSearchChainPrompt = ` diff --git a/src/lib/search/metaSearchAgent.ts b/src/lib/search/metaSearchAgent.ts index 645f59d..94e1692 100644 --- a/src/lib/search/metaSearchAgent.ts +++ b/src/lib/search/metaSearchAgent.ts @@ -17,8 +17,8 @@ import LineListOutputParser from '../outputParsers/listLineOutputParser'; import LineOutputParser from '../outputParsers/lineOutputParser'; import { getDocumentsFromLinks } from '../utils/documents'; import { Document } from 'langchain/document'; -import { searchTavily } from '../tavily'; -import { searchSearxng } from '../searxng'; +import { searchTavily } from '../searchEngines/tavily'; +import { searchSearxng } from '../searchEngines/searxng'; import { getSearchEngine } from '../config'; import path from 'node:path'; import fs from 'node:fs'; @@ -208,7 +208,6 @@ class MetaSearchAgent implements MetaSearchAgentType { question = question.replace(/.*?<\/think>/g, ''); const searchEngine = getSearchEngine(); - console.log(`Using search engine: ${searchEngine}`); let res; @@ -226,12 +225,8 @@ class MetaSearchAgent implements MetaSearchAgentType { }); } - // If we have an AI-generated answer from Tavily, create a document for it let documents: Document[] = []; - - - // Add the regular search results documents = documents.concat( res.results.map( (result) => diff --git a/src/lib/searxng.ts b/src/lib/searchEngines/searxng.ts similarity index 95% rename from src/lib/searxng.ts rename to src/lib/searchEngines/searxng.ts index ae19db2..1209fea 100644 --- a/src/lib/searxng.ts +++ b/src/lib/searchEngines/searxng.ts @@ -1,5 +1,5 @@ import axios from 'axios'; -import { getSearxngApiEndpoint } from './config'; +import { getSearxngApiEndpoint } from '../config'; interface SearxngSearchOptions { categories?: string[]; diff --git a/src/lib/tavily.ts b/src/lib/searchEngines/tavily.ts similarity index 97% rename from src/lib/tavily.ts rename to src/lib/searchEngines/tavily.ts index a609bcc..b6e651d 100644 --- a/src/lib/tavily.ts +++ b/src/lib/searchEngines/tavily.ts @@ -1,5 +1,5 @@ import axios from 'axios'; -import { getTavilyApiKey } from './config'; +import { getTavilyApiKey } from '../config'; interface TavilySearchOptions { topic?: 'general' | 'news';