diff --git a/docs/TAVILY_SETUP.md b/docs/TAVILY_SETUP.md new file mode 100644 index 0000000..2fe0d3e --- /dev/null +++ b/docs/TAVILY_SETUP.md @@ -0,0 +1,110 @@ +# Using Tavily Instead of SearxNG + +This guide explains how to configure and use Tavily as a replacement for SearxNG in the Perplexica project. + +## What is Tavily? + +[Tavily](https://tavily.com/) is an AI-native search engine that provides a powerful API for web searches. It is designed for AI applications and returns structured data in its responses. + +## Advantages of Tavily over SearxNG + +- **AI-Optimized**: Tavily is specifically designed for AI applications and provides more relevant results for AI-powered search. +- **Structured Data**: Results come in a well-structured format that's easy to process. +- **AI-Generated Answers**: Tavily can generate concise answers to queries based on search results. +- **Advanced Search Options**: Includes features like search depth control, domain filtering, time-based filtering, and result limiting. + +## Setup Instructions + +1. **Get a Tavily API Key**: + - Sign up for an account at [Tavily](https://tavily.com/) + - Navigate to your dashboard and create an API key + +2. **Configure the API Key**: + - Open your `config.toml` file + - Add your Tavily API key to the `TAVILY` field under `[API_ENDPOINTS]`: + ```toml + [API_ENDPOINTS] + TAVILY = "your-tavily-api-key" + ``` + - Alternatively, you can set the `TAVILY_API_KEY` environment variable; when it is set, it takes precedence over the value in `config.toml` + +3. **Select Tavily as the Search Engine**: + - Open your `config.toml` file + - Set the `ENGINE` field under `[SEARCH]` to `"tavily"`: + ```toml + [SEARCH] + ENGINE = "tavily" + ``` + - Alternatively, you can set the `SEARCH_ENGINE` environment variable to `"tavily"`; when it is set, it takes precedence over the value in `config.toml` + +4. 
**Verify Configuration**: + - The application will now use Tavily for web searches + - You can verify this by checking the console logs, which will show "Using search engine: tavily" when a search is performed + +## Usage + +The Tavily integration is designed to be a drop-in replacement for SearxNG: Tavily results are converted to the same `title`/`url`/`content` format the rest of the application expects, so existing search functionality keeps working. Note that the SearxNG engine list (`activeEngines`) is not forwarded to Tavily and that converted results carry no `img_src`. + +## API Options + +The Tavily implementation supports the following options: + +- `topic`: 'general' or 'news' (default: 'general') +- `search_depth`: 'basic' or 'advanced' (Tavily default: 'basic'; Perplexica's web search sends 'basic') +- `chunks_per_source`: Number of content chunks to retrieve from each source (1-3, default: 3) +- `max_results`: Maximum number of results to return (Tavily default: 5; Perplexica's web search sends 15) +- `time_range`: Filter results by time range ('day', 'week', 'month', 'year', 'd', 'w', 'm', 'y') +- `days`: Number of days back from the current date to include (for news topic) +- `include_answer`: Include an AI-generated answer ('basic', 'advanced', or true) +- `include_raw_content`: Include the cleaned and parsed HTML content of each result +- `include_images`: Also perform an image search and include the results +- `include_image_descriptions`: Add descriptive text for each image +- `include_domains`: Array of domains to include in search results +- `exclude_domains`: Array of domains to exclude from search results + +## Response Format + +Tavily returns a structured response with the following fields: + +- `query`: The search query that was executed +- `answer`: An AI-generated answer to the query (if requested) +- `images`: List of query-related images (if requested) +- `results`: A list of search results, each containing: + - `title`: The title of the search result + - `url`: The URL of the search result + - `content`: A short description of the search result + - `score`: The relevance score of the search result + - `raw_content`: The cleaned and parsed HTML content (if requested) +- `response_time`: 
Time in seconds it took to complete the request + +## Troubleshooting + +If you encounter any issues: + +1. Verify your Tavily API key is correctly set in `config.toml` or as an environment variable +2. Check the application logs for any error messages +3. Ensure your Tavily account has sufficient credits/quota for API calls + +## Switching Between Search Engines + +You can easily switch between SearxNG and Tavily by changing the `ENGINE` setting in your `config.toml` file: + +```toml +[SEARCH] +ENGINE = "searxng" # Use SearxNG (default) +# or +ENGINE = "tavily" # Use Tavily +``` + +You can also use the `SEARCH_ENGINE` environment variable to override this setting. + +## Reverting to SearxNG + +If you need to revert to using SearxNG: + +1. Set the `ENGINE` field under `[SEARCH]` in your `config.toml` file to `"searxng"`: + ```toml + [SEARCH] + ENGINE = "searxng" + ``` +2. Ensure your `SEARXNG` entry is properly configured with a valid SearxNG instance URL \ No newline at end of file diff --git a/src/app/api/config/route.ts b/src/app/api/config/route.ts index 39c1f84..f203ea0 100644 --- a/src/app/api/config/route.ts +++ b/src/app/api/config/route.ts @@ -8,6 +8,8 @@ import { getOllamaApiEndpoint, getOpenaiApiKey, getDeepseekApiKey, + getSearchEngine, + getTavilyApiKey, updateConfig, } from '@/lib/config'; import { @@ -58,6 +60,8 @@ export const GET = async (req: Request) => { config['customOpenaiApiUrl'] = getCustomOpenaiApiUrl(); config['customOpenaiApiKey'] = getCustomOpenaiApiKey(); config['customOpenaiModelName'] = getCustomOpenaiModelName(); + config['searchEngine'] = getSearchEngine(); + config['tavilyApiKey'] = getTavilyApiKey(); return Response.json({ ...config }, { status: 200 }); } catch (err) { @@ -99,6 +103,12 @@ export const POST = async (req: Request) => { MODEL_NAME: config.customOpenaiModelName, }, }, + SEARCH: { + ENGINE: config.searchEngine, + }, + API_ENDPOINTS: { + TAVILY: config.tavilyApiKey || '', + }, }; updateConfig(updatedConfig); diff --git 
a/src/app/settings/page.tsx b/src/app/settings/page.tsx index 8eee9a4..06e228a 100644 --- a/src/app/settings/page.tsx +++ b/src/app/settings/page.tsx @@ -24,6 +24,8 @@ interface SettingsType { customOpenaiApiKey: string; customOpenaiApiUrl: string; customOpenaiModelName: string; + searchEngine: string; + tavilyApiKey?: string; } interface InputProps extends React.InputHTMLAttributes { @@ -145,6 +147,7 @@ const Page = () => { const [automaticImageSearch, setAutomaticImageSearch] = useState(false); const [automaticVideoSearch, setAutomaticVideoSearch] = useState(false); const [systemInstructions, setSystemInstructions] = useState(''); + const [searchEngine, setSearchEngine] = useState('searxng'); const [savingStates, setSavingStates] = useState>({}); useEffect(() => { @@ -207,6 +210,7 @@ const Page = () => { ); setSystemInstructions(localStorage.getItem('systemInstructions')!); + setSearchEngine(localStorage.getItem('searchEngine') || 'searxng'); setIsLoading(false); }; @@ -366,6 +370,10 @@ const Page = () => { localStorage.setItem('embeddingModel', value); } else if (key === 'systemInstructions') { localStorage.setItem('systemInstructions', value); + } else if (key === 'searchEngine') { + localStorage.setItem('searchEngine', value); + } else if (key === 'tavilyApiKey') { + localStorage.setItem('tavilyApiKey', value); } } catch (err) { console.error('Failed to save:', err); @@ -508,6 +516,32 @@ const Page = () => { /> + +
+

+ Search Engine +

+ { + setConfig((prev) => ({ + ...prev!, + tavilyApiKey: e.target.value, + })); + }} + onSave={(value) => saveConfig('tavilyApiKey', value)} + /> +
diff --git a/src/lib/config.ts b/src/lib/config.ts index 2831214..54d5c75 100644 --- a/src/lib/config.ts +++ b/src/lib/config.ts @@ -36,6 +36,10 @@ interface Config { }; API_ENDPOINTS: { SEARXNG: string; + TAVILY: string; + }; + SEARCH: { + ENGINE: string; }; } @@ -64,6 +68,12 @@ export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY; export const getSearxngApiEndpoint = () => process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG; +export const getTavilyApiKey = () => + process.env.TAVILY_API_KEY || loadConfig().API_ENDPOINTS.TAVILY; + +export const getSearchEngine = () => + process.env.SEARCH_ENGINE || loadConfig().SEARCH?.ENGINE || 'searxng'; + export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL; export const getDeepseekApiKey = () => loadConfig().MODELS.DEEPSEEK.API_KEY; diff --git a/src/lib/search/metaSearchAgent.ts b/src/lib/search/metaSearchAgent.ts index 67b7c58..645f59d 100644 --- a/src/lib/search/metaSearchAgent.ts +++ b/src/lib/search/metaSearchAgent.ts @@ -17,7 +17,9 @@ import LineListOutputParser from '../outputParsers/listLineOutputParser'; import LineOutputParser from '../outputParsers/lineOutputParser'; import { getDocumentsFromLinks } from '../utils/documents'; import { Document } from 'langchain/document'; +import { searchTavily } from '../tavily'; import { searchSearxng } from '../searxng'; +import { getSearchEngine } from '../config'; import path from 'node:path'; import fs from 'node:fs'; import computeSimilarity from '../utils/computeSimilarity'; @@ -205,25 +207,47 @@ class MetaSearchAgent implements MetaSearchAgentType { } else { question = question.replace(/.*?<\/think>/g, ''); - const res = await searchSearxng(question, { - language: 'en', - engines: this.config.activeEngines, - }); + const searchEngine = getSearchEngine(); + console.log(`Using search engine: ${searchEngine}`); - const documents = res.results.map( - (result) => - new Document({ - pageContent: - result.content || - 
(this.config.activeEngines.includes('youtube') - ? result.title - : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, - }), + let res; + + if (searchEngine === 'tavily') { + res = await searchTavily(question, { + search_depth: 'basic', + max_results: 15, + include_images: true, + }); + } else { + // Default to SearxNG + res = await searchSearxng(question, { + language: 'en', + engines: this.config.activeEngines, + }); + } + + // If we have an AI-generated answer from Tavily, create a document for it + let documents: Document[] = []; + + + + // Add the regular search results + documents = documents.concat( + res.results.map( + (result) => + new Document({ + pageContent: + result.content || + (this.config.activeEngines.includes('youtube') + ? result.title + : ''), + metadata: { + title: result.title, + url: result.url, + ...(result.img_src ? 
{ img_src: result.img_src } : {}), + }, + }), + ) ); return { query: question, docs: documents }; diff --git a/src/lib/tavily.ts b/src/lib/tavily.ts new file mode 100644 index 0000000..a609bcc --- /dev/null +++ b/src/lib/tavily.ts @@ -0,0 +1,79 @@ +import axios from 'axios'; +import { getTavilyApiKey } from './config'; + +interface TavilySearchOptions { + topic?: 'general' | 'news'; + search_depth?: 'basic' | 'advanced'; + chunks_per_source?: number; + max_results?: number; + time_range?: 'day' | 'week' | 'month' | 'year' | 'd' | 'w' | 'm' | 'y'; + days?: number; + include_answer?: boolean | 'basic' | 'advanced'; + include_raw_content?: boolean; + include_images?: boolean; + include_image_descriptions?: boolean; + include_domains?: string[]; + exclude_domains?: string[]; +} + +interface TavilySearchResult { + title: string; + url: string; + content: string; + score: number; + raw_content?: string; +} + +interface TavilySearchResponse { + query: string; + answer?: string; + images?: Array<{ + url: string; + description?: string; + }>; + results: TavilySearchResult[]; + response_time: string; +} + +export const searchTavily = async ( + query: string, + opts?: TavilySearchOptions, +) => { + const tavilyApiKey = getTavilyApiKey(); + + if (!tavilyApiKey) { + throw new Error('Tavily API key is not configured'); + } + + const url = 'https://api.tavily.com/search'; + + const response = await axios.post( + url, + { + query, + ...opts, + }, + { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${tavilyApiKey}`, + }, + } + ); + + const results = response.data.results; + + // Convert Tavily results to match the format expected by the rest of the application + const formattedResults = results.map(result => ({ + title: result.title, + url: result.url, + content: result.content, + img_src: undefined, // Tavily doesn't provide image URLs in the standard response + })); + + return { + results: formattedResults, + suggestions: [], // Tavily doesn't 
provide suggestions, so return empty array + answer: response.data.answer, // Include the AI-generated answer if available + }; +}; \ No newline at end of file