feat(tavily): integrate Tavily search engine with configuration and UI support

This commit is contained in:
OTYAK
2025-04-07 16:41:54 +01:00
parent a85f762c58
commit 2c56aa3cb3
6 changed files with 311 additions and 18 deletions

110
docs/TAVILY_SETUP.md Normal file
View File

@ -0,0 +1,110 @@
# Using Tavily Instead of SearxNG
This guide explains how to configure and use Tavily as a replacement for SearxNG in the Perplexica project.
## What is Tavily?
[Tavily](https://tavily.com/) is an AI-native search engine that provides a powerful API for web searches. It's designed to be more focused on AI applications and provides structured data in its responses.
## Advantages of Tavily over SearxNG
- **AI-Optimized**: Tavily is specifically designed for AI applications and provides more relevant results for AI-powered search.
- **Structured Data**: Results come in a well-structured format that's easy to process.
- **AI-Generated Answers**: Tavily can generate concise answers to queries based on search results.
- **Advanced Search Options**: Includes features like search depth control, domain filtering, time-based filtering, and result limiting.
## Setup Instructions
1. **Get a Tavily API Key**:
- Sign up for an account at [Tavily](https://tavily.com/)
- Navigate to your dashboard and create an API key
2. **Configure the API Key**:
- Open your `config.toml` file
- Add your Tavily API key to the `TAVILY` field under `[API_ENDPOINTS]`:
```toml
[API_ENDPOINTS]
TAVILY = "your-tavily-api-key"
```
- Alternatively, you can set the `TAVILY_API_KEY` environment variable; when set, it takes precedence over the value in `config.toml`
3. **Select Tavily as the Search Engine**:
- Open your `config.toml` file
- Set the `ENGINE` field under `[SEARCH]` to `"tavily"`:
```toml
[SEARCH]
ENGINE = "tavily"
```
- Alternatively, you can set the `SEARCH_ENGINE` environment variable to `"tavily"`
4. **Verify Configuration**:
- The application will now use Tavily for web searches
- You can verify this by checking the console logs, which will show "Using search engine: tavily" when a search is performed
## Usage
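The Tavily integration is designed to be a drop-in replacement for SearxNG. All existing search functionality will continue to work as before, with results served by Tavily. Note that the SearxNG-specific engine selection is not forwarded to Tavily, so it has no effect on which sources are queried while Tavily is the active engine.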
## API Options
The Tavily implementation supports the following options:
- `topic`: 'general' or 'news' (default: 'general')
- `search_depth`: 'basic' or 'advanced' (Tavily API default: 'basic'; the built-in web search also requests 'basic')
- `chunks_per_source`: Number of content chunks to retrieve from each source (1-3, default: 3)
- `max_results`: Maximum number of results to return (Tavily API default: 5; the built-in web search requests 15)
- `time_range`: Filter results by time range ('day', 'week', 'month', 'year', 'd', 'w', 'm', 'y')
- `days`: Number of days back from the current date to include (for news topic)
- `include_answer`: Include an AI-generated answer ('basic', 'advanced', or true)
- `include_raw_content`: Include the cleaned and parsed HTML content of each result
- `include_images`: Also perform an image search and include the results
- `include_image_descriptions`: Add descriptive text for each image
- `include_domains`: Array of domains to include in search results
- `exclude_domains`: Array of domains to exclude from search results
## Response Format
Tavily returns a structured response with the following fields:
- `query`: The search query that was executed
- `answer`: An AI-generated answer to the query (if requested)
- `images`: List of query-related images (if requested)
- `results`: A list of search results, each containing:
  - `title`: The title of the search result
  - `url`: The URL of the search result
  - `content`: A short description of the search result
  - `score`: The relevance score of the search result
  - `raw_content`: The cleaned and parsed HTML content (if requested)
- `response_time`: Time in seconds it took to complete the request
## Troubleshooting
If you encounter any issues:
1. Verify your Tavily API key is correctly set in `config.toml` or as an environment variable
2. Check the application logs for any error messages
3. Ensure your Tavily account has sufficient credits/quota for API calls
## Switching Between Search Engines
You can easily switch between SearxNG and Tavily by changing the `ENGINE` setting in your `config.toml` file:
```toml
[SEARCH]
ENGINE = "searxng" # Use SearxNG (default)
# or, to use Tavily, replace the line above with:
# ENGINE = "tavily"
```
You can also use the `SEARCH_ENGINE` environment variable to override this setting.
## Reverting to SearxNG
If you need to revert to using SearxNG:
1. Set the `ENGINE` field under `[SEARCH]` in your `config.toml` file to `"searxng"`:
```toml
[SEARCH]
ENGINE = "searxng"
```
2. Ensure your `SEARXNG` entry is properly configured with a valid SearxNG instance URL

View File

@ -8,6 +8,8 @@ import {
getOllamaApiEndpoint,
getOpenaiApiKey,
getDeepseekApiKey,
getSearchEngine,
getTavilyApiKey,
updateConfig,
} from '@/lib/config';
import {
@ -58,6 +60,8 @@ export const GET = async (req: Request) => {
config['customOpenaiApiUrl'] = getCustomOpenaiApiUrl();
config['customOpenaiApiKey'] = getCustomOpenaiApiKey();
config['customOpenaiModelName'] = getCustomOpenaiModelName();
config['searchEngine'] = getSearchEngine();
config['tavilyApiKey'] = getTavilyApiKey();
return Response.json({ ...config }, { status: 200 });
} catch (err) {
@ -99,6 +103,12 @@ export const POST = async (req: Request) => {
MODEL_NAME: config.customOpenaiModelName,
},
},
SEARCH: {
ENGINE: config.searchEngine,
},
API_ENDPOINTS: {
TAVILY: config.tavilyApiKey || '',
},
};
updateConfig(updatedConfig);

View File

@ -24,6 +24,8 @@ interface SettingsType {
customOpenaiApiKey: string;
customOpenaiApiUrl: string;
customOpenaiModelName: string;
searchEngine: string;
tavilyApiKey?: string;
}
interface InputProps extends React.InputHTMLAttributes<HTMLInputElement> {
@ -145,6 +147,7 @@ const Page = () => {
const [automaticImageSearch, setAutomaticImageSearch] = useState(false);
const [automaticVideoSearch, setAutomaticVideoSearch] = useState(false);
const [systemInstructions, setSystemInstructions] = useState<string>('');
const [searchEngine, setSearchEngine] = useState<string>('searxng');
const [savingStates, setSavingStates] = useState<Record<string, boolean>>({});
useEffect(() => {
@ -207,6 +210,7 @@ const Page = () => {
);
setSystemInstructions(localStorage.getItem('systemInstructions')!);
setSearchEngine(localStorage.getItem('searchEngine') || 'searxng');
setIsLoading(false);
};
@ -366,6 +370,10 @@ const Page = () => {
localStorage.setItem('embeddingModel', value);
} else if (key === 'systemInstructions') {
localStorage.setItem('systemInstructions', value);
} else if (key === 'searchEngine') {
localStorage.setItem('searchEngine', value);
} else if (key === 'tavilyApiKey') {
localStorage.setItem('tavilyApiKey', value);
}
} catch (err) {
console.error('Failed to save:', err);
@ -508,6 +516,32 @@ const Page = () => {
/>
</Switch>
</div>
<div className="flex flex-col space-y-1 mt-2">
<p className="text-black/70 dark:text-white/70 text-sm">
Search Engine
</p>
<Select
value={searchEngine}
onChange={(e) => {
const value = e.target.value;
setSearchEngine(value);
saveConfig('searchEngine', value);
}}
options={[
{ value: 'searxng', label: 'SearxNG' },
...(config.tavilyApiKey ? [{ value: 'tavily', label: 'Tavily' }] : []),
]}
/>
<p className="text-xs text-black/60 dark:text-white/60 mt-1">
Select which search engine to use for web searches
</p>
{searchEngine === 'tavily' && !config.tavilyApiKey && (
<p className="text-xs text-red-500 mt-1">
Tavily API key is required to use this search engine
</p>
)}
</div>
</div>
</SettingsSection>
@ -858,6 +892,32 @@ const Page = () => {
onSave={(value) => saveConfig('deepseekApiKey', value)}
/>
</div>
<div className="flex flex-col space-y-1 mt-4 pt-4 border-t border-light-200 dark:border-dark-200">
<p className="text-black/90 dark:text-white/90 font-medium">Search Engine API Keys</p>
<p className="text-sm text-black/60 dark:text-white/60 mt-0.5">
API keys for search engines used in the application
</p>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Tavily API Key
</p>
<Input
type="text"
placeholder="Tavily API Key"
value={config.tavilyApiKey || ''}
isSaving={savingStates['tavilyApiKey']}
onChange={(e) => {
setConfig((prev) => ({
...prev!,
tavilyApiKey: e.target.value,
}));
}}
onSave={(value) => saveConfig('tavilyApiKey', value)}
/>
</div>
</div>
</SettingsSection>
</div>

View File

@ -36,6 +36,10 @@ interface Config {
};
API_ENDPOINTS: {
SEARXNG: string;
TAVILY: string;
};
SEARCH: {
ENGINE: string;
};
}
@ -64,6 +68,12 @@ export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY;
export const getSearxngApiEndpoint = () =>
process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG;
/**
 * Resolves the Tavily API key: the `TAVILY_API_KEY` environment variable wins
 * when it is set to a non-empty value, otherwise the `API_ENDPOINTS.TAVILY`
 * entry of the loaded config is returned.
 */
export const getTavilyApiKey = () => {
  const keyFromEnv = process.env.TAVILY_API_KEY;
  if (keyFromEnv) {
    return keyFromEnv;
  }
  return loadConfig().API_ENDPOINTS.TAVILY;
};
/**
 * Resolves the active search engine name. Lookup order: the `SEARCH_ENGINE`
 * environment variable, then `SEARCH.ENGINE` from the loaded config (the
 * `SEARCH` section may be absent), and finally the literal `'searxng'`.
 */
export const getSearchEngine = () => {
  const engineFromEnv = process.env.SEARCH_ENGINE;
  if (engineFromEnv) {
    return engineFromEnv;
  }
  const engineFromConfig = loadConfig().SEARCH?.ENGINE;
  return engineFromConfig ? engineFromConfig : 'searxng';
};
export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL;
export const getDeepseekApiKey = () => loadConfig().MODELS.DEEPSEEK.API_KEY;

View File

@ -17,7 +17,9 @@ import LineListOutputParser from '../outputParsers/listLineOutputParser';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { getDocumentsFromLinks } from '../utils/documents';
import { Document } from 'langchain/document';
import { searchTavily } from '../tavily';
import { searchSearxng } from '../searxng';
import { getSearchEngine } from '../config';
import path from 'node:path';
import fs from 'node:fs';
import computeSimilarity from '../utils/computeSimilarity';
@ -205,25 +207,47 @@ class MetaSearchAgent implements MetaSearchAgentType {
} else {
question = question.replace(/<think>.*?<\/think>/g, '');
const res = await searchSearxng(question, {
language: 'en',
engines: this.config.activeEngines,
});
const searchEngine = getSearchEngine();
console.log(`Using search engine: ${searchEngine}`);
const documents = res.results.map(
(result) =>
new Document({
pageContent:
result.content ||
(this.config.activeEngines.includes('youtube')
? result.title
: '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
metadata: {
title: result.title,
url: result.url,
...(result.img_src && { img_src: result.img_src }),
},
}),
let res;
if (searchEngine === 'tavily') {
res = await searchTavily(question, {
search_depth: 'basic',
max_results: 15,
include_images: true,
});
} else {
// Default to SearxNG
res = await searchSearxng(question, {
language: 'en',
engines: this.config.activeEngines,
});
}
// If we have an AI-generated answer from Tavily, create a document for it
let documents: Document[] = [];
// Add the regular search results
documents = documents.concat(
res.results.map(
(result) =>
new Document({
pageContent:
result.content ||
(this.config.activeEngines.includes('youtube')
? result.title
: ''),
metadata: {
title: result.title,
url: result.url,
...(result.img_src ? { img_src: result.img_src } : {}),
},
}),
)
);
return { query: question, docs: documents };

79
src/lib/tavily.ts Normal file
View File

@ -0,0 +1,79 @@
import axios from 'axios';
import { getTavilyApiKey } from './config';
/**
 * Optional request parameters for `searchTavily`. Whatever is supplied is
 * spread as-is into the JSON body posted to the Tavily search endpoint; this
 * module applies no defaults of its own.
 */
interface TavilySearchOptions {
/** Search category. */
topic?: 'general' | 'news';
/** Depth of the search to perform. */
search_depth?: 'basic' | 'advanced';
/** Number of content chunks to retrieve from each source (documented range: 1-3). */
chunks_per_source?: number;
/** Maximum number of results to return. */
max_results?: number;
/** Restrict results to a time range; the single-letter forms are shorthands. */
time_range?: 'day' | 'week' | 'month' | 'year' | 'd' | 'w' | 'm' | 'y';
/** Number of days back from the current date to include (used with the news topic). */
days?: number;
/** Request an AI-generated answer alongside the results. */
include_answer?: boolean | 'basic' | 'advanced';
/** Include the cleaned and parsed HTML content of each result. */
include_raw_content?: boolean;
/** Also perform an image search and include its results. */
include_images?: boolean;
/** Add descriptive text for each returned image. */
include_image_descriptions?: boolean;
/** Domains to include in the search results. */
include_domains?: string[];
/** Domains to exclude from the search results. */
exclude_domains?: string[];
}
/** A single entry of the `results` array in a Tavily search response. */
interface TavilySearchResult {
/** Title of the search result. */
title: string;
/** URL of the search result. */
url: string;
/** Short description of the search result. */
content: string;
/** Relevance score of the search result. */
score: number;
/** Cleaned and parsed HTML content; only present when it was requested. */
raw_content?: string;
}
/** Shape assumed for the JSON body returned by the Tavily search endpoint. */
interface TavilySearchResponse {
/** The search query that was executed. */
query: string;
/** AI-generated answer to the query; only present when it was requested. */
answer?: string;
/**
 * Query-related images; only present when they were requested.
 * NOTE(review): typed as objects here — confirm whether the API returns plain
 * URL strings when image descriptions are not requested.
 */
images?: Array<{
url: string;
description?: string;
}>;
/** The search results. */
results: TavilySearchResult[];
/**
 * Time it took to complete the request.
 * NOTE(review): the setup guide describes this as a time in seconds, yet it is
 * typed as a string — confirm against the actual API payload.
 */
response_time: string;
}
/**
 * Runs a web search against the Tavily API and converts the response into the
 * result format expected by the rest of the application.
 *
 * @param query - Search query text sent to Tavily.
 * @param opts - Optional Tavily request parameters; forwarded unchanged in the
 *   request body next to `query`.
 * @returns An object with `results` (each carrying `title`, `url`, `content`
 *   and an always-undefined `img_src`), an empty `suggestions` array, and
 *   Tavily's `answer` when the response contains one.
 * @throws Error when no Tavily API key is configured. Errors raised by axios
 *   for failed requests are not caught here and propagate to the caller.
 */
export const searchTavily = async (
  query: string,
  opts?: TavilySearchOptions,
) => {
  const tavilyApiKey = getTavilyApiKey();

  if (!tavilyApiKey) {
    throw new Error('Tavily API key is not configured');
  }

  const url = 'https://api.tavily.com/search';

  const response = await axios.post<TavilySearchResponse>(
    url,
    {
      query,
      ...opts,
    },
    {
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${tavilyApiKey}`,
      },
    },
  );

  // A successful reply that lacks a `results` array would otherwise make the
  // `.map` below throw a TypeError; treat it as "no results" instead.
  const results = Array.isArray(response.data?.results)
    ? response.data.results
    : [];

  // Convert Tavily results to match the format expected by the rest of the application
  const formattedResults = results.map((result) => ({
    title: result.title,
    url: result.url,
    content: result.content,
    img_src: undefined, // Tavily results carry no per-result image URL
  }));

  return {
    results: formattedResults,
    suggestions: [], // Tavily doesn't provide suggestions, so return empty array
    answer: response.data?.answer, // Include the AI-generated answer if available
  };
};