Files
Perplexica/src/lib/agents/search/researcher/actions/webSearch.ts
2025-12-09 11:41:55 +05:30

181 lines
7.9 KiB
TypeScript

import z from 'zod';
import { ResearchAction } from '../../types';
import { searchSearxng } from '@/lib/searxng';
import { Chunk, SearchResultsResearchBlock } from '@/lib/types';
const actionSchema = z.object({
type: z.literal('web_search'),
queries: z
.array(z.string())
.describe('An array of search queries to perform web searches for.'),
});
const speedModePrompt = `
Use this tool to perform web searches based on the provided queries. This is useful when you need to gather information from the web to answer the user's questions. You can provide up to 3 queries at a time. You will have to use this every single time if this is present and relevant.
You are currently on speed mode, meaning you would only get to call this tool once. Make sure to prioritize the most important queries that are likely to get you the needed information in one go.
Your queries should be very targeted and specific to the information you need, avoid broad or generic queries.
Your queries shouldn't be sentences but rather keywords that are SEO friendly and can be used to search the web for information.
For example, if the user is asking about the features of a new technology, you might use queries like "GPT-5.1 features", "GPT-5.1 release date", "GPT-5.1 improvements" rather than a broad query like "Tell me about GPT-5.1".
You can search for 3 queries in one go, make sure to utilize all 3 queries to maximize the information you can gather. If a question is simple, then split your queries to cover different aspects or related topics to get a comprehensive understanding.
If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed information.
`
const balancedModePrompt = `
Use this tool to perform web searches based on the provided queries. This is useful when you need to gather information from the web to answer the user's questions. You can provide up to 3 queries at a time. You will have to use this every single time if this is present and relevant.
You can call this tool several times if needed to gather enough information.
Start initially with broader queries to get an overview, then narrow down with more specific queries based on the results you receive.
Your queries shouldn't be sentences but rather keywords that are SEO friendly and can be used to search the web for information.
For example if the user is asking about Tesla, your actions should be like:
1. __plan "The user is asking about Tesla. I will start with broader queries to get an overview of Tesla, then narrow down with more specific queries based on the results I receive." then
2. web_search ["Tesla", "Tesla latest news", "Tesla stock price"] then
3. __plan "Based on the previous search results, I will now narrow down my queries to focus on Tesla's recent developments and stock performance." then
4. web_search ["Tesla Q2 2025 earnings", "Tesla new model 2025", "Tesla stock analysis"] then done.
5. __plan "I have gathered enough information to provide a comprehensive answer."
6. done.
You can search for 3 queries in one go, make sure to utilize all 3 queries to maximize the information you can gather. If a question is simple, then split your queries to cover different aspects or related topics to get a comprehensive understanding.
If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed information. You can call this tools, multiple times as needed.
`
const qualityModePrompt = `
Use this tool to perform web searches based on the provided queries. This is useful when you need to gather information from the web to answer the user's questions. You can provide up to 3 queries at a time. You will have to use this every single time if this is present and relevant.
You have to call this tool several times to gather enough information unless the question is very simple (like greeting questions or basic facts).
Start initially with broader queries to get an overview, then narrow down with more specific queries based on the results you receive.
Never stop before at least 5-6 iterations of searches unless the user question is very simple.
Your queries shouldn't be sentences but rather keywords that are SEO friendly and can be used to search the web for information.
You can search for 3 queries in one go, make sure to utilize all 3 queries to maximize the information you can gather. If a question is simple, then split your queries to cover different aspects or related topics to get a comprehensive understanding.
If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed information. You can call this tools, multiple times as needed.
`
const webSearchAction: ResearchAction<typeof actionSchema> = {
name: 'web_search',
schema: actionSchema,
getToolDescription: () => 'Use this tool to perform web searches based on the provided queries. This is useful when you need to gather information from the web to answer the user\'s questions. You can provide up to 3 queries at a time. You will have to use this every single time if this is present and relevant.',
getDescription: (config) => {
let prompt = ''
switch (config.mode) {
case 'speed':
prompt = speedModePrompt
break;
case 'balanced':
prompt = balancedModePrompt
break;
case 'quality':
prompt = qualityModePrompt
break;
default:
prompt = speedModePrompt
break;
}
return prompt
},
enabled: (config) =>
config.classification.classification.skipSearch === false,
execute: async (input, additionalConfig) => {
input.queries = input.queries.slice(0, 3);
const researchBlock = additionalConfig.session.getBlock(
additionalConfig.researchBlockId,
);
if (researchBlock && researchBlock.type === 'research') {
researchBlock.data.subSteps.push({
id: crypto.randomUUID(),
type: 'searching',
searching: input.queries,
});
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
{
op: 'replace',
path: '/data/subSteps',
value: researchBlock.data.subSteps,
},
]);
}
const searchResultsBlockId = crypto.randomUUID();
let searchResultsEmitted = false;
let results: Chunk[] = [];
const search = async (q: string) => {
const res = await searchSearxng(q);
const resultChunks: Chunk[] = res.results.map((r) => ({
content: r.content || r.title,
metadata: {
title: r.title,
url: r.url,
},
}));
results.push(...resultChunks);
if (
!searchResultsEmitted &&
researchBlock &&
researchBlock.type === 'research'
) {
searchResultsEmitted = true;
researchBlock.data.subSteps.push({
id: searchResultsBlockId,
type: 'search_results',
reading: resultChunks,
});
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
{
op: 'replace',
path: '/data/subSteps',
value: researchBlock.data.subSteps,
},
]);
} else if (
searchResultsEmitted &&
researchBlock &&
researchBlock.type === 'research'
) {
const subStepIndex = researchBlock.data.subSteps.findIndex(
(step) => step.id === searchResultsBlockId,
);
const subStep = researchBlock.data.subSteps[
subStepIndex
] as SearchResultsResearchBlock;
subStep.reading.push(...resultChunks);
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
{
op: 'replace',
path: '/data/subSteps',
value: researchBlock.data.subSteps,
},
]);
}
};
await Promise.all(input.queries.map(search));
return {
type: 'search_results',
results,
};
},
};
export default webSearchAction;