mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-07-20 23:48:31 +00:00
feat(metaSearchAgent): implement structured outputs
This commit is contained in:
@ -1,63 +1,41 @@
|
|||||||
export const webSearchRetrieverPrompt = `
|
export const webSearchRetrieverPrompt = `
|
||||||
You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it.
|
You are an AI question rephraser. You will be given a conversation and a follow-up question; rephrase it into a standalone question that another LLM can use to search the web.
|
||||||
If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
|
|
||||||
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
|
|
||||||
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
|
|
||||||
|
|
||||||
There are several examples attached for your reference inside the below \`examples\` XML block
|
Return ONLY a JSON object that matches this schema:
|
||||||
|
query: string // the standalone question (or "summarize")
|
||||||
|
links: string[] // URLs extracted from the user query (empty if none)
|
||||||
|
searchRequired: boolean // true if web search is needed, false for greetings/simple writing tasks
|
||||||
|
searchMode: "" | "normal" | "news" // "" when searchRequired is false; "news" if the user asks for news/articles, otherwise "normal"
|
||||||
|
|
||||||
<examples>
|
Rules
|
||||||
1. Follow up question: What is the capital of France
|
- Greetings / simple writing tasks → query:"", links:[], searchRequired:false, searchMode:""
|
||||||
Rephrased question:\`
|
- Summarizing a URL → query:"summarize", links:[url...], searchRequired:true, searchMode:"normal"
|
||||||
<question>
|
- Asking for news/articles → searchMode:"news"
|
||||||
Capital of france
|
|
||||||
</question>
|
Examples
|
||||||
\`
|
1. Follow-up: What is the capital of France?
|
||||||
|
"query":"capital of France","links":[],"searchRequired":true,"searchMode":"normal"
|
||||||
|
|
||||||
2. Hi, how are you?
|
2. Hi, how are you?
|
||||||
Rephrased question\`
|
"query":"","links":[],"searchRequired":false,"searchMode":""
|
||||||
<question>
|
|
||||||
not_needed
|
|
||||||
</question>
|
|
||||||
\`
|
|
||||||
|
|
||||||
3. Follow up question: What is Docker?
|
3. Follow-up: What is Docker?
|
||||||
Rephrased question: \`
|
"query":"what is Docker","links":[],"searchRequired":true,"searchMode":"normal"
|
||||||
<question>
|
|
||||||
What is Docker
|
|
||||||
</question>
|
|
||||||
\`
|
|
||||||
|
|
||||||
4. Follow up question: Can you tell me what is X from https://example.com
|
4. Follow-up: Can you tell me what is X from https://example.com?
|
||||||
Rephrased question: \`
|
"query":"what is X","links":["https://example.com"],"searchRequired":true,"searchMode":"normal"
|
||||||
<question>
|
|
||||||
Can you tell me what is X?
|
|
||||||
</question>
|
|
||||||
|
|
||||||
<links>
|
5. Follow-up: Summarize the content from https://example.com
|
||||||
https://example.com
|
"query":"summarize","links":["https://example.com"],"searchRequired":true,"searchMode":"normal"
|
||||||
</links>
|
|
||||||
\`
|
|
||||||
|
|
||||||
5. Follow up question: Summarize the content from https://example.com
|
6. Follow-up: Latest news about AI
|
||||||
Rephrased question: \`
|
"query":"latest news about AI","links":[],"searchRequired":true,"searchMode":"news"
|
||||||
<question>
|
|
||||||
summarize
|
|
||||||
</question>
|
|
||||||
|
|
||||||
<links>
|
|
||||||
https://example.com
|
|
||||||
</links>
|
|
||||||
\`
|
|
||||||
</examples>
|
|
||||||
|
|
||||||
Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above.
|
|
||||||
|
|
||||||
<conversation>
|
<conversation>
|
||||||
{chat_history}
|
{chat_history}
|
||||||
</conversation>
|
</conversation>
|
||||||
|
|
||||||
Follow up question: {query}
|
Follow-up question: {query}
|
||||||
Rephrased question:
|
Rephrased question:
|
||||||
`;
|
`;
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ import computeSimilarity from '../utils/computeSimilarity';
|
|||||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||||
import eventEmitter from 'events';
|
import eventEmitter from 'events';
|
||||||
import { StreamEvent } from '@langchain/core/tracers/log_stream';
|
import { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
export interface MetaSearchAgentType {
|
export interface MetaSearchAgentType {
|
||||||
searchAndAnswer: (
|
searchAndAnswer: (
|
||||||
@ -52,6 +53,17 @@ type BasicChainInput = {
|
|||||||
query: string;
|
query: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Zod schema for the structured response requested from the retriever LLM.
 *
 * It is handed to `withStructuredOutput`, so the `.describe()` strings are
 * part of what the model is given; keep them short and correctly spelled.
 *
 * Fields:
 * - `query`: the text to search the web for.
 * - `links`: URLs to search or summarize, when the user supplied any.
 * - `searchRequired`: whether a web search is needed at all.
 * - `searchMode`: one of `''`, `'normal'` or `'news'`.
 */
const retrieverLLMOutputSchema = z.object({
  query: z.string().describe('The query to search the web for.'),
  links: z
    .array(z.string())
    .describe('The links to search/summarize if present'),
  searchRequired: z
    .boolean()
    // Fixed spelling: was 'Wether'.
    .describe('Whether there is a need to search the web'),
  searchMode: z.enum(['', 'normal', 'news']).describe('The search mode.'),
});
|
||||||
|
|
||||||
class MetaSearchAgent implements MetaSearchAgentType {
|
class MetaSearchAgent implements MetaSearchAgentType {
|
||||||
private config: Config;
|
private config: Config;
|
||||||
private strParser = new StringOutputParser();
|
private strParser = new StringOutputParser();
|
||||||
@ -62,26 +74,24 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||||||
|
|
||||||
private async createSearchRetrieverChain(llm: BaseChatModel) {
|
private async createSearchRetrieverChain(llm: BaseChatModel) {
|
||||||
(llm as unknown as ChatOpenAI).temperature = 0;
|
(llm as unknown as ChatOpenAI).temperature = 0;
|
||||||
|
|
||||||
return RunnableSequence.from([
|
return RunnableSequence.from([
|
||||||
PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
|
PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
|
||||||
|
Object.assign(
|
||||||
|
Object.create(Object.getPrototypeOf(llm)),
|
||||||
llm,
|
llm,
|
||||||
this.strParser,
|
).withStructuredOutput(retrieverLLMOutputSchema, {
|
||||||
RunnableLambda.from(async (input: string) => {
|
...(llm.metadata?.['model-type'] === 'groq'
|
||||||
const linksOutputParser = new LineListOutputParser({
|
? {
|
||||||
key: 'links',
|
method: 'json-object',
|
||||||
});
|
}
|
||||||
|
: {}),
|
||||||
|
}),
|
||||||
|
RunnableLambda.from(
|
||||||
|
async (input: z.infer<typeof retrieverLLMOutputSchema>) => {
|
||||||
|
let question = input.query;
|
||||||
|
const links = input.links;
|
||||||
|
|
||||||
const questionOutputParser = new LineOutputParser({
|
if (!input.searchRequired) {
|
||||||
key: 'question',
|
|
||||||
});
|
|
||||||
|
|
||||||
const links = await linksOutputParser.parse(input);
|
|
||||||
let question = this.config.summarizer
|
|
||||||
? await questionOutputParser.parse(input)
|
|
||||||
: input;
|
|
||||||
|
|
||||||
if (question === 'not_needed') {
|
|
||||||
return { query: '', docs: [] };
|
return { query: '', docs: [] };
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,7 +217,10 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||||||
|
|
||||||
const res = await searchSearxng(question, {
|
const res = await searchSearxng(question, {
|
||||||
language: 'en',
|
language: 'en',
|
||||||
engines: this.config.activeEngines,
|
engines:
|
||||||
|
input.searchMode === 'normal'
|
||||||
|
? this.config.activeEngines
|
||||||
|
: ['bing news'],
|
||||||
});
|
});
|
||||||
|
|
||||||
const documents = res.results.map(
|
const documents = res.results.map(
|
||||||
@ -228,7 +241,8 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||||||
|
|
||||||
return { query: question, docs: documents };
|
return { query: question, docs: documents };
|
||||||
}
|
}
|
||||||
}),
|
},
|
||||||
|
),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user