feat(classifier): switch to a fixed approach

This commit is contained in:
ItzCrazyKns
2025-12-01 18:33:54 +05:30
parent 610d06be36
commit 9b3833f933
12 changed files with 108 additions and 601 deletions

View File

@@ -0,0 +1,50 @@
import z from 'zod';
import { ClassifierInput } from './types';
import { classifierPrompt } from '@/lib/prompts/search/classifier';
import formatChatHistoryAsString from '@/lib/utils/formatHistory';
/**
 * Builds a boolean schema field that carries the given description.
 */
const describedFlag = (description: string) =>
  z.boolean().describe(description);

/**
 * Structured-output schema handed to the LLM by `classify`: a fixed set of
 * boolean flags under `classification`, plus a standalone rewrite of the
 * user's question.
 */
const schema = z.object({
  classification: z.object({
    skipSearch: describedFlag('Indicates whether to skip the search step.'),
    personalSearch: describedFlag(
      'Indicates whether to perform a personal search.',
    ),
    academicSearch: describedFlag(
      'Indicates whether to perform an academic search.',
    ),
    discussionSearch: describedFlag(
      'Indicates whether to perform a discussion search.',
    ),
    showWeatherWidget: describedFlag(
      'Indicates whether to show the weather widget.',
    ),
    showStockWidget: describedFlag(
      'Indicates whether to show the stock widget.',
    ),
  }),
  standaloneFollowUp: z
    .string()
    .describe(
      "A self-contained, context-independent reformulation of the user's question.",
    ),
});
/**
 * Asks the configured LLM for an object conforming to `schema`.
 *
 * The system message is `classifierPrompt`. The user message wraps the
 * formatted chat history in `<conversation_history>` tags and the raw query in
 * `<user_query>` tags.
 *
 * @param input - Supplies the `llm`, `chatHistory` and `query` used for the request.
 * @returns Whatever `input.llm.generateObject` resolves with.
 */
export const classify = async (input: ClassifierInput) => {
  const { llm, chatHistory, query } = input;
  const history = formatChatHistoryAsString(chatHistory);
  const userMessage = `<conversation_history>\n${history}\n</conversation_history>\n<user_query>\n${query}\n</user_query>`;

  return llm.generateObject<typeof schema>({
    messages: [
      { role: 'system', content: classifierPrompt },
      { role: 'user', content: userMessage },
    ],
    schema,
  });
};

View File

@@ -1,73 +0,0 @@
import z from 'zod';
import { ClassifierInput, ClassifierOutput } from '../types';
import { WidgetRegistry } from '../widgets';
import { IntentRegistry } from './intents';
import { getClassifierPrompt } from '@/lib/prompts/search/classifier';
import formatChatHistoryAsString from '@/lib/utils/formatHistory';
/**
 * LLM-backed classifier whose output schema and system prompt are derived, on
 * every call, from the intents and widgets currently registered.
 */
class Classifier {
  /**
   * Classifies a query into intents and widgets.
   *
   * The schema restricts `intents` to the names of the intents enabled for
   * `input.enabledSources` and `widgets` to the union of every registered
   * widget schema.
   *
   * @param input - Supplies `llm`, `chatHistory`, `query` and `enabledSources`.
   * @returns The LLM result with each widget entry wrapped as
   *   `{ type, params }`, where `params` is the complete widget object the LLM
   *   produced (its `type` field included).
   */
  async classify(input: ClassifierInput): Promise<ClassifierOutput> {
    const availableIntents = IntentRegistry.getAvailableIntents({
      sources: input.enabledSources,
    });
    const availableWidgets = WidgetRegistry.getAll();

    const classificationSchema = z.object({
      skipSearch: z
        .boolean()
        .describe(
          'Set to true to SKIP search. Skip ONLY when: (1) widgets alone fully answer the query (e.g., weather, stocks, calculator), (2) simple greetings or writing tasks (NOT questions). Set to false for ANY question or information request.',
        ),
      standaloneFollowUp: z
        .string()
        .describe(
          "A self-contained, context-independent reformulation of the user's question. Must include all necessary context from chat history, replace pronouns with specific nouns, and be clear enough to answer without seeing the conversation. Keep the same complexity as the original question.",
        ),
      intents: z
        .array(z.enum(availableIntents.map((i) => i.name)))
        .describe(
          "The intent(s) that best describe how to fulfill the user's query. Can include multiple intents (e.g., ['web_search', 'widget_response'] for 'weather in NYC and recent news'). Always include at least one intent when applicable.",
        ),
      widgets: z
        .array(z.union(availableWidgets.map((w) => w.schema)))
        .describe(
          'Widgets that can display structured data to answer (fully or partially) the query. Include all applicable widgets regardless of skipSearch value.',
        ),
    });

    const classifierPrompt = getClassifierPrompt({
      intentDesc: IntentRegistry.getDescriptions({
        sources: input.enabledSources,
      }),
      widgetDesc: WidgetRegistry.getDescriptions(),
    });

    const res = await input.llm.generateObject<
      z.infer<typeof classificationSchema>
    >({
      messages: [
        {
          role: 'system',
          content: classifierPrompt,
        },
        {
          role: 'user',
          content: `<conversation>${formatChatHistoryAsString(input.chatHistory)}</conversation>\n\n<query>${input.query}</query>`,
        },
      ],
      schema: classificationSchema,
    });

    // Return a fresh object rather than overwriting `res.widgets` with a
    // differently shaped array and asserting the type afterwards. The object
    // handed back by the LLM layer is left untouched, and the literal is
    // checked against the declared return type.
    return {
      ...res,
      widgets: res.widgets.map((widgetConfig) => ({
        type: widgetConfig.type,
        params: widgetConfig,
      })),
    };
  }
}

export default Classifier;

View File

@@ -1,52 +0,0 @@
import { Intent } from '../../types';
/**
 * Prompt text for the `academic_search` intent; attached below as the
 * `description` of `academicSearchIntent`.
 *
 * This is a template literal, so every line break and character inside the
 * backticks is part of the runtime string.
 */
const description = `Use this intent to search for scholarly articles, research papers, scientific studies, and academic resources when the user explicitly requests credible, peer-reviewed, or authoritative information from academic sources.
#### When to use:
1. User explicitly mentions academic keywords: research papers, scientific studies, scholarly articles, peer-reviewed, journal articles.
2. User asks for scientific evidence or academic research on a topic.
3. User needs authoritative, citation-worthy sources for research or academic purposes.
#### When NOT to use:
1. General questions that don't specifically request academic sources - use 'web_search' instead.
2. User just wants general information without specifying academic sources.
3. Casual queries about facts or current events.
#### Example use cases:
1. "Find scientific papers on climate change effects"
- User explicitly wants scientific papers.
- Intent: ['academic_search'] with skipSearch: false
2. "What does the research say about meditation benefits?"
- User is asking for research-based information.
- Intent: ['academic_search', 'web_search'] with skipSearch: false
3. "Show me peer-reviewed articles on CRISPR technology"
- User specifically wants peer-reviewed academic content.
- Intent: ['academic_search'] with skipSearch: false
4. "I need scholarly sources about renewable energy for my thesis"
- User explicitly needs scholarly/academic sources.
- Intent: ['academic_search'] with skipSearch: false
5. "Explain quantum computing" (WRONG to use academic_search alone)
- This is a general question, not specifically requesting academic papers.
- Correct intent: ['web_search'] with skipSearch: false
- Could combine: ['web_search', 'academic_search'] if you want both general and academic sources
6. "What's the latest study on sleep patterns?"
- User mentions "study" - combine academic and web search for comprehensive results.
- Intent: ['academic_search', 'web_search'] with skipSearch: false
**IMPORTANT**: This intent can be combined with 'web_search' to provide both academic papers and general web information. Always set skipSearch to false when using this intent.
**NOTE**: This intent is only available if academic search sources are enabled in the configuration.`;
/**
 * `academic_search` intent: requires a search step and is enabled only when
 * the configured sources include `'academic'`.
 */
const academicSearchIntent: Intent = {
  name: 'academic_search',
  description,
  requiresSearch: true,
  enabled: ({ sources }) => sources.includes('academic'),
};

export default academicSearchIntent;

View File

@@ -1,55 +0,0 @@
import { Intent } from '../../types';
/**
 * Prompt text for the `discussions_search` intent; attached below as the
 * `description` of `discussionSearchIntent`.
 *
 * This is a template literal, so every line break and character inside the
 * backticks is part of the runtime string.
 */
const description = `Use this intent to search through discussion forums, community boards, and social platforms (Reddit, forums, etc.) when the user explicitly wants opinions, personal experiences, community discussions, or crowd-sourced information.
#### When to use:
1. User explicitly mentions: Reddit, forums, discussion boards, community opinions, "what do people think", "user experiences".
2. User is asking for opinions, reviews, or personal experiences about a product, service, or topic.
3. User wants to know what communities or people are saying about something.
#### When NOT to use:
1. General questions that don't specifically ask for opinions or discussions - use 'web_search' instead.
2. User wants factual information or official sources.
3. Casual queries about facts, news, or current events without requesting community input.
#### Example use cases:
1. "What do people on Reddit think about the new iPhone?"
- User explicitly wants Reddit/community opinions.
- Intent: ['discussions_search'] with skipSearch: false
2. "User experiences with Tesla Model 3"
- User is asking for personal experiences from users.
- Intent: ['discussions_search'] with skipSearch: false
3. "Best gaming laptop according to forums"
- User wants forum/community recommendations.
- Intent: ['discussions_search'] with skipSearch: false
4. "What are people saying about the new AI regulations?"
- User wants community discussions/opinions.
- Intent: ['discussions_search', 'web_search'] with skipSearch: false
5. "Reviews and user opinions on the Framework laptop"
- Combines user opinions with general reviews.
- Intent: ['discussions_search', 'web_search'] with skipSearch: false
6. "What's the price of iPhone 15?" (WRONG to use discussions_search)
- This is a factual question, not asking for opinions.
- Correct intent: ['web_search'] with skipSearch: false
7. "Explain how OAuth works" (WRONG to use discussions_search)
- This is asking for information, not community opinions.
- Correct intent: ['web_search'] with skipSearch: false
**IMPORTANT**: This intent can be combined with 'web_search' to provide both community discussions and official/factual information. Always set skipSearch to false when using this intent.
**NOTE**: This intent is only available if discussion search sources are enabled in the configuration.`;
/**
 * `discussions_search` intent: requires a search step and is enabled only
 * when the configured sources include `'discussions'`.
 */
const discussionSearchIntent: Intent = {
  name: 'discussions_search',
  description,
  requiresSearch: true,
  enabled: ({ sources }) => sources.includes('discussions'),
};

export default discussionSearchIntent;

View File

@@ -1,16 +0,0 @@
import academicSearchIntent from './academicSearch';
import discussionSearchIntent from './discussionSearch';
import privateSearchIntent from './privateSearch';
import IntentRegistry from './registry';
import webSearchIntent from './webSearch';
import widgetResponseIntent from './widgetResponse';
import writingTaskIntent from './writingTask';
// Register every built-in intent. The array order is the registration order.
for (const intent of [
  webSearchIntent,
  academicSearchIntent,
  discussionSearchIntent,
  widgetResponseIntent,
  writingTaskIntent,
  privateSearchIntent,
]) {
  IntentRegistry.register(intent);
}

export { IntentRegistry };

View File

@@ -1,47 +0,0 @@
import { Intent } from '../../types';
/**
 * Prompt text for the `private_search` intent; attached below as the
 * `description` of `privateSearchIntent`.
 *
 * This is a template literal, so every line break and character inside the
 * backticks is part of the runtime string.
 */
const description = `Use this intent to search through the user's uploaded documents or provided web page links when they ask questions about their personal files or specific URLs.
#### When to use:
1. User explicitly asks about uploaded documents ("tell me about the document I uploaded", "summarize this file").
2. User provides specific URLs/links and asks questions about them ("tell me about example.com", "what's on this page: url.com").
3. User references "my documents", "the file I shared", "this link" when files or URLs are available.
#### When NOT to use:
1. User asks generic questions like "summarize" without providing context or files (later the system will ask what they want summarized).
2. No files have been uploaded and no URLs provided - use web_search or other intents instead.
3. User is asking general questions unrelated to their uploaded content.
#### Example use cases:
1. "Tell me about the PDF I uploaded"
- Files are uploaded, user wants information from them.
- Intent: ['private_search'] with skipSearch: false
2. "What's the main point from example.com?"
- User provided a specific URL to analyze.
- Intent: ['private_search'] with skipSearch: false
3. "Summarize the research paper I shared"
- User references a shared document.
- Intent: ['private_search'] with skipSearch: false
4. "Summarize" (WRONG to use private_search if no files/URLs)
- No context provided, no files uploaded.
- Correct: Skip this intent, let the answer agent ask what to summarize
5. "What does my document say about climate change and also search the web for recent updates?"
- Combine private document search with web search.
- Intent: ['private_search', 'web_search'] with skipSearch: false
**IMPORTANT**: Only use this intent if files are actually uploaded or URLs are explicitly provided in the query. Check the context for uploaded files before selecting this intent. Always set skipSearch to false when using this intent.
**NOTE**: This intent can be combined with other search intents when the user wants both personal document information and external sources.`;
/**
 * `private_search` intent: requires a search step and is always enabled,
 * regardless of the configured sources.
 */
const privateSearchIntent: Intent = {
  name: 'private_search',
  description,
  enabled: () => true,
  requiresSearch: true,
};

export default privateSearchIntent;

View File

@@ -1,31 +0,0 @@
import { Intent, SearchAgentConfig, SearchSources } from '../../types';
/**
 * Static registry of intents, keyed by intent name.
 */
class IntentRegistry {
  private static intents = new Map<string, Intent>();

  /**
   * Stores `intent` under its `name`, replacing any intent previously
   * registered under the same name.
   */
  static register(intent: Intent) {
    this.intents.set(intent.name, intent);
  }

  /**
   * @returns The intent registered under `name`, or `undefined` if none is.
   */
  static get(name: string): Intent | undefined {
    return this.intents.get(name);
  }

  /**
   * @param config - Passed unchanged to each intent's `enabled` predicate.
   * @returns The intents whose `enabled(config)` is truthy, in registration order.
   */
  static getAvailableIntents(config: { sources: SearchSources[] }): Intent[] {
    // `Map.prototype.values()` yields an iterator, and `.filter` on iterators
    // only exists on runtimes that ship iterator helpers. Materialise to an
    // array first so this works everywhere.
    return Array.from(this.intents.values()).filter((intent) =>
      intent.enabled(config),
    );
  }

  /**
   * @param config - Selects the intents to describe (see `getAvailableIntents`).
   * @returns One `###name: description` section per available intent, each
   *   preceded by a `-------` rule, joined with blank lines. Empty string when
   *   no intent is available.
   */
  static getDescriptions(config: { sources: SearchSources[] }): string {
    return this.getAvailableIntents(config)
      .map(
        (intent) => `-------\n\n###${intent.name}: ${intent.description}\n\n`,
      )
      .join('\n\n');
  }
}

export default IntentRegistry;

View File

@@ -1,31 +0,0 @@
import { Intent } from '../../types';
/**
 * Prompt text for the `web_search` intent; attached below as the
 * `description` of `webSearchIntent`.
 *
 * This is a template literal, so every line break and character inside the
 * backticks (including the leading and trailing newline) is part of the
 * runtime string.
 */
const description = `
Use this intent to find current information from the web when the user is asking a question or needs up-to-date information that cannot be provided by widgets or other intents.
#### When to use:
1. Simple user questions about current events, news, weather, or general knowledge that require the latest information and there is no specific better intent to use.
2. When the user explicitly requests information from the web or indicates they want the most recent data (and still there's no other better intent).
3. When no widgets can fully satisfy the user's request for information nor any other specialized search intent applies.
#### Examples use cases:
1. "What is the weather in San Francisco today? ALso tell me some popular events happening there this weekend."
- In this case, the weather widget can provide the current weather, but for popular events, a web search is needed. So the intent should include a 'web_search' & a 'widget_response'.
2. "Who won the Oscar for Best Picture in 2024?"
- This is a straightforward question that requires current information from the web.
3. "Give me the latest news on AI regulations."
- The user is asking for up-to-date news, which necessitates a web search.
**IMPORTANT**: If this intent is given then skip search should be false.
`;
/**
 * `web_search` intent: requires a search step and is enabled only when the
 * configured sources include `'web'`.
 */
const webSearchIntent: Intent = {
  name: 'web_search',
  description,
  requiresSearch: true,
  enabled: ({ sources }) => sources.includes('web'),
};

export default webSearchIntent;

View File

@@ -1,47 +0,0 @@
import { Intent } from '../../types';
/**
 * Prompt text for the `widget_response` intent; attached below as the
 * `description` of `widgetResponseIntent`.
 *
 * This is a template literal, so every line break and character inside the
 * backticks is part of the runtime string.
 */
const description = `Use this intent when the user's query can be fully or partially answered using specialized widgets that provide structured, real-time data (weather, stocks, calculations, and more).
#### When to use:
1. The user is asking for specific information that a widget can provide (current weather, stock prices, mathematical calculations, unit conversions, etc.).
2. A widget can completely answer the query without needing additional web search (use this intent alone and set skipSearch to true).
3. A widget can provide part of the answer, but additional information from web search or other sources is needed (combine with other intents like 'web_search' and set skipSearch to false).
#### Example use cases:
Note: These are just examples - there are several other widgets available for use depending on the user's query.
1. "What is the weather in New York?"
- The weather widget can fully answer this query.
- Intent: ['widget_response'] with skipSearch: true
- Widget: [{ type: 'weather', location: 'New York', lat: 0, lon: 0 }]
2. "What's the weather in San Francisco today? Also tell me some popular events happening there this weekend."
- Weather widget provides current conditions, but events require web search.
- Intent: ['web_search', 'widget_response'] with skipSearch: false
- Widget: [{ type: 'weather', location: 'San Francisco', lat: 0, lon: 0 }]
3. "Calculate 25% of 480"
- The calculator widget can fully answer this.
- Intent: ['widget_response'] with skipSearch: true
- Widget: [{ type: 'calculator', expression: '25% of 480' }]
4. "AAPL stock price and recent Apple news"
- Stock widget provides price, but news requires web search.
- Intent: ['web_search', 'widget_response'] with skipSearch: false
- Widget: [{ type: 'stock', symbol: 'AAPL' }]
5. "What's Tesla's stock doing and how does it compare to competitors?"
- Stock widget provides Tesla's price, but comparison analysis requires web search.
- Intent: ['web_search', 'widget_response'] with skipSearch: false
- Widget: [{ type: 'stock', symbol: 'TSLA' }]
**IMPORTANT**: Set skipSearch to true ONLY if the widget(s) can completely answer the user's query without any additional information. If the user asks for anything beyond what the widget provides (context, explanations, comparisons, related information), combine this intent with 'web_search' and set skipSearch to false.`;
/**
 * `widget_response` intent: does not require a search step and is always
 * enabled, regardless of the configured sources.
 */
const widgetResponseIntent: Intent = {
  name: 'widget_response',
  description,
  requiresSearch: false,
  enabled: () => true,
};

export default widgetResponseIntent;

View File

@@ -1,53 +0,0 @@
import { Intent } from '../../types';
/**
 * Prompt text for the `writing_task` intent; attached below as the
 * `description` of `writingTaskIntent`.
 *
 * This is a template literal, so every line break and character inside the
 * backticks is part of the runtime string.
 */
const description = `Use this intent for simple writing or greeting tasks that do not require any external information or facts. This is ONLY for greetings and straightforward creative writing that needs no factual verification.
#### When to use:
1. User greetings or simple social interactions (hello, hi, thanks, goodbye).
2. Creative writing tasks that require NO factual information (poems, birthday messages, thank you notes).
3. Simple drafting tasks where the user provides all necessary information.
#### When NOT to use:
1. ANY question that starts with "what", "how", "why", "when", "where", "who" - these need web_search.
2. Requests for explanations, definitions, or information about anything.
3. Code-related questions or technical help - these need web_search.
4. Writing tasks that require facts, data, or current information.
5. When you're uncertain about any information needed - default to web_search.
#### Example use cases:
1. "Hello" or "Hi there"
- Simple greeting, no information needed.
- Intent: ['writing_task'] with skipSearch: true
2. "Write me a birthday message for my friend"
- Creative writing, no facts needed.
- Intent: ['writing_task'] with skipSearch: true
3. "Draft a thank you email for a job interview"
- Simple writing task, no external information required.
- Intent: ['writing_task'] with skipSearch: true
4. "What is React?" (WRONG to use writing_task)
- This is a QUESTION asking for information.
- Correct intent: ['web_search'] with skipSearch: false
5. "How do I fix this error in Python?" (WRONG to use writing_task)
- This is asking for technical help.
- Correct intent: ['web_search'] with skipSearch: false
6. "Write an email about the latest AI developments" (WRONG to use writing_task alone)
- This requires current information about AI developments.
- Correct intent: ['web_search'] with skipSearch: false
**CRITICAL RULE**: When in doubt, DO NOT use this intent. Default to web_search. This intent should be rare - only use it for greetings and purely creative writing tasks that need absolutely no facts or information.
**IMPORTANT**: If this intent is used alone, skipSearch should be true. Never combine this with other search intents unless you're absolutely certain both are needed.`;
/**
 * `writing_task` intent: does not require a search step and is always
 * enabled, regardless of the configured sources.
 */
const writingTaskIntent: Intent = {
  name: 'writing_task',
  description,
  requiresSearch: false,
  enabled: () => true,
};

export default writingTaskIntent;

View File

@@ -55,10 +55,15 @@ export type ClassifierInput = {
};
/**
 * Result shape of the query classifier.
 *
 * NOTE(review): this span is rendered from a diff hunk whose +/- markers are
 * not visible, so pre-change and post-change members may be interleaved here.
 * Confirm against the actual file which members the type currently declares.
 */
export type ClassifierOutput = {
skipSearch: boolean;
// Flag names match the `classification` object of the classifier's zod schema.
classification: {
skipSearch: boolean;
personalSearch: boolean;
academicSearch: boolean;
discussionSearch: boolean;
showWeatherWidget: boolean;
showStockWidget: boolean;
};
// Standalone, context-independent rewrite of the user's question.
standaloneFollowUp: string;
intents: string[];
widgets: WidgetConfig[];
};
export type AdditionalConfig = {