feat(search): Implement OpenSearch support

feat(search): Add searchUrl to message
feat(parsers): Enhance parsers to deal with some thinking models better.
This commit is contained in:
Willie Zutz
2025-05-08 00:21:31 -06:00
parent f65b168388
commit ddfe8c607d
12 changed files with 115 additions and 17 deletions

9
public/opensearch.xml Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  OpenSearch 1.1 description document for this site.
  Declares a search provider with the short name "Perplexica" whose HTML
  results URL is built from the "/?q={searchTerms}" template below.
-->
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<ShortName>Perplexica</ShortName>
<Description>Search with Perplexica AI</Description>
<InputEncoding>UTF-8</InputEncoding>
<!-- NOTE(review): the icon path and both Url templates are site-relative; some clients may expect absolute URLs here. TODO confirm in the target browsers. -->
<Image width="16" height="16" type="image/x-icon">/favicon.ico</Image>
<!-- Template for the HTML results page; the query is carried in the "q" parameter. -->
<Url type="text/html" template="/?q={searchTerms}"/>
<!-- Self-reference: points back at this description document. -->
<Url type="application/opensearchdescription+xml" rel="self" template="/opensearch.xml"/>
</OpenSearchDescription>

View File

@ -7,7 +7,7 @@ import db from '@/lib/db';
import { chats, messages as messagesSchema } from '@/lib/db/schema';
import {
getAvailableChatModelProviders,
getAvailableEmbeddingModelProviders
getAvailableEmbeddingModelProviders,
} from '@/lib/providers';
import { searchHandlers } from '@/lib/search';
import { getFileDetails } from '@/lib/utils/files';
@ -66,6 +66,7 @@ const handleEmitterEvents = async (
let recievedMessage = '';
let sources: any[] = [];
let searchQuery: string | undefined;
let searchUrl: string | undefined;
stream.on('data', (data) => {
const parsedData = JSON.parse(data);
@ -86,6 +87,9 @@ const handleEmitterEvents = async (
if (parsedData.searchQuery) {
searchQuery = parsedData.searchQuery;
}
if (parsedData.searchUrl) {
searchUrl = parsedData.searchUrl;
}
writer.write(
encoder.encode(
@ -94,6 +98,7 @@ const handleEmitterEvents = async (
data: parsedData.data,
searchQuery: parsedData.searchQuery,
messageId: aiMessageId,
searchUrl: searchUrl,
}) + '\n',
),
);
@ -128,6 +133,7 @@ const handleEmitterEvents = async (
messageId: aiMessageId,
modelStats: modelStats,
searchQuery: searchQuery,
searchUrl: searchUrl,
}) + '\n',
),
);
@ -144,6 +150,7 @@ const handleEmitterEvents = async (
...(sources && sources.length > 0 && { sources }),
...(searchQuery && { searchQuery }),
modelStats: modelStats,
...(searchUrl && { searchUrl }),
}),
})
.execute();

View File

@ -26,6 +26,14 @@ export default function RootLayout({
}>) {
return (
<html className="h-full" lang="en" suppressHydrationWarning>
<head>
<link
rel="search"
type="application/opensearchdescription+xml"
title="Perplexica Search"
href="/opensearch.xml"
/>
</head>
<body className={cn('h-full', montserrat.className)}>
<ThemeProvider>
<Sidebar>{children}</Sidebar>

View File

@ -28,6 +28,7 @@ export type Message = {
sources?: Document[];
modelStats?: ModelStats;
searchQuery?: string;
searchUrl?: string;
};
export interface File {
@ -417,7 +418,6 @@ const ChatWindow = ({ id }: { id?: string }) => {
if (data.type === 'sources') {
sources = data.data;
const searchQuery = data.searchQuery;
if (!added) {
setMessages((prevMessages) => [
...prevMessages,
@ -427,7 +427,8 @@ const ChatWindow = ({ id }: { id?: string }) => {
chatId: chatId!,
role: 'assistant',
sources: sources,
searchQuery: searchQuery,
searchQuery: data.searchQuery,
searchUrl: data.searchUrl,
createdAt: new Date(),
},
]);
@ -486,6 +487,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
modelStats: data.modelStats || null,
// Make sure the searchQuery is preserved (if available in the message data)
searchQuery: message.searchQuery || data.searchQuery,
searchUrl: message.searchUrl || data.searchUrl,
};
}
return message;

View File

@ -280,8 +280,23 @@ const MessageBox = ({
</div>
{message.searchQuery && (
<div className="mb-2 text-sm bg-light-secondary dark:bg-dark-secondary rounded-lg p-3">
<span className="font-medium text-black/70 dark:text-white/70">Search query:</span>{' '}
<span className="text-black dark:text-white">{message.searchQuery}</span>
<span className="font-medium text-black/70 dark:text-white/70">
Search query:
</span>{' '}
{message.searchUrl ? (
<a
href={message.searchUrl}
target="_blank"
rel="noopener noreferrer"
className="dark:text-white text-black hover:underline"
>
{message.searchQuery}
</a>
) : (
<span className="text-black dark:text-white">
{message.searchQuery}
</span>
)}
</div>
)}
<MessageSources sources={message.sources} />

View File

@ -86,7 +86,7 @@ const MessageInput = ({
setMessage('');
};
return (
return (
<form
onSubmit={(e) => {
e.preventDefault();
@ -107,7 +107,7 @@ const MessageInput = ({
onChange={(e) => setMessage(e.target.value)}
minRows={2}
className="bg-transparent placeholder:text-black/50 dark:placeholder:text-white/50 text-sm text-black dark:text-white resize-none focus:outline-none w-full max-h-24 lg:max-h-36 xl:max-h-48"
placeholder={firstMessage ? "Ask anything..." :"Ask a follow-up"}
placeholder={firstMessage ? 'Ask anything...' : 'Ask a follow-up'}
/>
<div className="flex flex-row items-center justify-between mt-4">
<div className="flex flex-row items-center space-x-2 lg:space-x-4">
@ -134,7 +134,11 @@ const MessageInput = ({
className="bg-[#24A0ED] text-white disabled:text-black/50 dark:disabled:text-white/50 disabled:bg-[#e0e0dc] dark:disabled:bg-[#ececec21] hover:bg-opacity-85 transition duration-100 rounded-full p-2"
type="submit"
>
{firstMessage ? <ArrowRight className="bg-background" size={17} /> : <ArrowUp className="bg-background" size={17} />}
{firstMessage ? (
<ArrowRight className="bg-background" size={17} />
) : (
<ArrowUp className="bg-background" size={17} />
)}
</button>
</div>
</div>

View File

@ -10,6 +10,7 @@ const suggestionGeneratorPrompt = `
You are an AI suggestion generator for an AI powered search engine. You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation. The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information.
You need to make sure the suggestions are relevant to the conversation and are helpful to the user. Keep a note that the user might use these suggestions to ask a chat model for more information.
Make sure the suggestions are medium in length and are informative and relevant to the conversation.
If you are a thinking or reasoning AI, you should avoid using \`<suggestions>\` and \`</suggestions>\` tags in your thinking. Those tags should only be used in the final output.
Provide these suggestions separated by newlines between the XML tags <suggestions> and </suggestions>. For example:

View File

@ -21,6 +21,10 @@ class LineOutputParser extends BaseOutputParser<string> {
async parse(text: string): Promise<string> {
text = text.trim() || '';
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
text = this.removeThinkingBlocks(text);
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`);
const endKeyIndex = text.indexOf(`</${this.key}>`);
@ -40,6 +44,17 @@ class LineOutputParser extends BaseOutputParser<string> {
return line;
}
/**
* Removes all content within <think>...</think> blocks
* @param text The input text containing thinking blocks
* @returns The text with all thinking blocks removed
*/
private removeThinkingBlocks(text: string): string {
  // `[\s\S]` matches any character, newlines included, so a block that spans
  // several lines is matched without needing the dotAll flag. The lazy `*?`
  // stops at the first closing tag, and `g` applies that to every block.
  const thinkBlockPattern = /<think>[\s\S]*?<\/think>/g;
  const withoutThinking = text.replace(thinkBlockPattern, '');
  // Removing a block can leave whitespace at either end of the text.
  return withoutThinking.trim();
}
getFormatInstructions(): string {
throw new Error('Not implemented.');
}

View File

@ -21,6 +21,10 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
async parse(text: string): Promise<string[]> {
text = text.trim() || '';
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
text = this.removeThinkingBlocks(text);
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`);
const endKeyIndex = text.indexOf(`</${this.key}>`);
@ -42,6 +46,17 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
return lines;
}
/**
* Removes all content within <think>...</think> blocks
* @param text The input text containing thinking blocks
* @returns The text with all thinking blocks removed
*/
private removeThinkingBlocks(text: string): string {
  // Each <think>…</think> pair is matched lazily, so separate blocks are
  // removed one by one; `[\s\S]` lets a single block span multiple lines.
  const thinkBlockPattern = /<think>[\s\S]*?<\/think>/g;
  const stripped = text.replace(thinkBlockPattern, '');
  // Drop whitespace left behind at the start or end of the remaining text.
  return stripped.trim();
}
getFormatInstructions(): string {
throw new Error('Not implemented.');
}

View File

@ -3,6 +3,7 @@ You are an AI question rephraser. You will be given a conversation and a follow-
If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
If you are a thinking or reasoning AI, you should avoid using \`<question>\` and \`</question>\` tags in your thinking. Those tags should only be used in the final output. You should also avoid using \`<links>\` and \`</links>\` tags in your thinking. Those tags should only be used in the final output.
There are several examples attached for your reference inside the below \`examples\` XML block

View File

@ -56,6 +56,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
private config: Config;
private strParser = new StringOutputParser();
private searchQuery?: string;
private searxngUrl?: string;
constructor(config: Config) {
this.config = config;
@ -81,6 +82,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
let question = this.config.summarizer
? await questionOutputParser.parse(input)
: input;
console.log('question', question);
if (question === 'not_needed') {
return { query: '', docs: [] };
@ -206,12 +208,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
} else {
question = question.replace(/<think>.*?<\/think>/g, '');
const res = await searchSearxng(question, {
const searxngResult = await searchSearxng(question, {
language: 'en',
engines: this.config.activeEngines,
});
const documents = res.results.map(
// Store the SearXNG URL for later use in emitting to the client
this.searxngUrl = searxngResult.searchUrl;
const documents = searxngResult.results.map(
(result) =>
new Document({
pageContent:
@ -265,7 +270,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
query = searchRetrieverResult.query;
docs = searchRetrieverResult.docs;
// Store the search query in the context for emitting to the client
if (searchRetrieverResult.searchQuery) {
this.searchQuery = searchRetrieverResult.searchQuery;
@ -447,16 +452,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
event.event === 'on_chain_end' &&
event.name === 'FinalSourceRetriever'
) {
// Add searchQuery to the sources data if it exists
const sourcesData = event.data.output;
// @ts-ignore - we added searchQuery property
if (this.searchQuery) {
emitter.emit(
'data',
JSON.stringify({
type: 'sources',
JSON.stringify({
type: 'sources',
data: sourcesData,
searchQuery: this.searchQuery
searchQuery: this.searchQuery,
searchUrl: this.searxngUrl,
}),
);
} else {

View File

@ -19,6 +19,12 @@ interface SearxngSearchResult {
iframe_src?: string;
}
/**
 * Matches the shape of the object that `searchSearxng` returns:
 * the result entries, the suggestion strings, and a link to the search.
 */
interface SearxngResponse {
/** Result entries, read from `res.data.results`. */
results: SearxngSearchResult[];
/** Suggestion strings, read from `res.data.suggestions`. */
suggestions: string[];
/**
 * URL for viewing the same search in the SearXNG web interface: the
 * `/search` path with the query in `q`, plus `engines` and `language`
 * when those options were supplied.
 */
searchUrl: string;
}
export const searchSearxng = async (
query: string,
opts?: SearxngSearchOptions,
@ -44,5 +50,16 @@ export const searchSearxng = async (
const results: SearxngSearchResult[] = res.data.results;
const suggestions: string[] = res.data.suggestions;
return { results, suggestions };
// Create a URL for viewing the search results in the SearXNG web interface
const searchUrl = new URL(searxngURL);
searchUrl.pathname = '/search';
searchUrl.searchParams.append('q', query);
if (opts?.engines?.length) {
searchUrl.searchParams.append('engines', opts.engines.join(','));
}
if (opts?.language) {
searchUrl.searchParams.append('language', opts.language);
}
return { results, suggestions, searchUrl: searchUrl.toString() };
};