Compare commits

..

2 Commits

Author SHA1 Message Date
Willie Zutz
d839769d7e fix(opensearch): Implement dynamic OpenSearch XML generation and update layout reference 2025-05-08 00:39:17 -06:00
Willie Zutz
ddfe8c607d feat(search): Implement OpenSearch support
feat(search): Add searchUrl to message
feat(parsers): Enhance parsers to deal with some thinking models better.
2025-05-08 00:21:31 -06:00
12 changed files with 132 additions and 17 deletions

View File

@@ -7,7 +7,7 @@ import db from '@/lib/db';
import { chats, messages as messagesSchema } from '@/lib/db/schema'; import { chats, messages as messagesSchema } from '@/lib/db/schema';
import { import {
getAvailableChatModelProviders, getAvailableChatModelProviders,
getAvailableEmbeddingModelProviders getAvailableEmbeddingModelProviders,
} from '@/lib/providers'; } from '@/lib/providers';
import { searchHandlers } from '@/lib/search'; import { searchHandlers } from '@/lib/search';
import { getFileDetails } from '@/lib/utils/files'; import { getFileDetails } from '@/lib/utils/files';
@@ -66,6 +66,7 @@ const handleEmitterEvents = async (
let recievedMessage = ''; let recievedMessage = '';
let sources: any[] = []; let sources: any[] = [];
let searchQuery: string | undefined; let searchQuery: string | undefined;
let searchUrl: string | undefined;
stream.on('data', (data) => { stream.on('data', (data) => {
const parsedData = JSON.parse(data); const parsedData = JSON.parse(data);
@@ -86,6 +87,9 @@ const handleEmitterEvents = async (
if (parsedData.searchQuery) { if (parsedData.searchQuery) {
searchQuery = parsedData.searchQuery; searchQuery = parsedData.searchQuery;
} }
if (parsedData.searchUrl) {
searchUrl = parsedData.searchUrl;
}
writer.write( writer.write(
encoder.encode( encoder.encode(
@@ -94,6 +98,7 @@ const handleEmitterEvents = async (
data: parsedData.data, data: parsedData.data,
searchQuery: parsedData.searchQuery, searchQuery: parsedData.searchQuery,
messageId: aiMessageId, messageId: aiMessageId,
searchUrl: searchUrl,
}) + '\n', }) + '\n',
), ),
); );
@@ -128,6 +133,7 @@ const handleEmitterEvents = async (
messageId: aiMessageId, messageId: aiMessageId,
modelStats: modelStats, modelStats: modelStats,
searchQuery: searchQuery, searchQuery: searchQuery,
searchUrl: searchUrl,
}) + '\n', }) + '\n',
), ),
); );
@@ -144,6 +150,7 @@ const handleEmitterEvents = async (
...(sources && sources.length > 0 && { sources }), ...(sources && sources.length > 0 && { sources }),
...(searchQuery && { searchQuery }), ...(searchQuery && { searchQuery }),
modelStats: modelStats, modelStats: modelStats,
...(searchUrl && { searchUrl }),
}), }),
}) })
.execute(); .execute();

View File

@@ -0,0 +1,26 @@
import { NextResponse } from 'next/server';
/**
 * Serves the OpenSearch description document for this site.
 *
 * Every absolute URL inside the document (favicon, search template and the
 * self-reference) is prefixed with the origin parsed from the incoming
 * request URL.
 *
 * @param request Incoming request; only its `url` is read.
 * @returns A response carrying the XML document with the
 *   `application/opensearchdescription+xml` content type.
 */
export async function GET(request: Request) {
  // Scheme + host (+ port) the request URL was addressed to
  const { origin } = new URL(request.url);

  // One entry per line of the document; joined with '\n' below
  const descriptionLines = [
    '<?xml version="1.0" encoding="utf-8"?>',
    '<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" xmlns:moz="http://www.mozilla.org/2006/browser/search/">',
    '<ShortName>Perplexica</ShortName>',
    '<LongName>Search with Perplexica AI</LongName>',
    '<Description>Perplexica is a powerful AI-driven search engine that understands your queries and delivers relevant results.</Description>',
    '<InputEncoding>UTF-8</InputEncoding>',
    `<Image width="16" height="16" type="image/x-icon">${origin}/favicon.ico</Image>`,
    `<Url type="text/html" template="${origin}/?q={searchTerms}"/>`,
    `<Url type="application/opensearchdescription+xml" rel="self" template="${origin}/api/opensearch"/>`,
    '</OpenSearchDescription>',
  ];

  const responseHeaders = {
    'Content-Type': 'application/opensearchdescription+xml',
  };

  return new NextResponse(descriptionLines.join('\n'), {
    headers: responseHeaders,
  });
}

View File

@@ -26,6 +26,14 @@ export default function RootLayout({
}>) { }>) {
return ( return (
<html className="h-full" lang="en" suppressHydrationWarning> <html className="h-full" lang="en" suppressHydrationWarning>
<head>
<link
rel="search"
type="application/opensearchdescription+xml"
title="Perplexica Search"
href="/api/opensearch"
/>
</head>
<body className={cn('h-full', montserrat.className)}> <body className={cn('h-full', montserrat.className)}>
<ThemeProvider> <ThemeProvider>
<Sidebar>{children}</Sidebar> <Sidebar>{children}</Sidebar>

View File

@@ -28,6 +28,7 @@ export type Message = {
sources?: Document[]; sources?: Document[];
modelStats?: ModelStats; modelStats?: ModelStats;
searchQuery?: string; searchQuery?: string;
searchUrl?: string;
}; };
export interface File { export interface File {
@@ -417,7 +418,6 @@ const ChatWindow = ({ id }: { id?: string }) => {
if (data.type === 'sources') { if (data.type === 'sources') {
sources = data.data; sources = data.data;
const searchQuery = data.searchQuery;
if (!added) { if (!added) {
setMessages((prevMessages) => [ setMessages((prevMessages) => [
...prevMessages, ...prevMessages,
@@ -427,7 +427,8 @@ const ChatWindow = ({ id }: { id?: string }) => {
chatId: chatId!, chatId: chatId!,
role: 'assistant', role: 'assistant',
sources: sources, sources: sources,
searchQuery: searchQuery, searchQuery: data.searchQuery,
searchUrl: data.searchUrl,
createdAt: new Date(), createdAt: new Date(),
}, },
]); ]);
@@ -486,6 +487,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
modelStats: data.modelStats || null, modelStats: data.modelStats || null,
// Make sure the searchQuery is preserved (if available in the message data) // Make sure the searchQuery is preserved (if available in the message data)
searchQuery: message.searchQuery || data.searchQuery, searchQuery: message.searchQuery || data.searchQuery,
searchUrl: message.searchUrl || data.searchUrl,
}; };
} }
return message; return message;

View File

@@ -280,8 +280,23 @@ const MessageBox = ({
</div> </div>
{message.searchQuery && ( {message.searchQuery && (
<div className="mb-2 text-sm bg-light-secondary dark:bg-dark-secondary rounded-lg p-3"> <div className="mb-2 text-sm bg-light-secondary dark:bg-dark-secondary rounded-lg p-3">
<span className="font-medium text-black/70 dark:text-white/70">Search query:</span>{' '} <span className="font-medium text-black/70 dark:text-white/70">
<span className="text-black dark:text-white">{message.searchQuery}</span> Search query:
</span>{' '}
{message.searchUrl ? (
<a
href={message.searchUrl}
target="_blank"
rel="noopener noreferrer"
className="dark:text-white text-black hover:underline"
>
{message.searchQuery}
</a>
) : (
<span className="text-black dark:text-white">
{message.searchQuery}
</span>
)}
</div> </div>
)} )}
<MessageSources sources={message.sources} /> <MessageSources sources={message.sources} />

View File

@@ -86,7 +86,7 @@ const MessageInput = ({
setMessage(''); setMessage('');
}; };
return ( return (
<form <form
onSubmit={(e) => { onSubmit={(e) => {
e.preventDefault(); e.preventDefault();
@@ -107,7 +107,7 @@ const MessageInput = ({
onChange={(e) => setMessage(e.target.value)} onChange={(e) => setMessage(e.target.value)}
minRows={2} minRows={2}
className="bg-transparent placeholder:text-black/50 dark:placeholder:text-white/50 text-sm text-black dark:text-white resize-none focus:outline-none w-full max-h-24 lg:max-h-36 xl:max-h-48" className="bg-transparent placeholder:text-black/50 dark:placeholder:text-white/50 text-sm text-black dark:text-white resize-none focus:outline-none w-full max-h-24 lg:max-h-36 xl:max-h-48"
placeholder={firstMessage ? "Ask anything..." :"Ask a follow-up"} placeholder={firstMessage ? 'Ask anything...' : 'Ask a follow-up'}
/> />
<div className="flex flex-row items-center justify-between mt-4"> <div className="flex flex-row items-center justify-between mt-4">
<div className="flex flex-row items-center space-x-2 lg:space-x-4"> <div className="flex flex-row items-center space-x-2 lg:space-x-4">
@@ -134,7 +134,11 @@ const MessageInput = ({
className="bg-[#24A0ED] text-white disabled:text-black/50 dark:disabled:text-white/50 disabled:bg-[#e0e0dc] dark:disabled:bg-[#ececec21] hover:bg-opacity-85 transition duration-100 rounded-full p-2" className="bg-[#24A0ED] text-white disabled:text-black/50 dark:disabled:text-white/50 disabled:bg-[#e0e0dc] dark:disabled:bg-[#ececec21] hover:bg-opacity-85 transition duration-100 rounded-full p-2"
type="submit" type="submit"
> >
{firstMessage ? <ArrowRight className="bg-background" size={17} /> : <ArrowUp className="bg-background" size={17} />} {firstMessage ? (
<ArrowRight className="bg-background" size={17} />
) : (
<ArrowUp className="bg-background" size={17} />
)}
</button> </button>
</div> </div>
</div> </div>

View File

@@ -10,6 +10,7 @@ const suggestionGeneratorPrompt = `
You are an AI suggestion generator for an AI powered search engine. You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation. The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information. You are an AI suggestion generator for an AI powered search engine. You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation. The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information.
You need to make sure the suggestions are relevant to the conversation and are helpful to the user. Keep a note that the user might use these suggestions to ask a chat model for more information. You need to make sure the suggestions are relevant to the conversation and are helpful to the user. Keep a note that the user might use these suggestions to ask a chat model for more information.
Make sure the suggestions are medium in length and are informative and relevant to the conversation. Make sure the suggestions are medium in length and are informative and relevant to the conversation.
If you are a thinking or reasoning AI, you should avoid using \`<suggestions>\` and \`</suggestions>\` tags in your thinking. Those tags should only be used in the final output.
Provide these suggestions separated by newlines between the XML tags <suggestions> and </suggestions>. For example: Provide these suggestions separated by newlines between the XML tags <suggestions> and </suggestions>. For example:

View File

@@ -21,6 +21,10 @@ class LineOutputParser extends BaseOutputParser<string> {
async parse(text: string): Promise<string> { async parse(text: string): Promise<string> {
text = text.trim() || ''; text = text.trim() || '';
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
text = this.removeThinkingBlocks(text);
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/; const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`); const startKeyIndex = text.indexOf(`<${this.key}>`);
const endKeyIndex = text.indexOf(`</${this.key}>`); const endKeyIndex = text.indexOf(`</${this.key}>`);
@@ -40,6 +44,17 @@ class LineOutputParser extends BaseOutputParser<string> {
return line; return line;
} }
/**
* Removes all content within <think>...</think> blocks
* @param text The input text containing thinking blocks
* @returns The text with all thinking blocks removed, trimmed of leading and trailing whitespace
*/
private removeThinkingBlocks(text: string): string {
// Use regex to identify and remove all <think>...</think> blocks
// Using the [\s\S] pattern (not the 's' flag) to match all characters including newlines;
// the lazy *? quantifier stops each match at the nearest closing </think> tag
return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
}
getFormatInstructions(): string { getFormatInstructions(): string {
throw new Error('Not implemented.'); throw new Error('Not implemented.');
} }

View File

@@ -21,6 +21,10 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
async parse(text: string): Promise<string[]> { async parse(text: string): Promise<string[]> {
text = text.trim() || ''; text = text.trim() || '';
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
text = this.removeThinkingBlocks(text);
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/; const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`); const startKeyIndex = text.indexOf(`<${this.key}>`);
const endKeyIndex = text.indexOf(`</${this.key}>`); const endKeyIndex = text.indexOf(`</${this.key}>`);
@@ -42,6 +46,17 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
return lines; return lines;
} }
/**
 * Strips every `<think>...</think>` section out of the given text.
 * @param text Raw text that may contain thinking sections
 * @returns The remaining text, trimmed of leading and trailing whitespace
 */
private removeThinkingBlocks(text: string): string {
  // [\s\S] matches any character, newlines included; the lazy *? quantifier
  // ends each match at the nearest closing tag, and the g flag repeats the
  // replacement for every section found.
  const thinkingSection = /<think>[\s\S]*?<\/think>/g;
  const withoutThinking = text.replace(thinkingSection, '');
  return withoutThinking.trim();
}
getFormatInstructions(): string { getFormatInstructions(): string {
throw new Error('Not implemented.'); throw new Error('Not implemented.');
} }

View File

@@ -3,6 +3,7 @@ You are an AI question rephraser. You will be given a conversation and a follow-
If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic). If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block. If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response. You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
If you are a thinking or reasoning AI, you should avoid using \`<question>\` and \`</question>\` tags in your thinking. Those tags should only be used in the final output. You should also avoid using \`<links>\` and \`</links>\` tags in your thinking. Those tags should only be used in the final output.
There are several examples attached for your reference inside the below \`examples\` XML block There are several examples attached for your reference inside the below \`examples\` XML block

View File

@@ -56,6 +56,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
private config: Config; private config: Config;
private strParser = new StringOutputParser(); private strParser = new StringOutputParser();
private searchQuery?: string; private searchQuery?: string;
private searxngUrl?: string;
constructor(config: Config) { constructor(config: Config) {
this.config = config; this.config = config;
@@ -81,6 +82,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
let question = this.config.summarizer let question = this.config.summarizer
? await questionOutputParser.parse(input) ? await questionOutputParser.parse(input)
: input; : input;
console.log('question', question);
if (question === 'not_needed') { if (question === 'not_needed') {
return { query: '', docs: [] }; return { query: '', docs: [] };
@@ -206,12 +208,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
} else { } else {
question = question.replace(/<think>.*?<\/think>/g, ''); question = question.replace(/<think>.*?<\/think>/g, '');
const res = await searchSearxng(question, { const searxngResult = await searchSearxng(question, {
language: 'en', language: 'en',
engines: this.config.activeEngines, engines: this.config.activeEngines,
}); });
const documents = res.results.map( // Store the SearXNG URL for later use in emitting to the client
this.searxngUrl = searxngResult.searchUrl;
const documents = searxngResult.results.map(
(result) => (result) =>
new Document({ new Document({
pageContent: pageContent:
@@ -265,7 +270,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
query = searchRetrieverResult.query; query = searchRetrieverResult.query;
docs = searchRetrieverResult.docs; docs = searchRetrieverResult.docs;
// Store the search query in the context for emitting to the client // Store the search query in the context for emitting to the client
if (searchRetrieverResult.searchQuery) { if (searchRetrieverResult.searchQuery) {
this.searchQuery = searchRetrieverResult.searchQuery; this.searchQuery = searchRetrieverResult.searchQuery;
@@ -447,16 +452,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
event.event === 'on_chain_end' && event.event === 'on_chain_end' &&
event.name === 'FinalSourceRetriever' event.name === 'FinalSourceRetriever'
) { ) {
// Add searchQuery to the sources data if it exists
const sourcesData = event.data.output; const sourcesData = event.data.output;
// @ts-ignore - we added searchQuery property
if (this.searchQuery) { if (this.searchQuery) {
emitter.emit( emitter.emit(
'data', 'data',
JSON.stringify({ JSON.stringify({
type: 'sources', type: 'sources',
data: sourcesData, data: sourcesData,
searchQuery: this.searchQuery searchQuery: this.searchQuery,
searchUrl: this.searxngUrl,
}), }),
); );
} else { } else {

View File

@@ -19,6 +19,12 @@ interface SearxngSearchResult {
iframe_src?: string; iframe_src?: string;
} }
/**
 * Shape of a SearXNG search outcome; its fields match the object that
 * `searchSearxng` returns.
 */
interface SearxngResponse {
/** Result entries taken from the SearXNG response data. */
results: SearxngSearchResult[];
/** Suggestion strings taken from the SearXNG response data. */
suggestions: string[];
/** URL for viewing the same search in the SearXNG web interface. */
searchUrl: string;
}
export const searchSearxng = async ( export const searchSearxng = async (
query: string, query: string,
opts?: SearxngSearchOptions, opts?: SearxngSearchOptions,
@@ -44,5 +50,16 @@ export const searchSearxng = async (
const results: SearxngSearchResult[] = res.data.results; const results: SearxngSearchResult[] = res.data.results;
const suggestions: string[] = res.data.suggestions; const suggestions: string[] = res.data.suggestions;
return { results, suggestions }; // Create a URL for viewing the search results in the SearXNG web interface
const searchUrl = new URL(searxngURL);
searchUrl.pathname = '/search';
searchUrl.searchParams.append('q', query);
if (opts?.engines?.length) {
searchUrl.searchParams.append('engines', opts.engines.join(','));
}
if (opts?.language) {
searchUrl.searchParams.append('language', opts.language);
}
return { results, suggestions, searchUrl: searchUrl.toString() };
}; };