mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-06-17 23:38:30 +00:00
feat(search): Implement OpenSearch support
feat(search): Add searchUrl to message
feat(parsers): Enhance parsers to deal with some thinking models better.
This commit is contained in:
9
public/opensearch.xml
Normal file
9
public/opensearch.xml
Normal file
@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
|
||||
<ShortName>Perplexica</ShortName>
|
||||
<Description>Search with Perplexica AI</Description>
|
||||
<InputEncoding>UTF-8</InputEncoding>
|
||||
<Image width="16" height="16" type="image/x-icon">/favicon.ico</Image>
|
||||
<Url type="text/html" template="/?q={searchTerms}"/>
|
||||
<Url type="application/opensearchdescription+xml" rel="self" template="/opensearch.xml"/>
|
||||
</OpenSearchDescription>
|
@ -7,7 +7,7 @@ import db from '@/lib/db';
|
||||
import { chats, messages as messagesSchema } from '@/lib/db/schema';
|
||||
import {
|
||||
getAvailableChatModelProviders,
|
||||
getAvailableEmbeddingModelProviders
|
||||
getAvailableEmbeddingModelProviders,
|
||||
} from '@/lib/providers';
|
||||
import { searchHandlers } from '@/lib/search';
|
||||
import { getFileDetails } from '@/lib/utils/files';
|
||||
@ -66,6 +66,7 @@ const handleEmitterEvents = async (
|
||||
let recievedMessage = '';
|
||||
let sources: any[] = [];
|
||||
let searchQuery: string | undefined;
|
||||
let searchUrl: string | undefined;
|
||||
|
||||
stream.on('data', (data) => {
|
||||
const parsedData = JSON.parse(data);
|
||||
@ -86,6 +87,9 @@ const handleEmitterEvents = async (
|
||||
if (parsedData.searchQuery) {
|
||||
searchQuery = parsedData.searchQuery;
|
||||
}
|
||||
if (parsedData.searchUrl) {
|
||||
searchUrl = parsedData.searchUrl;
|
||||
}
|
||||
|
||||
writer.write(
|
||||
encoder.encode(
|
||||
@ -94,6 +98,7 @@ const handleEmitterEvents = async (
|
||||
data: parsedData.data,
|
||||
searchQuery: parsedData.searchQuery,
|
||||
messageId: aiMessageId,
|
||||
searchUrl: searchUrl,
|
||||
}) + '\n',
|
||||
),
|
||||
);
|
||||
@ -128,6 +133,7 @@ const handleEmitterEvents = async (
|
||||
messageId: aiMessageId,
|
||||
modelStats: modelStats,
|
||||
searchQuery: searchQuery,
|
||||
searchUrl: searchUrl,
|
||||
}) + '\n',
|
||||
),
|
||||
);
|
||||
@ -144,6 +150,7 @@ const handleEmitterEvents = async (
|
||||
...(sources && sources.length > 0 && { sources }),
|
||||
...(searchQuery && { searchQuery }),
|
||||
modelStats: modelStats,
|
||||
...(searchUrl && { searchUrl }),
|
||||
}),
|
||||
})
|
||||
.execute();
|
||||
|
@ -26,6 +26,14 @@ export default function RootLayout({
|
||||
}>) {
|
||||
return (
|
||||
<html className="h-full" lang="en" suppressHydrationWarning>
|
||||
<head>
|
||||
<link
|
||||
rel="search"
|
||||
type="application/opensearchdescription+xml"
|
||||
title="Perplexica Search"
|
||||
href="/opensearch.xml"
|
||||
/>
|
||||
</head>
|
||||
<body className={cn('h-full', montserrat.className)}>
|
||||
<ThemeProvider>
|
||||
<Sidebar>{children}</Sidebar>
|
||||
|
@ -28,6 +28,7 @@ export type Message = {
|
||||
sources?: Document[];
|
||||
modelStats?: ModelStats;
|
||||
searchQuery?: string;
|
||||
searchUrl?: string;
|
||||
};
|
||||
|
||||
export interface File {
|
||||
@ -417,7 +418,6 @@ const ChatWindow = ({ id }: { id?: string }) => {
|
||||
|
||||
if (data.type === 'sources') {
|
||||
sources = data.data;
|
||||
const searchQuery = data.searchQuery;
|
||||
if (!added) {
|
||||
setMessages((prevMessages) => [
|
||||
...prevMessages,
|
||||
@ -427,7 +427,8 @@ const ChatWindow = ({ id }: { id?: string }) => {
|
||||
chatId: chatId!,
|
||||
role: 'assistant',
|
||||
sources: sources,
|
||||
searchQuery: searchQuery,
|
||||
searchQuery: data.searchQuery,
|
||||
searchUrl: data.searchUrl,
|
||||
createdAt: new Date(),
|
||||
},
|
||||
]);
|
||||
@ -486,6 +487,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
|
||||
modelStats: data.modelStats || null,
|
||||
// Make sure the searchQuery and searchUrl are preserved (prefer values already on the message, else fall back to the incoming data)
|
||||
searchQuery: message.searchQuery || data.searchQuery,
|
||||
searchUrl: message.searchUrl || data.searchUrl,
|
||||
};
|
||||
}
|
||||
return message;
|
||||
|
@ -280,8 +280,23 @@ const MessageBox = ({
|
||||
</div>
|
||||
{message.searchQuery && (
|
||||
<div className="mb-2 text-sm bg-light-secondary dark:bg-dark-secondary rounded-lg p-3">
|
||||
<span className="font-medium text-black/70 dark:text-white/70">Search query:</span>{' '}
|
||||
<span className="text-black dark:text-white">{message.searchQuery}</span>
|
||||
<span className="font-medium text-black/70 dark:text-white/70">
|
||||
Search query:
|
||||
</span>{' '}
|
||||
{message.searchUrl ? (
|
||||
<a
|
||||
href={message.searchUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="dark:text-white text-black hover:underline"
|
||||
>
|
||||
{message.searchQuery}
|
||||
</a>
|
||||
) : (
|
||||
<span className="text-black dark:text-white">
|
||||
{message.searchQuery}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
<MessageSources sources={message.sources} />
|
||||
|
@ -86,7 +86,7 @@ const MessageInput = ({
|
||||
setMessage('');
|
||||
};
|
||||
|
||||
return (
|
||||
return (
|
||||
<form
|
||||
onSubmit={(e) => {
|
||||
e.preventDefault();
|
||||
@ -107,7 +107,7 @@ const MessageInput = ({
|
||||
onChange={(e) => setMessage(e.target.value)}
|
||||
minRows={2}
|
||||
className="bg-transparent placeholder:text-black/50 dark:placeholder:text-white/50 text-sm text-black dark:text-white resize-none focus:outline-none w-full max-h-24 lg:max-h-36 xl:max-h-48"
|
||||
placeholder={firstMessage ? "Ask anything..." :"Ask a follow-up"}
|
||||
placeholder={firstMessage ? 'Ask anything...' : 'Ask a follow-up'}
|
||||
/>
|
||||
<div className="flex flex-row items-center justify-between mt-4">
|
||||
<div className="flex flex-row items-center space-x-2 lg:space-x-4">
|
||||
@ -134,7 +134,11 @@ const MessageInput = ({
|
||||
className="bg-[#24A0ED] text-white disabled:text-black/50 dark:disabled:text-white/50 disabled:bg-[#e0e0dc] dark:disabled:bg-[#ececec21] hover:bg-opacity-85 transition duration-100 rounded-full p-2"
|
||||
type="submit"
|
||||
>
|
||||
{firstMessage ? <ArrowRight className="bg-background" size={17} /> : <ArrowUp className="bg-background" size={17} />}
|
||||
{firstMessage ? (
|
||||
<ArrowRight className="bg-background" size={17} />
|
||||
) : (
|
||||
<ArrowUp className="bg-background" size={17} />
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -10,6 +10,7 @@ const suggestionGeneratorPrompt = `
|
||||
You are an AI suggestion generator for an AI powered search engine. You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation. The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information.
|
||||
You need to make sure the suggestions are relevant to the conversation and are helpful to the user. Keep a note that the user might use these suggestions to ask a chat model for more information.
|
||||
Make sure the suggestions are medium in length and are informative and relevant to the conversation.
|
||||
If you are a thinking or reasoning AI, you should avoid using \`<suggestions>\` and \`</suggestions>\` tags in your thinking. Those tags should only be used in the final output.
|
||||
|
||||
Provide these suggestions separated by newlines between the XML tags <suggestions> and </suggestions>. For example:
|
||||
|
||||
|
@ -21,6 +21,10 @@ class LineOutputParser extends BaseOutputParser<string> {
|
||||
async parse(text: string): Promise<string> {
|
||||
text = text.trim() || '';
|
||||
|
||||
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
|
||||
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
|
||||
text = this.removeThinkingBlocks(text);
|
||||
|
||||
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
|
||||
const startKeyIndex = text.indexOf(`<${this.key}>`);
|
||||
const endKeyIndex = text.indexOf(`</${this.key}>`);
|
||||
@ -40,6 +44,17 @@ class LineOutputParser extends BaseOutputParser<string> {
|
||||
return line;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all content within <think>...</think> blocks
|
||||
* @param text The input text containing thinking blocks
|
||||
* @returns The text with all thinking blocks removed
|
||||
*/
|
||||
private removeThinkingBlocks(text: string): string {
|
||||
// Use regex to identify and remove all <think>...</think> blocks
|
||||
// Using [\s\S] (instead of '.' with the 's' flag) so the match can span newlines
|
||||
return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||
}
|
||||
|
||||
getFormatInstructions(): string {
|
||||
throw new Error('Not implemented.');
|
||||
}
|
||||
|
@ -21,6 +21,10 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
|
||||
async parse(text: string): Promise<string[]> {
|
||||
text = text.trim() || '';
|
||||
|
||||
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
|
||||
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
|
||||
text = this.removeThinkingBlocks(text);
|
||||
|
||||
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
|
||||
const startKeyIndex = text.indexOf(`<${this.key}>`);
|
||||
const endKeyIndex = text.indexOf(`</${this.key}>`);
|
||||
@ -42,6 +46,17 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all content within <think>...</think> blocks
|
||||
* @param text The input text containing thinking blocks
|
||||
* @returns The text with all thinking blocks removed
|
||||
*/
|
||||
private removeThinkingBlocks(text: string): string {
|
||||
// Use regex to identify and remove all <think>...</think> blocks
|
||||
// Using [\s\S] pattern to match all characters including newlines
|
||||
return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||
}
|
||||
|
||||
getFormatInstructions(): string {
|
||||
throw new Error('Not implemented.');
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ You are an AI question rephraser. You will be given a conversation and a follow-
|
||||
If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
|
||||
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
|
||||
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
|
||||
If you are a thinking or reasoning AI, you should avoid using \`<question>\` and \`</question>\` tags in your thinking. Those tags should only be used in the final output. You should also avoid using \`<links>\` and \`</links>\` tags in your thinking. Those tags should only be used in the final output.
|
||||
|
||||
There are several examples attached for your reference inside the below \`examples\` XML block
|
||||
|
||||
|
@ -56,6 +56,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
private config: Config;
|
||||
private strParser = new StringOutputParser();
|
||||
private searchQuery?: string;
|
||||
private searxngUrl?: string;
|
||||
|
||||
constructor(config: Config) {
|
||||
this.config = config;
|
||||
@ -81,6 +82,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
let question = this.config.summarizer
|
||||
? await questionOutputParser.parse(input)
|
||||
: input;
|
||||
console.log('question', question);
|
||||
|
||||
if (question === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
@ -206,12 +208,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
} else {
|
||||
question = question.replace(/<think>.*?<\/think>/g, '');
|
||||
|
||||
const res = await searchSearxng(question, {
|
||||
const searxngResult = await searchSearxng(question, {
|
||||
language: 'en',
|
||||
engines: this.config.activeEngines,
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
// Store the SearXNG URL for later use in emitting to the client
|
||||
this.searxngUrl = searxngResult.searchUrl;
|
||||
|
||||
const documents = searxngResult.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent:
|
||||
@ -265,7 +270,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
|
||||
query = searchRetrieverResult.query;
|
||||
docs = searchRetrieverResult.docs;
|
||||
|
||||
|
||||
// Store the search query in the context for emitting to the client
|
||||
if (searchRetrieverResult.searchQuery) {
|
||||
this.searchQuery = searchRetrieverResult.searchQuery;
|
||||
@ -447,16 +452,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
event.event === 'on_chain_end' &&
|
||||
event.name === 'FinalSourceRetriever'
|
||||
) {
|
||||
// Add searchQuery to the sources data if it exists
|
||||
const sourcesData = event.data.output;
|
||||
// @ts-ignore - we added searchQuery property
|
||||
if (this.searchQuery) {
|
||||
emitter.emit(
|
||||
'data',
|
||||
JSON.stringify({
|
||||
type: 'sources',
|
||||
JSON.stringify({
|
||||
type: 'sources',
|
||||
data: sourcesData,
|
||||
searchQuery: this.searchQuery
|
||||
searchQuery: this.searchQuery,
|
||||
searchUrl: this.searxngUrl,
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
|
@ -19,6 +19,12 @@ interface SearxngSearchResult {
|
||||
iframe_src?: string;
|
||||
}
|
||||
|
||||
interface SearxngResponse {
|
||||
results: SearxngSearchResult[];
|
||||
suggestions: string[];
|
||||
searchUrl: string;
|
||||
}
|
||||
|
||||
export const searchSearxng = async (
|
||||
query: string,
|
||||
opts?: SearxngSearchOptions,
|
||||
@ -44,5 +50,16 @@ export const searchSearxng = async (
|
||||
const results: SearxngSearchResult[] = res.data.results;
|
||||
const suggestions: string[] = res.data.suggestions;
|
||||
|
||||
return { results, suggestions };
|
||||
// Create a URL for viewing the search results in the SearXNG web interface
|
||||
const searchUrl = new URL(searxngURL);
|
||||
searchUrl.pathname = '/search';
|
||||
searchUrl.searchParams.append('q', query);
|
||||
if (opts?.engines?.length) {
|
||||
searchUrl.searchParams.append('engines', opts.engines.join(','));
|
||||
}
|
||||
if (opts?.language) {
|
||||
searchUrl.searchParams.append('language', opts.language);
|
||||
}
|
||||
|
||||
return { results, suggestions, searchUrl: searchUrl.toString() };
|
||||
};
|
||||
|
Reference in New Issue
Block a user