Compare commits


2 Commits

Commit f65b168388 by Willie Zutz on 2025-05-07 01:16:51 -06:00
    feat(UI): Add the search query to the response.
    Also, tweaked the search retriever prompt so it gives better search queries.

Commit 8796009141 by Willie Zutz on 2025-05-06 23:45:46 -06:00
    fix(api): History rewriting should delete the current message.
    fix(UI): Model changes shouldn't submit the form.
7 changed files with 172 additions and 27 deletions

.github/copilot-instructions.md (vendored, new file, 94 lines added)
View File

@@ -0,0 +1,94 @@
# GitHub Copilot Instructions for Perplexica
This file provides context and guidance for GitHub Copilot when working with the Perplexica codebase.
## Project Overview
Perplexica is an open-source AI-powered search engine that uses advanced machine learning to provide intelligent search results. It combines web search capabilities with LLM-based processing to understand and answer user questions, similar to Perplexity AI but fully open source.
## Key Components
- **Frontend**: Next.js application with React components (in `/src/components` and `/src/app`)
- **Backend Logic**: Node.js backend with API routes (in `/src/app/api`) and library code (in `/src/lib`)
- **Search Engine**: Uses SearXNG as a metasearch engine
- **LLM Integration**: Supports multiple models including OpenAI, Anthropic, Groq, Ollama (local models)
- **Database**: SQLite database managed with Drizzle ORM
## Architecture
The system works through these main steps:
- User submits a query
- The system determines if web search is needed
- If needed, it searches the web using SearXNG
- Results are ranked using embedding-based similarity search
- LLMs are used to generate a comprehensive response with cited sources
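The embedding-based re-ranking step can be pictured with a short sketch (illustrative only; this is not Perplexica's actual re-rank code, and it assumes embeddings have already been computed for the query and each document):

```ts
// Illustrative sketch: re-rank documents by cosine similarity between the
// query embedding and each document embedding, keeping the best matches first.
type EmbeddedDoc = { content: string; embedding: number[] };

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
}

function rerank(queryEmbedding: number[], docs: EmbeddedDoc[], topK = 8): EmbeddedDoc[] {
  return docs
    .map((doc) => ({ doc, score: cosineSimilarity(queryEmbedding, doc.embedding) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, topK)
    .map((entry) => entry.doc);
}
```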
## Key Technologies
- **Frontend**: React, Next.js, Tailwind CSS
- **Backend**: Node.js
- **Database**: SQLite with Drizzle ORM
- **AI/ML**: LangChain for orchestration, various LLM providers
- **Search**: SearXNG integration
- **Embedding Models**: For re-ranking search results
## Project Structure
- `/src/app`: Next.js app directory with page components and API routes
- `/src/components`: Reusable UI components
- `/src/lib`: Backend functionality
- `/lib/search`: Search functionality and meta search agent
- `/lib/db`: Database schema and operations
- `/lib/providers`: LLM and embedding model integrations
- `/lib/prompts`: Prompt templates for LLMs
- `/lib/chains`: LangChain chains for various operations
## Focus Modes
Perplexica supports multiple specialized search modes:
- All Mode: General web search
- Local Research Mode: Research and interact with local files with citations
- Chat Mode: Have a creative conversation
- Academic Search Mode: For academic research
- YouTube Search Mode: For video content
- Wolfram Alpha Search Mode: For calculations and data analysis
- Reddit Search Mode: For community discussions
## Development Workflow
- Use `npm run dev` for local development
- Format code with `npm run format:write` before committing
- Database migrations: `npm run db:push`
- Build for production: `npm run build`
- Start production server: `npm run start`
## Configuration
The application uses a `config.toml` file (created from `sample.config.toml`) for configuration, including:
- API keys for various LLM providers
- Database settings
- Search engine configuration
- Similarity measure settings
## Common Tasks
When working on this codebase, you might need to:
- Add new API endpoints in `/src/app/api`
- Modify UI components in `/src/components`
- Extend search functionality in `/src/lib/search`
- Add new LLM providers in `/src/lib/providers`
- Update database schema in `/src/lib/db/schema.ts`
- Create new prompt templates in `/src/lib/prompts`
- Build new chains in `/src/lib/chains`
## AI Behavior
- Avoid conciliatory language
- It is not necessary to apologize
- If you don't know the answer, ask for clarification
- Do not add additional packages or dependencies unless explicitly requested
- Only make changes to the code that are relevant to the task at hand

View File

@@ -1,27 +1,23 @@
-import prompts from '@/lib/prompts';
-import MetaSearchAgent from '@/lib/search/metaSearchAgent';
-import crypto from 'crypto';
-import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
-import { EventEmitter } from 'stream';
-import {
-  chatModelProviders,
-  embeddingModelProviders,
-  getAvailableChatModelProviders,
-  getAvailableEmbeddingModelProviders,
-} from '@/lib/providers';
-import db from '@/lib/db';
-import { chats, messages as messagesSchema } from '@/lib/db/schema';
-import { and, eq, gt } from 'drizzle-orm';
-import { getFileDetails } from '@/lib/utils/files';
-import { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import { ChatOpenAI } from '@langchain/openai';
 import {
   getCustomOpenaiApiKey,
   getCustomOpenaiApiUrl,
   getCustomOpenaiModelName,
 } from '@/lib/config';
-import { ChatOllama } from '@langchain/ollama';
+import db from '@/lib/db';
+import { chats, messages as messagesSchema } from '@/lib/db/schema';
+import {
+  getAvailableChatModelProviders,
+  getAvailableEmbeddingModelProviders
+} from '@/lib/providers';
 import { searchHandlers } from '@/lib/search';
+import { getFileDetails } from '@/lib/utils/files';
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
+import { ChatOllama } from '@langchain/ollama';
+import { ChatOpenAI } from '@langchain/openai';
+import crypto from 'crypto';
+import { and, eq, gte } from 'drizzle-orm';
+import { EventEmitter } from 'stream';
 
 export const runtime = 'nodejs';
 export const dynamic = 'force-dynamic';
@@ -69,6 +65,7 @@ const handleEmitterEvents = async (
 ) => {
   let recievedMessage = '';
   let sources: any[] = [];
+  let searchQuery: string | undefined;
 
   stream.on('data', (data) => {
     const parsedData = JSON.parse(data);
@@ -85,11 +82,17 @@ const handleEmitterEvents = async (
       recievedMessage += parsedData.data;
     } else if (parsedData.type === 'sources') {
+      // Capture the search query if available
+      if (parsedData.searchQuery) {
+        searchQuery = parsedData.searchQuery;
+      }
       writer.write(
         encoder.encode(
           JSON.stringify({
             type: 'sources',
             data: parsedData.data,
+            searchQuery: parsedData.searchQuery,
             messageId: aiMessageId,
           }) + '\n',
         ),
       );
@@ -124,6 +127,7 @@ const handleEmitterEvents = async (
           type: 'messageEnd',
           messageId: aiMessageId,
           modelStats: modelStats,
+          searchQuery: searchQuery,
         }) + '\n',
       ),
     );
@@ -138,6 +142,7 @@ const handleEmitterEvents = async (
         metadata: JSON.stringify({
          createdAt: new Date(),
          ...(sources && sources.length > 0 && { sources }),
+         ...(searchQuery && { searchQuery }),
          modelStats: modelStats,
        }),
      })
@@ -202,7 +207,7 @@ const handleHistorySave = async (
      .delete(messagesSchema)
      .where(
        and(
-         gt(messagesSchema.id, messageExists.id),
+         gte(messagesSchema.id, messageExists.id),
          eq(messagesSchema.chatId, message.chatId),
        ),
      )
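For orientation, this is roughly how a client could consume the stream this route writes and pick up the new `searchQuery` field. It is a sketch based on the NDJSON framing and the 'sources' and 'messageEnd' event names visible in the diff above, not code from this PR:

```ts
// Sketch only: read the newline-delimited JSON stream from the chat route and
// log the searchQuery carried on 'sources' and 'messageEnd' events.
async function readChatStream(response: Response): Promise<void> {
  const reader = response.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    let newlineIndex: number;
    while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
      const line = buffer.slice(0, newlineIndex).trim();
      buffer = buffer.slice(newlineIndex + 1);
      if (!line) continue;

      const event = JSON.parse(line);
      if (event.type === 'sources') {
        console.log('sources for', event.messageId, 'search query:', event.searchQuery);
      } else if (event.type === 'messageEnd') {
        console.log('message done, search query:', event.searchQuery);
      }
    }
  }
}
```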

View File

@@ -27,6 +27,7 @@ export type Message = {
   suggestions?: string[];
   sources?: Document[];
   modelStats?: ModelStats;
+  searchQuery?: string;
 };
 
 export interface File {
@@ -416,6 +417,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
       if (data.type === 'sources') {
         sources = data.data;
+        const searchQuery = data.searchQuery;
         if (!added) {
           setMessages((prevMessages) => [
             ...prevMessages,
@@ -425,6 +427,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
               chatId: chatId!,
               role: 'assistant',
               sources: sources,
+              searchQuery: searchQuery,
               createdAt: new Date(),
             },
           ]);
@@ -481,6 +484,8 @@ const ChatWindow = ({ id }: { id?: string }) => {
             ...message,
             // Include model stats if available, otherwise null
             modelStats: data.modelStats || null,
+            // Make sure the searchQuery is preserved (if available in the message data)
+            searchQuery: message.searchQuery || data.searchQuery,
           };
         }
         return message;

View File

@@ -278,6 +278,12 @@ const MessageBox = ({
               Sources
             </h3>
           </div>
+          {message.searchQuery && (
+            <div className="mb-2 text-sm bg-light-secondary dark:bg-dark-secondary rounded-lg p-3">
+              <span className="font-medium text-black/70 dark:text-white/70">Search query:</span>{' '}
+              <span className="text-black dark:text-white">{message.searchQuery}</span>
+            </div>
+          )}
           <MessageSources sources={message.sources} />
         </div>
       )}

View File

@@ -254,7 +254,7 @@ const ModelSelector = ({
           {isExpanded && (
             <div className="pl-6">
               {provider.models.map((modelOption) => (
-                <button
+                <PopoverButton
                   key={`${modelOption.provider}-${modelOption.model}`}
                   className={cn(
                     'w-full text-left px-4 py-2 text-sm flex items-center',
@@ -283,7 +283,7 @@ const ModelSelector = ({
                       Active
                     </div>
                   )}
-                </button>
+                </PopoverButton>
               ))}
             </div>
           )}
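The commit message says model changes shouldn't submit the form. A plausible explanation (an assumption, not stated in the diff) is that a bare `<button>` inside a `<form>` defaults to `type="submit"`, while Headless UI's `PopoverButton` renders a button that does not trigger the surrounding form. The self-contained demo below only illustrates the default-submit behaviour; the component and model names are hypothetical:

```tsx
// Illustrative only (not the project's ModelSelector): a bare <button> inside
// a <form> defaults to type="submit", so clicking it submits the form, while
// an explicit type="button" leaves the form alone.
import { useState } from 'react';

export default function ModelPickerDemo() {
  const [model, setModel] = useState('model-a');
  const [submitted, setSubmitted] = useState(false);

  return (
    <form
      onSubmit={(e) => {
        e.preventDefault();
        setSubmitted(true);
      }}
    >
      {/* Implicit type="submit": selecting this model also submits the form. */}
      <button onClick={() => setModel('model-a')}>Model A</button>

      {/* Explicit type="button": selecting this model does not submit the form. */}
      <button type="button" onClick={() => setModel('model-b')}>
        Model B
      </button>

      <p>
        Selected: {model} {submitted && '(form was submitted!)'}
      </p>
    </form>
  );
}
```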

View File

@@ -1,5 +1,5 @@
 export const webSearchRetrieverPrompt = `
-You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it.
+You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it. You should condense the question to its essence and remove any unnecessary details. You should also make sure that the question is clear and easy to understand. You should not add any new information or change the meaning of the question. You should also make sure that the question is grammatically correct and free of spelling errors.
 If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
 If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
 You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
@@ -49,6 +49,21 @@ summarize
 https://example.com
 </links>
 \`
+
+6. Follow-up question: Get the current F1 constructor standings and return the results in a table
+Rephrased question: \`
+<question>
+Current F1 constructor standings
+</question>
+\`
+
+7. Follow-up question: What are the top 10 restaurants in New York? Show the results in a table and include a short description of each restaurant.
+Rephrased question: \`
+<question>
+Top 10 restaurants in New York
+</question>
+\`
+
 
 </examples>
 Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above.
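Downstream code has to split the retriever's output back into its `question` and `links` blocks. The helper below is a rough, hypothetical sketch of that parsing, not the chain code used in this repository:

```ts
// Sketch only: extract the <question> and <links> blocks that the retriever
// prompt above asks the LLM to emit.
type RetrieverOutput = {
  question: string; // 'not_needed' when no web search is required
  links: string[];  // present only when the user supplied URLs
};

function parseRetrieverOutput(raw: string): RetrieverOutput {
  const questionMatch = raw.match(/<question>([\s\S]*?)<\/question>/i);
  const linksMatch = raw.match(/<links>([\s\S]*?)<\/links>/i);

  return {
    question: questionMatch ? questionMatch[1].trim() : 'not_needed',
    links: linksMatch
      ? linksMatch[1].split('\n').map((l) => l.trim()).filter(Boolean)
      : [],
  };
}
```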

View File

@@ -55,6 +55,7 @@ type BasicChainInput = {
 class MetaSearchAgent implements MetaSearchAgentType {
   private config: Config;
   private strParser = new StringOutputParser();
+  private searchQuery?: string;
 
   constructor(config: Config) {
     this.config = config;
@@ -226,7 +227,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
           }),
         );
 
-        return { query: question, docs: documents };
+        return { query: question, docs: documents, searchQuery: question };
       }
     }),
   ]);
@@ -264,6 +265,11 @@ class MetaSearchAgent implements MetaSearchAgentType {
       query = searchRetrieverResult.query;
       docs = searchRetrieverResult.docs;
+
+      // Store the search query in the context for emitting to the client
+      if (searchRetrieverResult.searchQuery) {
+        this.searchQuery = searchRetrieverResult.searchQuery;
+      }
     }
 
     const sortedDocs = await this.rerankDocs(
@@ -441,10 +447,24 @@ class MetaSearchAgent implements MetaSearchAgentType {
           event.event === 'on_chain_end' &&
           event.name === 'FinalSourceRetriever'
         ) {
-          emitter.emit(
-            'data',
-            JSON.stringify({ type: 'sources', data: event.data.output }),
-          );
+          // Add searchQuery to the sources data if it exists
+          const sourcesData = event.data.output;
+          // @ts-ignore - we added searchQuery property
+          if (this.searchQuery) {
+            emitter.emit(
+              'data',
+              JSON.stringify({
+                type: 'sources',
+                data: sourcesData,
+                searchQuery: this.searchQuery
+              }),
+            );
+          } else {
+            emitter.emit(
+              'data',
+              JSON.stringify({ type: 'sources', data: sourcesData }),
+            );
+          }
         }
         if (
           event.event === 'on_chain_stream' &&
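One possible simplification of the emit logic above (a sketch; `emitSources` and its parameters are hypothetical stand-ins for the agent's own state): a conditional spread omits `searchQuery` when no query was captured, which makes the separate else branch unnecessary.

```ts
// Sketch: behavior-equivalent to the branching emit shown in the diff above.
// The conditional spread drops the searchQuery key entirely when it is unset.
import { EventEmitter } from 'stream';

function emitSources(
  emitter: EventEmitter,
  sourcesData: unknown,
  searchQuery?: string,
): void {
  emitter.emit(
    'data',
    JSON.stringify({
      type: 'sources',
      data: sourcesData,
      ...(searchQuery && { searchQuery }),
    }),
  );
}
```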