Mirror of https://github.com/ItzCrazyKns/Perplexica.git, synced 2025-09-16 06:11:32 +00:00

Compare commits (3 commits)
5a23406dd2 ... 7d0cf97545

Author | SHA1 | Date
---|---|---
 | 7d0cf97545 |
 | f65b168388 |
 | 8796009141 |
.github/copilot-instructions.md (vendored, new file, 94 lines)

@@ -0,0 +1,94 @@
# GitHub Copilot Instructions for Perplexica

This file provides context and guidance for GitHub Copilot when working with the Perplexica codebase.
## Project Overview

Perplexica is an open-source AI-powered search engine that uses advanced machine learning to provide intelligent search results. It combines web search capabilities with LLM-based processing to understand and answer user questions, similar to Perplexity AI but fully open source.
## Key Components

- **Frontend**: Next.js application with React components (in `/src/components` and `/src/app`)
- **Backend Logic**: Node.js backend with API routes (in `/src/app/api`) and library code (in `/src/lib`)
- **Search Engine**: Uses SearXNG as a metasearch engine
- **LLM Integration**: Supports multiple providers, including OpenAI, Anthropic, Groq, and Ollama (local models)
- **Database**: SQLite database managed with Drizzle ORM
## Architecture

The system works through these main steps:

- User submits a query
- The system determines if web search is needed
- If needed, it searches the web using SearXNG
- Results are ranked using embedding-based similarity search
- LLMs are used to generate a comprehensive response with cited sources
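The ranking step above can be sketched in TypeScript. This is a minimal illustration of embedding-based similarity reranking, not Perplexica's actual implementation; the function names and the tiny two-dimensional vectors are hypothetical:

```typescript
// Minimal sketch of embedding-based reranking (hypothetical helper names,
// not the repository's real rerankDocs).
type ScoredDoc = { content: string; score: number };

// Cosine similarity between two embedding vectors.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Rank documents by similarity of their embeddings to the query embedding.
function rerankDocs(
  queryEmbedding: number[],
  docs: { content: string; embedding: number[] }[],
): ScoredDoc[] {
  return docs
    .map((d) => ({
      content: d.content,
      score: cosineSimilarity(queryEmbedding, d.embedding),
    }))
    .sort((a, b) => b.score - a.score);
}
```

In production the embeddings come from the configured embedding model; the sort is the same idea regardless of dimensionality.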
## Key Technologies

- **Frontend**: React, Next.js, Tailwind CSS
- **Backend**: Node.js
- **Database**: SQLite with Drizzle ORM
- **AI/ML**: LangChain for orchestration, various LLM providers
- **Search**: SearXNG integration
- **Embedding Models**: For re-ranking search results
## Project Structure

- `/src/app`: Next.js app directory with page components and API routes
- `/src/components`: Reusable UI components
- `/src/lib`: Backend functionality
  - `/lib/search`: Search functionality and meta search agent
  - `/lib/db`: Database schema and operations
  - `/lib/providers`: LLM and embedding model integrations
  - `/lib/prompts`: Prompt templates for LLMs
  - `/lib/chains`: LangChain chains for various operations
## Focus Modes

Perplexica supports multiple specialized search modes:

- All Mode: General web search
- Local Research Mode: Research and interact with local files with citations
- Chat Mode: Have a creative conversation
- Academic Search Mode: For academic research
- YouTube Search Mode: For video content
- Wolfram Alpha Search Mode: For calculations and data analysis
- Reddit Search Mode: For community discussions
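Each focus mode typically maps to a dedicated search handler. The sketch below shows one plausible shape for such a registry; the handler names and return strings are hypothetical, while the real `searchHandlers` map lives in `/src/lib/search` and is wired to `MetaSearchAgent` instances:

```typescript
// Hypothetical focus-mode registry sketch (illustrative, not repo code).
type SearchHandler = (query: string) => string;

const focusModeHandlers: Record<string, SearchHandler> = {
  webSearch: (q) => `web results for: ${q}`,
  academicSearch: (q) => `academic results for: ${q}`,
  youtubeSearch: (q) => `video results for: ${q}`,
};

// Resolve the handler for a requested mode, falling back to general web search.
function resolveHandler(mode: string): SearchHandler {
  return focusModeHandlers[mode] ?? focusModeHandlers['webSearch'];
}
```

A lookup-plus-fallback keeps unknown mode strings from crashing a request.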
## Development Workflow

- Use `npm run dev` for local development
- Format code with `npm run format:write` before committing
- Apply database migrations with `npm run db:push`
- Build for production with `npm run build`
- Start the production server with `npm run start`
## Configuration

The application uses a `config.toml` file (created from `sample.config.toml`) for configuration, including:

- API keys for various LLM providers
- Database settings
- Search engine configuration
- Similarity measure settings
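A typed accessor over such a config might look like the following sketch. The field names and default values here are assumptions for illustration, not the actual keys in `sample.config.toml`:

```typescript
// Illustrative typed-config sketch; Perplexica's real accessors are in
// src/lib/config and read values parsed from config.toml.
interface AppConfig {
  openaiApiKey?: string;
  similarityMeasure: 'cosine' | 'dot';
  searxngUrl: string;
}

// Hypothetical defaults used when a key is absent from the user's config.
const defaults: AppConfig = {
  similarityMeasure: 'cosine',
  searxngUrl: 'http://localhost:32768',
};

// Merge user-provided values over the defaults.
function loadConfig(overrides: Partial<AppConfig>): AppConfig {
  return { ...defaults, ...overrides };
}
```

Merging partial overrides over a complete default object keeps every field defined at the type level.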
## Common Tasks

When working on this codebase, you might need to:

- Add new API endpoints in `/src/app/api`
- Modify UI components in `/src/components`
- Extend search functionality in `/src/lib/search`
- Add new LLM providers in `/src/lib/providers`
- Update the database schema in `/src/lib/db/schema.ts`
- Create new prompt templates in `/src/lib/prompts`
- Build new chains in `/src/lib/chains`
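For the first of these tasks, a new endpoint under `/src/app/api` follows the Next.js route-handler convention: export an HTTP-method function that takes a `Request` and returns a `Response`. The route name and payload below are hypothetical:

```typescript
// Sketch of a hypothetical /src/app/api/suggestions/route.ts handler
// (route name and payload shape are illustrative, not from the repo).
export async function POST(req: Request): Promise<Response> {
  const body = (await req.json()) as { query?: string };
  if (!body.query) {
    // Reject requests without a query.
    return Response.json({ error: 'query is required' }, { status: 400 });
  }
  // Echo back a trivial suggestion list; a real handler would call into /src/lib.
  return Response.json({ suggestions: [`more about ${body.query}`] });
}
```

The Fetch-API `Request`/`Response` globals used here are available in Node.js 18+.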
## AI Behavior

- Avoid conciliatory language
- It is not necessary to apologize
- If you don't know the answer, ask for clarification
- Do not add additional packages or dependencies unless explicitly requested
- Only make changes to the code that are relevant to the task at hand
@@ -1,27 +1,23 @@
-import prompts from '@/lib/prompts';
-import MetaSearchAgent from '@/lib/search/metaSearchAgent';
-import crypto from 'crypto';
-import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
-import { EventEmitter } from 'stream';
-import {
-  chatModelProviders,
-  embeddingModelProviders,
-  getAvailableChatModelProviders,
-  getAvailableEmbeddingModelProviders,
-} from '@/lib/providers';
-import db from '@/lib/db';
-import { chats, messages as messagesSchema } from '@/lib/db/schema';
-import { and, eq, gt } from 'drizzle-orm';
-import { getFileDetails } from '@/lib/utils/files';
-import { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import { ChatOpenAI } from '@langchain/openai';
-import {
-  getCustomOpenaiApiKey,
-  getCustomOpenaiApiUrl,
-  getCustomOpenaiModelName,
-} from '@/lib/config';
-import { ChatOllama } from '@langchain/ollama';
+import db from '@/lib/db';
+import { chats, messages as messagesSchema } from '@/lib/db/schema';
+import {
+  getAvailableChatModelProviders,
+  getAvailableEmbeddingModelProviders
+} from '@/lib/providers';
+import { searchHandlers } from '@/lib/search';
+import { getFileDetails } from '@/lib/utils/files';
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
+import { ChatOllama } from '@langchain/ollama';
+import { ChatOpenAI } from '@langchain/openai';
+import crypto from 'crypto';
+import { and, eq, gte } from 'drizzle-orm';
+import { EventEmitter } from 'stream';
 
 export const runtime = 'nodejs';
 export const dynamic = 'force-dynamic';
@@ -69,6 +65,7 @@ const handleEmitterEvents = async (
 ) => {
   let recievedMessage = '';
   let sources: any[] = [];
+  let searchQuery: string | undefined;
 
   stream.on('data', (data) => {
     const parsedData = JSON.parse(data);
@@ -85,11 +82,17 @@ const handleEmitterEvents = async (
 
       recievedMessage += parsedData.data;
     } else if (parsedData.type === 'sources') {
+      // Capture the search query if available
+      if (parsedData.searchQuery) {
+        searchQuery = parsedData.searchQuery;
+      }
+
       writer.write(
         encoder.encode(
           JSON.stringify({
             type: 'sources',
             data: parsedData.data,
+            searchQuery: parsedData.searchQuery,
             messageId: aiMessageId,
           }) + '\n',
         ),
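The events written above form a newline-delimited JSON stream, one object per line. A minimal client-side parser for that framing might look like the following sketch; the event shapes mirror the diff, but the parser itself is illustrative, not code from the repository:

```typescript
// Sketch of consuming the route's newline-delimited JSON event stream
// (event union mirrors the diff above; parseEvents is illustrative).
type StreamEvent =
  | { type: 'message'; data: string; messageId: string }
  | { type: 'sources'; data: unknown[]; searchQuery?: string; messageId: string }
  | { type: 'messageEnd'; messageId: string; searchQuery?: string };

// Split a received chunk into parsed events, skipping empty lines.
function parseEvents(chunk: string): StreamEvent[] {
  return chunk
    .split('\n')
    .filter((line) => line.trim().length > 0)
    .map((line) => JSON.parse(line) as StreamEvent);
}
```

A real client would also buffer partial lines across chunk boundaries before splitting.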
@@ -124,6 +127,7 @@ const handleEmitterEvents = async (
           type: 'messageEnd',
           messageId: aiMessageId,
           modelStats: modelStats,
+          searchQuery: searchQuery,
         }) + '\n',
       ),
     );
@@ -138,6 +142,7 @@ const handleEmitterEvents = async (
       metadata: JSON.stringify({
         createdAt: new Date(),
         ...(sources && sources.length > 0 && { sources }),
+        ...(searchQuery && { searchQuery }),
        modelStats: modelStats,
      }),
    })
@@ -202,7 +207,7 @@ const handleHistorySave = async (
       .delete(messagesSchema)
       .where(
         and(
-          gt(messagesSchema.id, messageExists.id),
+          gte(messagesSchema.id, messageExists.id),
           eq(messagesSchema.chatId, message.chatId),
         ),
       )
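The hunk above swaps Drizzle's `gt` for `gte`, so the matched message itself is deleted along with everything after it when chat history is rewritten. The difference can be illustrated with plain arrays standing in for row ids (the real code runs these comparisons as SQL via Drizzle's operators):

```typescript
// Illustrates the gt -> gte change with plain arrays instead of SQL.
const messageIds = [1, 2, 3, 4, 5];
const editedId = 3;

// gt: keeps the edited message, deletes only the ones strictly after it.
const deletedWithGt = messageIds.filter((id) => id > editedId);

// gte: deletes the edited message as well, so it can be re-saved cleanly.
const deletedWithGte = messageIds.filter((id) => id >= editedId);
```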
@@ -27,6 +27,7 @@ export type Message = {
   suggestions?: string[];
   sources?: Document[];
   modelStats?: ModelStats;
+  searchQuery?: string;
 };
 
 export interface File {
@@ -416,6 +417,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
 
       if (data.type === 'sources') {
         sources = data.data;
+        const searchQuery = data.searchQuery;
         if (!added) {
           setMessages((prevMessages) => [
             ...prevMessages,
@@ -425,6 +427,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
               chatId: chatId!,
               role: 'assistant',
               sources: sources,
+              searchQuery: searchQuery,
               createdAt: new Date(),
             },
           ]);
@@ -481,6 +484,8 @@ const ChatWindow = ({ id }: { id?: string }) => {
             ...message,
             // Include model stats if available, otherwise null
             modelStats: data.modelStats || null,
+            // Make sure the searchQuery is preserved (if available in the message data)
+            searchQuery: message.searchQuery || data.searchQuery,
           };
         }
         return message;
@@ -278,6 +278,12 @@ const MessageBox = ({
                   Sources
                 </h3>
               </div>
+              {message.searchQuery && (
+                <div className="mb-2 text-sm bg-light-secondary dark:bg-dark-secondary rounded-lg p-3">
+                  <span className="font-medium text-black/70 dark:text-white/70">Search query:</span>{' '}
+                  <span className="text-black dark:text-white">{message.searchQuery}</span>
+                </div>
+              )}
               <MessageSources sources={message.sources} />
             </div>
           )}
@@ -254,7 +254,7 @@ const ModelSelector = ({
         {isExpanded && (
           <div className="pl-6">
             {provider.models.map((modelOption) => (
-              <button
+              <PopoverButton
                 key={`${modelOption.provider}-${modelOption.model}`}
                 className={cn(
                   'w-full text-left px-4 py-2 text-sm flex items-center',
@@ -283,7 +283,7 @@ const ModelSelector = ({
                     Active
                   </div>
                 )}
-              </button>
+              </PopoverButton>
             ))}
           </div>
         )}
@@ -1,5 +1,5 @@
 export const webSearchRetrieverPrompt = `
-You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it.
+You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it. You should condense the question to its essence and remove any unnecessary details. You should also make sure that the question is clear and easy to understand. You should not add any new information or change the meaning of the question. You should also make sure that the question is grammatically correct and free of spelling errors.
 If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
 If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
 You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
@@ -49,6 +49,21 @@ summarize
 https://example.com
 </links>
 \`
+
+6. Follow-up question: Get the current F1 constructor standings and return the results in a table
+Rephrased question: \`
+<question>
+Current F1 constructor standings
+</question>
+\`
+
+7. Follow-up question: What are the top 10 restaurants in New York? Show the results in a table and include a short description of each restaurant.
+Rephrased question: \`
+<question>
+Top 10 restaurants in New York
+</question>
+\`
 
 </examples>
 
 Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above.
@@ -55,6 +55,7 @@ type BasicChainInput = {
 class MetaSearchAgent implements MetaSearchAgentType {
   private config: Config;
   private strParser = new StringOutputParser();
+  private searchQuery?: string;
 
   constructor(config: Config) {
     this.config = config;
@@ -226,7 +227,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
           }),
         );
 
-        return { query: question, docs: documents };
+        return { query: question, docs: documents, searchQuery: question };
       }
     }),
   ]);
@@ -264,6 +265,11 @@ class MetaSearchAgent implements MetaSearchAgentType {
 
       query = searchRetrieverResult.query;
       docs = searchRetrieverResult.docs;
+
+      // Store the search query in the context for emitting to the client
+      if (searchRetrieverResult.searchQuery) {
+        this.searchQuery = searchRetrieverResult.searchQuery;
+      }
     }
 
     const sortedDocs = await this.rerankDocs(
@@ -441,10 +447,24 @@ class MetaSearchAgent implements MetaSearchAgentType {
           event.event === 'on_chain_end' &&
           event.name === 'FinalSourceRetriever'
         ) {
-          emitter.emit(
-            'data',
-            JSON.stringify({ type: 'sources', data: event.data.output }),
-          );
+          // Add searchQuery to the sources data if it exists
+          const sourcesData = event.data.output;
+          // @ts-ignore - we added searchQuery property
+          if (this.searchQuery) {
+            emitter.emit(
+              'data',
+              JSON.stringify({
+                type: 'sources',
+                data: sourcesData,
+                searchQuery: this.searchQuery
+              }),
+            );
+          } else {
+            emitter.emit(
+              'data',
+              JSON.stringify({ type: 'sources', data: sourcesData }),
+            );
+          }
         }
         if (
           event.event === 'on_chain_stream' &&