Mirror of https://github.com/ItzCrazyKns/Perplexica.git (synced 2025-10-30 19:08:15 +00:00)
Compare commits: feat/impro ... feat/struc (5 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | df33229934 |  |
|  | 49fafaa096 |  |
|  | ca9b32a23b |  |
|  | 76e3ff4e02 |  |
|  | eabf3ca7d3 |  |
							
								
								
									
package.json: 13 lines changed
							| @@ -15,11 +15,12 @@ | ||||
|     "@headlessui/react": "^2.2.0", | ||||
|     "@iarna/toml": "^2.2.5", | ||||
|     "@icons-pack/react-simple-icons": "^12.3.0", | ||||
|     "@langchain/anthropic": "^0.3.15", | ||||
|     "@langchain/community": "^0.3.36", | ||||
|     "@langchain/core": "^0.3.42", | ||||
|     "@langchain/google-genai": "^0.1.12", | ||||
|     "@langchain/openai": "^0.0.25", | ||||
|     "@langchain/anthropic": "^0.3.24", | ||||
|     "@langchain/community": "^0.3.49", | ||||
|     "@langchain/core": "^0.3.66", | ||||
|     "@langchain/google-genai": "^0.2.15", | ||||
|     "@langchain/ollama": "^0.2.3", | ||||
|     "@langchain/openai": "^0.6.2", | ||||
|     "@langchain/textsplitters": "^0.1.0", | ||||
|     "@tailwindcss/typography": "^0.5.12", | ||||
|     "@xenova/transformers": "^2.17.2", | ||||
| @@ -31,7 +32,7 @@ | ||||
|     "drizzle-orm": "^0.40.1", | ||||
|     "html-to-text": "^9.0.5", | ||||
|     "jspdf": "^3.0.1", | ||||
|     "langchain": "^0.1.30", | ||||
|     "langchain": "^0.3.30", | ||||
|     "lucide-react": "^0.363.0", | ||||
|     "mammoth": "^1.9.1", | ||||
|     "markdown-to-jsx": "^7.7.2", | ||||
|   | ||||
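Taken together, these bumps move the project onto the LangChain 0.3 line: `langchain` 0.1.30 → 0.3.30, `@langchain/openai` 0.0.25 → 0.6.2, and a new dedicated `@langchain/ollama` package. The hunks that follow fall out of that: the 0.6.x OpenAI client expects `apiKey` instead of `openAIApiKey`, and the Ollama classes move out of `@langchain/community`. A minimal sketch of the resulting import surface, assuming the package versions above (paths are the standard entry points for these packages, not copied from the repository):

```ts
// Illustrative imports under the bumped packages; shown only to situate the
// changes below, not taken verbatim from the project.
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
import { ChatOllama, OllamaEmbeddings } from '@langchain/ollama';
import { ChatAnthropic } from '@langchain/anthropic';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
```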
| @@ -223,7 +223,7 @@ export const POST = async (req: Request) => { | ||||
|  | ||||
|     if (body.chatModel?.provider === 'custom_openai') { | ||||
|       llm = new ChatOpenAI({ | ||||
|         openAIApiKey: getCustomOpenaiApiKey(), | ||||
|         apiKey: getCustomOpenaiApiKey(), | ||||
|         modelName: getCustomOpenaiModelName(), | ||||
|         temperature: 0.7, | ||||
|         configuration: { | ||||
|   | ||||
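This same one-line change repeats across the API routes below: the `openAIApiKey` constructor option becomes `apiKey`, the name `@langchain/openai` 0.6.x expects. A minimal sketch of the full call, with placeholder values standing in for what the routes read from the project config via `getCustomOpenaiApiKey()` and `getCustomOpenaiModelName()`:

```ts
import { ChatOpenAI } from '@langchain/openai';

// Hedged sketch: the key, model name, and base URL are placeholders, not
// values from the repository; only the `apiKey` field name is the point here.
const llm = new ChatOpenAI({
  apiKey: 'sk-placeholder',      // formerly `openAIApiKey`
  modelName: 'my-custom-model',  // hypothetical custom model id
  temperature: 0.7,
  configuration: {
    baseURL: 'https://my-openai-compatible-endpoint.example/v1',
  },
});
```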
| @@ -49,7 +49,7 @@ export const POST = async (req: Request) => { | ||||
|  | ||||
|     if (body.chatModel?.provider === 'custom_openai') { | ||||
|       llm = new ChatOpenAI({ | ||||
|         openAIApiKey: getCustomOpenaiApiKey(), | ||||
|         apiKey: getCustomOpenaiApiKey(), | ||||
|         modelName: getCustomOpenaiModelName(), | ||||
|         temperature: 0.7, | ||||
|         configuration: { | ||||
|   | ||||
| @@ -81,7 +81,7 @@ export const POST = async (req: Request) => { | ||||
|     if (body.chatModel?.provider === 'custom_openai') { | ||||
|       llm = new ChatOpenAI({ | ||||
|         modelName: body.chatModel?.name || getCustomOpenaiModelName(), | ||||
|         openAIApiKey: | ||||
|         apiKey: | ||||
|           body.chatModel?.customOpenAIKey || getCustomOpenaiApiKey(), | ||||
|         temperature: 0.7, | ||||
|         configuration: { | ||||
|   | ||||
| @@ -48,7 +48,7 @@ export const POST = async (req: Request) => { | ||||
|  | ||||
|     if (body.chatModel?.provider === 'custom_openai') { | ||||
|       llm = new ChatOpenAI({ | ||||
|         openAIApiKey: getCustomOpenaiApiKey(), | ||||
|         apiKey: getCustomOpenaiApiKey(), | ||||
|         modelName: getCustomOpenaiModelName(), | ||||
|         temperature: 0.7, | ||||
|         configuration: { | ||||
|   | ||||
| @@ -49,7 +49,7 @@ export const POST = async (req: Request) => { | ||||
|  | ||||
|     if (body.chatModel?.provider === 'custom_openai') { | ||||
|       llm = new ChatOpenAI({ | ||||
|         openAIApiKey: getCustomOpenaiApiKey(), | ||||
|         apiKey: getCustomOpenaiApiKey(), | ||||
|         modelName: getCustomOpenaiModelName(), | ||||
|         temperature: 0.7, | ||||
|         configuration: { | ||||
|   | ||||
| @@ -1,63 +1,41 @@ | ||||
| export const webSearchRetrieverPrompt = ` | ||||
| You are an AI question rephraser. You will be given a conversation and a follow-up question,  you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it. | ||||
| If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic). | ||||
| If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block. | ||||
| You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response. | ||||
| You are an AI question rephraser. You will be given a conversation and a follow-up question; rephrase it into a standalone question that another LLM can use to search the web. | ||||
|  | ||||
| There are several examples attached for your reference inside the below \`examples\` XML block | ||||
| Return ONLY a JSON object that matches this schema: | ||||
| query: string   // the standalone question (or "summarize") | ||||
| links: string[] // URLs extracted from the user query (empty if none) | ||||
| searchRequired: boolean // true if web search is needed, false for greetings/simple writing tasks | ||||
| searchMode: "" | "normal" | "news" // "" when searchRequired is false; "news" if the user asks for news/articles, otherwise "normal" | ||||
|  | ||||
| <examples> | ||||
| 1. Follow up question: What is the capital of France | ||||
| Rephrased question:\` | ||||
| <question> | ||||
| Capital of france | ||||
| </question> | ||||
| \` | ||||
| Rules | ||||
| - Greetings / simple writing tasks → query:"", links:[], searchRequired:false, searchMode:"" | ||||
| - Summarizing a URL → query:"summarize", links:[url...], searchRequired:true, searchMode:"normal" | ||||
| - Asking for news/articles → searchMode:"news" | ||||
|  | ||||
| Examples | ||||
| 1. Follow-up: What is the capital of France? | ||||
| "query":"capital of France","links":[],"searchRequired":true,"searchMode":"normal" | ||||
|  | ||||
| 2. Hi, how are you? | ||||
| Rephrased question\` | ||||
| <question> | ||||
| not_needed | ||||
| </question> | ||||
| \` | ||||
| "query":"","links":[],"searchRequired":false,"searchMode":"" | ||||
|  | ||||
| 3. Follow up question: What is Docker? | ||||
| Rephrased question: \` | ||||
| <question> | ||||
| What is Docker | ||||
| </question> | ||||
| \` | ||||
| 3. Follow-up: What is Docker? | ||||
| "query":"what is Docker","links":[],"searchRequired":true,"searchMode":"normal" | ||||
|  | ||||
| 4. Follow up question: Can you tell me what is X from https://example.com | ||||
| Rephrased question: \` | ||||
| <question> | ||||
| Can you tell me what is X? | ||||
| </question> | ||||
| 4. Follow-up: Can you tell me what is X from https://example.com? | ||||
| "query":"what is X","links":["https://example.com"],"searchRequired":true,"searchMode":"normal" | ||||
|  | ||||
| <links> | ||||
| https://example.com | ||||
| </links> | ||||
| \` | ||||
| 5. Follow-up: Summarize the content from https://example.com | ||||
| "query":"summarize","links":["https://example.com"],"searchRequired":true,"searchMode":"normal" | ||||
|  | ||||
| 5. Follow up question: Summarize the content from https://example.com | ||||
| Rephrased question: \` | ||||
| <question> | ||||
| summarize | ||||
| </question> | ||||
|  | ||||
| <links> | ||||
| https://example.com | ||||
| </links> | ||||
| \` | ||||
| </examples> | ||||
|  | ||||
| Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above. | ||||
| 6. Follow-up: Latest news about AI | ||||
| "query":"latest news about AI","links":[],"searchRequired":true,"searchMode":"news" | ||||
|  | ||||
| <conversation> | ||||
| {chat_history} | ||||
| </conversation> | ||||
|  | ||||
| Follow up question: {query} | ||||
| Follow-up question: {query} | ||||
| Rephrased question: | ||||
| `; | ||||
|  | ||||
|   | ||||
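The retriever prompt drops the `<question>`/`<links>` XML blocks and the `not_needed` sentinel in favour of a single JSON object, matching the Zod schema added to the search agent further down. A hedged illustration of the output the model is now expected to produce for example 6 above (an illustration of the shape, not output captured from a model):

```ts
// Example object matching the fields the prompt asks for.
const exampleRephraserOutput = {
  query: 'latest news about AI',
  links: [] as string[],
  searchRequired: true,
  searchMode: 'news' as const, // '' | 'normal' | 'news'
};
```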
| @@ -38,7 +38,7 @@ export const loadAimlApiChatModels = async () => { | ||||
|         chatModels[model.id] = { | ||||
|           displayName: model.name || model.id, | ||||
|           model: new ChatOpenAI({ | ||||
|             openAIApiKey: apiKey, | ||||
|             apiKey: apiKey, | ||||
|             modelName: model.id, | ||||
|             temperature: 0.7, | ||||
|             configuration: { | ||||
| @@ -76,7 +76,7 @@ export const loadAimlApiEmbeddingModels = async () => { | ||||
|         embeddingModels[model.id] = { | ||||
|           displayName: model.name || model.id, | ||||
|           model: new OpenAIEmbeddings({ | ||||
|             openAIApiKey: apiKey, | ||||
|             apiKey: apiKey, | ||||
|             modelName: model.id, | ||||
|             configuration: { | ||||
|               baseURL: API_URL, | ||||
|   | ||||
| @@ -31,7 +31,7 @@ export const loadDeepseekChatModels = async () => { | ||||
|       chatModels[model.key] = { | ||||
|         displayName: model.displayName, | ||||
|         model: new ChatOpenAI({ | ||||
|           openAIApiKey: deepseekApiKey, | ||||
|           apiKey: deepseekApiKey, | ||||
|           modelName: model.key, | ||||
|           temperature: 0.7, | ||||
|           configuration: { | ||||
|   | ||||
| @@ -29,12 +29,15 @@ export const loadGroqChatModels = async () => { | ||||
|       chatModels[model.id] = { | ||||
|         displayName: model.id, | ||||
|         model: new ChatOpenAI({ | ||||
|           openAIApiKey: groqApiKey, | ||||
|           apiKey: groqApiKey, | ||||
|           modelName: model.id, | ||||
|           temperature: 0.7, | ||||
|           configuration: { | ||||
|             baseURL: 'https://api.groq.com/openai/v1', | ||||
|           }, | ||||
|           metadata: { | ||||
|             'model-type': 'groq', | ||||
|           }, | ||||
|         }) as unknown as BaseChatModel, | ||||
|       }; | ||||
|     }); | ||||
|   | ||||
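The new `metadata: { 'model-type': 'groq' }` tag is not consumed here; it is read later by the search agent (see the metaSearchAgent hunk below) to request structured output with the `json-object` method for Groq-hosted models. A small sketch of that consumption, with the schema left abstract:

```ts
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { z } from 'zod';

// Hedged sketch mirroring the check in the metaSearchAgent diff: Groq models
// (tagged above) get the 'json-object' method, everything else uses the default.
const bindStructuredOutput = (llm: BaseChatModel, schema: z.AnyZodObject) =>
  llm.withStructuredOutput(schema, {
    ...(llm.metadata?.['model-type'] === 'groq' ? { method: 'json-object' } : {}),
  });
```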
| @@ -118,7 +118,7 @@ export const getAvailableChatModelProviders = async () => { | ||||
|           [customOpenAiModelName]: { | ||||
|             displayName: customOpenAiModelName, | ||||
|             model: new ChatOpenAI({ | ||||
|               openAIApiKey: customOpenAiApiKey, | ||||
|               apiKey: customOpenAiApiKey, | ||||
|               modelName: customOpenAiModelName, | ||||
|               temperature: 0.7, | ||||
|               configuration: { | ||||
|   | ||||
| @@ -47,7 +47,7 @@ export const loadLMStudioChatModels = async () => { | ||||
|       chatModels[model.id] = { | ||||
|         displayName: model.name || model.id, | ||||
|         model: new ChatOpenAI({ | ||||
|           openAIApiKey: 'lm-studio', | ||||
|           apiKey: 'lm-studio', | ||||
|           configuration: { | ||||
|             baseURL: ensureV1Endpoint(endpoint), | ||||
|           }, | ||||
| @@ -83,7 +83,7 @@ export const loadLMStudioEmbeddingsModels = async () => { | ||||
|       embeddingsModels[model.id] = { | ||||
|         displayName: model.name || model.id, | ||||
|         model: new OpenAIEmbeddings({ | ||||
|           openAIApiKey: 'lm-studio', | ||||
|           apiKey: 'lm-studio', | ||||
|           configuration: { | ||||
|             baseURL: ensureV1Endpoint(endpoint), | ||||
|           }, | ||||
|   | ||||
| @@ -6,8 +6,8 @@ export const PROVIDER_INFO = { | ||||
|   key: 'ollama', | ||||
|   displayName: 'Ollama', | ||||
| }; | ||||
| import { ChatOllama } from '@langchain/community/chat_models/ollama'; | ||||
| import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama'; | ||||
| import { ChatOllama } from '@langchain/ollama'; | ||||
| import { OllamaEmbeddings } from '@langchain/ollama'; | ||||
|  | ||||
| export const loadOllamaChatModels = async () => { | ||||
|   const ollamaApiEndpoint = getOllamaApiEndpoint(); | ||||
|   | ||||
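Both Ollama classes now come from the dedicated `@langchain/ollama` package added in package.json rather than the deprecated `@langchain/community` entry points. A minimal sketch of constructing them against that package; the endpoint and model names are placeholders, while the real endpoint comes from `getOllamaApiEndpoint()`:

```ts
import { ChatOllama, OllamaEmbeddings } from '@langchain/ollama';

// Hedged sketch: base URL and model names are illustrative placeholders.
const chat = new ChatOllama({
  baseUrl: 'http://localhost:11434',
  model: 'llama3.1',
  temperature: 0.7,
});

const embeddings = new OllamaEmbeddings({
  baseUrl: 'http://localhost:11434',
  model: 'nomic-embed-text',
});
```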
| @@ -67,7 +67,7 @@ export const loadOpenAIChatModels = async () => { | ||||
|       chatModels[model.key] = { | ||||
|         displayName: model.displayName, | ||||
|         model: new ChatOpenAI({ | ||||
|           openAIApiKey: openaiApiKey, | ||||
|           apiKey: openaiApiKey, | ||||
|           modelName: model.key, | ||||
|           temperature: 0.7, | ||||
|         }) as unknown as BaseChatModel, | ||||
| @@ -93,7 +93,7 @@ export const loadOpenAIEmbeddingModels = async () => { | ||||
|       embeddingModels[model.key] = { | ||||
|         displayName: model.displayName, | ||||
|         model: new OpenAIEmbeddings({ | ||||
|           openAIApiKey: openaiApiKey, | ||||
|           apiKey: openaiApiKey, | ||||
|           modelName: model.key, | ||||
|         }) as unknown as Embeddings, | ||||
|       }; | ||||
|   | ||||
| @@ -24,6 +24,7 @@ import computeSimilarity from '../utils/computeSimilarity'; | ||||
| import formatChatHistoryAsString from '../utils/formatHistory'; | ||||
| import eventEmitter from 'events'; | ||||
| import { StreamEvent } from '@langchain/core/tracers/log_stream'; | ||||
| import { z } from 'zod'; | ||||
|  | ||||
| export interface MetaSearchAgentType { | ||||
|   searchAndAnswer: ( | ||||
| @@ -52,6 +53,17 @@ type BasicChainInput = { | ||||
|   query: string; | ||||
| }; | ||||
|  | ||||
| const retrieverLLMOutputSchema = z.object({ | ||||
|   query: z.string().describe('The query to search the web for.'), | ||||
|   links: z | ||||
|     .array(z.string()) | ||||
|     .describe('The links to search/summarize if present'), | ||||
|   searchRequired: z | ||||
|     .boolean() | ||||
|     .describe('Wether there is a need to search the web'), | ||||
|   searchMode: z.enum(['', 'normal', 'news']).describe('The search mode.'), | ||||
| }); | ||||
|  | ||||
| class MetaSearchAgent implements MetaSearchAgentType { | ||||
|   private config: Config; | ||||
|   private strParser = new StringOutputParser(); | ||||
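The schema gives the retriever a typed contract in place of the old `LineListOutputParser`/`LineOutputParser` string parsing, and `z.infer` supplies the type of the value the `RunnableLambda` receives. A small sketch of how the schema validates a response (the `.describe()` annotations are omitted here):

```ts
import { z } from 'zod';

// Same shape as retrieverLLMOutputSchema above, reproduced so the sketch stands alone.
const schema = z.object({
  query: z.string(),
  links: z.array(z.string()),
  searchRequired: z.boolean(),
  searchMode: z.enum(['', 'normal', 'news']),
});

type RetrieverOutput = z.infer<typeof schema>;

// parse() throws if a field is missing or has the wrong type.
const parsed: RetrieverOutput = schema.parse({
  query: 'capital of France',
  links: [],
  searchRequired: true,
  searchMode: 'normal',
});
```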
| @@ -62,73 +74,71 @@ class MetaSearchAgent implements MetaSearchAgentType { | ||||
|  | ||||
|   private async createSearchRetrieverChain(llm: BaseChatModel) { | ||||
|     (llm as unknown as ChatOpenAI).temperature = 0; | ||||
|  | ||||
|     return RunnableSequence.from([ | ||||
|       PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt), | ||||
|       llm, | ||||
|       this.strParser, | ||||
|       RunnableLambda.from(async (input: string) => { | ||||
|         const linksOutputParser = new LineListOutputParser({ | ||||
|           key: 'links', | ||||
|         }); | ||||
|       Object.assign( | ||||
|         Object.create(Object.getPrototypeOf(llm)), | ||||
|         llm, | ||||
|       ).withStructuredOutput(retrieverLLMOutputSchema, { | ||||
|         ...(llm.metadata?.['model-type'] === 'groq' | ||||
|           ? { | ||||
|               method: 'json-object', | ||||
|             } | ||||
|           : {}), | ||||
|       }), | ||||
|       RunnableLambda.from( | ||||
|         async (input: z.infer<typeof retrieverLLMOutputSchema>) => { | ||||
|           let question = input.query; | ||||
|           const links = input.links; | ||||
|  | ||||
|         const questionOutputParser = new LineOutputParser({ | ||||
|           key: 'question', | ||||
|         }); | ||||
|  | ||||
|         const links = await linksOutputParser.parse(input); | ||||
|         let question = this.config.summarizer | ||||
|           ? await questionOutputParser.parse(input) | ||||
|           : input; | ||||
|  | ||||
|         if (question === 'not_needed') { | ||||
|           return { query: '', docs: [] }; | ||||
|         } | ||||
|  | ||||
|         if (links.length > 0) { | ||||
|           if (question.length === 0) { | ||||
|             question = 'summarize'; | ||||
|           if (!input.searchRequired) { | ||||
|             return { query: '', docs: [] }; | ||||
|           } | ||||
|  | ||||
|           let docs: Document[] = []; | ||||
|  | ||||
|           const linkDocs = await getDocumentsFromLinks({ links }); | ||||
|  | ||||
|           const docGroups: Document[] = []; | ||||
|  | ||||
|           linkDocs.map((doc) => { | ||||
|             const URLDocExists = docGroups.find( | ||||
|               (d) => | ||||
|                 d.metadata.url === doc.metadata.url && | ||||
|                 d.metadata.totalDocs < 10, | ||||
|             ); | ||||
|  | ||||
|             if (!URLDocExists) { | ||||
|               docGroups.push({ | ||||
|                 ...doc, | ||||
|                 metadata: { | ||||
|                   ...doc.metadata, | ||||
|                   totalDocs: 1, | ||||
|                 }, | ||||
|               }); | ||||
|           if (links.length > 0) { | ||||
|             if (question.length === 0) { | ||||
|               question = 'summarize'; | ||||
|             } | ||||
|  | ||||
|             const docIndex = docGroups.findIndex( | ||||
|               (d) => | ||||
|                 d.metadata.url === doc.metadata.url && | ||||
|                 d.metadata.totalDocs < 10, | ||||
|             ); | ||||
|             let docs: Document[] = []; | ||||
|  | ||||
|             if (docIndex !== -1) { | ||||
|               docGroups[docIndex].pageContent = | ||||
|                 docGroups[docIndex].pageContent + `\n\n` + doc.pageContent; | ||||
|               docGroups[docIndex].metadata.totalDocs += 1; | ||||
|             } | ||||
|           }); | ||||
|             const linkDocs = await getDocumentsFromLinks({ links }); | ||||
|  | ||||
|           await Promise.all( | ||||
|             docGroups.map(async (doc) => { | ||||
|               const res = await llm.invoke(` | ||||
|             const docGroups: Document[] = []; | ||||
|  | ||||
|             linkDocs.map((doc) => { | ||||
|               const URLDocExists = docGroups.find( | ||||
|                 (d) => | ||||
|                   d.metadata.url === doc.metadata.url && | ||||
|                   d.metadata.totalDocs < 10, | ||||
|               ); | ||||
|  | ||||
|               if (!URLDocExists) { | ||||
|                 docGroups.push({ | ||||
|                   ...doc, | ||||
|                   metadata: { | ||||
|                     ...doc.metadata, | ||||
|                     totalDocs: 1, | ||||
|                   }, | ||||
|                 }); | ||||
|               } | ||||
|  | ||||
|               const docIndex = docGroups.findIndex( | ||||
|                 (d) => | ||||
|                   d.metadata.url === doc.metadata.url && | ||||
|                   d.metadata.totalDocs < 10, | ||||
|               ); | ||||
|  | ||||
|               if (docIndex !== -1) { | ||||
|                 docGroups[docIndex].pageContent = | ||||
|                   docGroups[docIndex].pageContent + `\n\n` + doc.pageContent; | ||||
|                 docGroups[docIndex].metadata.totalDocs += 1; | ||||
|               } | ||||
|             }); | ||||
|  | ||||
|             await Promise.all( | ||||
|               docGroups.map(async (doc) => { | ||||
|                 const res = await llm.invoke(` | ||||
|             You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the  | ||||
|             text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query. | ||||
|             If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary. | ||||
| @@ -189,46 +199,50 @@ class MetaSearchAgent implements MetaSearchAgentType { | ||||
|             Make sure to answer the query in the summary. | ||||
|           `); | ||||
|  | ||||
|               const document = new Document({ | ||||
|                 pageContent: res.content as string, | ||||
|                 metadata: { | ||||
|                   title: doc.metadata.title, | ||||
|                   url: doc.metadata.url, | ||||
|                 }, | ||||
|               }); | ||||
|                 const document = new Document({ | ||||
|                   pageContent: res.content as string, | ||||
|                   metadata: { | ||||
|                     title: doc.metadata.title, | ||||
|                     url: doc.metadata.url, | ||||
|                   }, | ||||
|                 }); | ||||
|  | ||||
|               docs.push(document); | ||||
|             }), | ||||
|           ); | ||||
|  | ||||
|           return { query: question, docs: docs }; | ||||
|         } else { | ||||
|           question = question.replace(/<think>.*?<\/think>/g, ''); | ||||
|  | ||||
|           const res = await searchSearxng(question, { | ||||
|             language: 'en', | ||||
|             engines: this.config.activeEngines, | ||||
|           }); | ||||
|  | ||||
|           const documents = res.results.map( | ||||
|             (result) => | ||||
|               new Document({ | ||||
|                 pageContent: | ||||
|                   result.content || | ||||
|                   (this.config.activeEngines.includes('youtube') | ||||
|                     ? result.title | ||||
|                     : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */, | ||||
|                 metadata: { | ||||
|                   title: result.title, | ||||
|                   url: result.url, | ||||
|                   ...(result.img_src && { img_src: result.img_src }), | ||||
|                 }, | ||||
|                 docs.push(document); | ||||
|               }), | ||||
|           ); | ||||
|             ); | ||||
|  | ||||
|           return { query: question, docs: documents }; | ||||
|         } | ||||
|       }), | ||||
|             return { query: question, docs: docs }; | ||||
|           } else { | ||||
|             question = question.replace(/<think>.*?<\/think>/g, ''); | ||||
|  | ||||
|             const res = await searchSearxng(question, { | ||||
|               language: 'en', | ||||
|               engines: | ||||
|                 input.searchMode === 'normal' | ||||
|                   ? this.config.activeEngines | ||||
|                   : ['bing news'], | ||||
|             }); | ||||
|  | ||||
|             const documents = res.results.map( | ||||
|               (result) => | ||||
|                 new Document({ | ||||
|                   pageContent: | ||||
|                     result.content || | ||||
|                     (this.config.activeEngines.includes('youtube') | ||||
|                       ? result.title | ||||
|                       : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */, | ||||
|                   metadata: { | ||||
|                     title: result.title, | ||||
|                     url: result.url, | ||||
|                     ...(result.img_src && { img_src: result.img_src }), | ||||
|                   }, | ||||
|                 }), | ||||
|             ); | ||||
|  | ||||
|             return { query: question, docs: documents }; | ||||
|           } | ||||
|         }, | ||||
|       ), | ||||
|     ]); | ||||
|   } | ||||
|  | ||||
|   | ||||
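Net effect of the retriever rewrite: the chain binds the schema with `withStructuredOutput`, short-circuits when `searchRequired` is false (replacing the old `not_needed` sentinel), and switches SearXNG engines when the model asks for news. A hedged sketch of that branching, with the repo's `searchSearxng` helper only declared so the snippet stands alone:

```ts
// Declared stub for the project's SearXNG helper; the real implementation
// lives elsewhere in the repository.
declare function searchSearxng(
  query: string,
  opts: { language: string; engines: string[] },
): Promise<{ results: { title: string; url: string; content?: string }[] }>;

type RetrieverOutput = {
  query: string;
  links: string[];
  searchRequired: boolean;
  searchMode: '' | 'normal' | 'news';
};

// Mirrors the new control flow: skip search for greetings and plain writing
// tasks, use 'bing news' when the model asked for news, otherwise the
// configured engines.
async function retrieve(out: RetrieverOutput, activeEngines: string[]) {
  if (!out.searchRequired) return { query: '', docs: [] };

  const engines = out.searchMode === 'normal' ? activeEngines : ['bing news'];
  const res = await searchSearxng(out.query, { language: 'en', engines });
  return { query: out.query, docs: res.results };
}
```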