mirror of https://github.com/ItzCrazyKns/Perplexica.git (synced 2025-11-03 20:28:14 +00:00)

feat(agents): add a unified agent

src/search/metaSearchAgent.ts (new file, 486 lines)

@@ -0,0 +1,486 @@
import { ChatOpenAI } from '@langchain/openai';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import {
  ChatPromptTemplate,
  MessagesPlaceholder,
  PromptTemplate,
} from '@langchain/core/prompts';
import {
  RunnableLambda,
  RunnableMap,
  RunnableSequence,
} from '@langchain/core/runnables';
import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
import LineOutputParser from '../lib/outputParsers/lineOutputParser';
import { getDocumentsFromLinks } from '../utils/documents';
import { Document } from 'langchain/document';
import { searchSearxng } from '../lib/searxng';
import path from 'path';
import fs from 'fs';
import computeSimilarity from '../utils/computeSimilarity';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import { StreamEvent } from '@langchain/core/tracers/log_stream';
import { IterableReadableStream } from '@langchain/core/utils/stream';

export interface MetaSearchAgentType {
  searchAndAnswer: (
    message: string,
    history: BaseMessage[],
    llm: BaseChatModel,
    embeddings: Embeddings,
    optimizationMode: 'speed' | 'balanced' | 'quality',
    fileIds: string[],
  ) => Promise<eventEmitter>;
}

interface Config {
  searchWeb: boolean;
  rerank: boolean;
  summarizer: boolean;
  rerankThreshold: number;
  queryGeneratorPrompt: string;
  responsePrompt: string;
  activeEngines: string[];
}

type BasicChainInput = {
  chat_history: BaseMessage[];
  query: string;
};

class MetaSearchAgent implements MetaSearchAgentType {
  private config: Config;
  private strParser = new StringOutputParser();

  constructor(config: Config) {
    this.config = config;
  }

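  // Assumption (inferred from the parser keys and checks below, not stated in
  // this commit): the query-generator prompt makes the LLM emit a
  // <question>...</question> block (when the summarizer is enabled) and an
  // optional <links>...</links> block with one URL per line, e.g.:
  //
  //   <question>
  //   How does Docker work?
  //   </question>
  //
  //   <links>
  //   https://example.com/docker-intro
  //   </links>
  //
  // A question of `not_needed` short-circuits retrieval entirely.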
  private async createSearchRetrieverChain(llm: BaseChatModel) {
    (llm as unknown as ChatOpenAI).temperature = 0;

    return RunnableSequence.from([
      PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
      llm,
      this.strParser,
      RunnableLambda.from(async (input: string) => {
        const linksOutputParser = new LineListOutputParser({
          key: 'links',
        });

        const questionOutputParser = new LineOutputParser({
          key: 'question',
        });

        const links = await linksOutputParser.parse(input);
        let question = this.config.summarizer
          ? await questionOutputParser.parse(input)
          : input;

        if (question === 'not_needed') {
          return { query: '', docs: [] };
        }

        if (links.length > 0) {
          if (question.length === 0) {
            question = 'summarize';
          }

          const docs: Document[] = [];

          const linkDocs = await getDocumentsFromLinks({ links });

          const docGroups: Document[] = [];

          // Group chunks from the same URL together, capping each group at
          // 10 chunks so a single page cannot dominate the context.
          linkDocs.forEach((doc) => {
            const URLDocExists = docGroups.find(
              (d) =>
                d.metadata.url === doc.metadata.url &&
                d.metadata.totalDocs < 10,
            );

            if (!URLDocExists) {
              docGroups.push({
                ...doc,
                metadata: {
                  ...doc.metadata,
                  totalDocs: 1,
                },
              });
            } else {
              // Only append to a pre-existing group; appending to a freshly
              // created one would duplicate its first chunk.
              const docIndex = docGroups.findIndex(
                (d) =>
                  d.metadata.url === doc.metadata.url &&
                  d.metadata.totalDocs < 10,
              );

              if (docIndex !== -1) {
                docGroups[docIndex].pageContent =
                  docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
                docGroups[docIndex].metadata.totalDocs += 1;
              }
            }
          });

          // Summarize every group against the question, in parallel.
          await Promise.all(
            docGroups.map(async (doc) => {
              const res = await llm.invoke(`
            You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
            text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
            If the query is "summarize", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.

            - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
            - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
            - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.

            The text will be shared inside the \`text\` XML tag, and the query inside the \`query\` XML tag.

            <example>
            1. \`<text>
            Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers.
            It was first released in 2013 and is developed by Docker, Inc. Docker is designed to make it easier to create, deploy, and run applications
            by using containers.
            </text>

            <query>
            What is Docker and how does it work?
            </query>

            Response:
            Docker is a revolutionary platform-as-a-service product developed by Docker, Inc., that uses container technology to make application
            deployment more efficient. It allows developers to package their software with all necessary dependencies, making it easier to run in
            any environment. Released in 2013, Docker has transformed the way applications are built, deployed, and managed.
            \`
            2. \`<text>
            The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general
            relativity. However, the word "relativity" is sometimes used in reference to Galilean invariance. The term "theory of relativity" was based
            on the expression "relative theory" used by Max Planck in 1906. The theory of relativity usually encompasses two interrelated theories by
            Albert Einstein: special relativity and general relativity. Special relativity applies to all physical phenomena in the absence of gravity.
            General relativity explains the law of gravitation and its relation to other forces of nature. It applies to the cosmological and astrophysical
            realm, including astronomy.
            </text>

            <query>
            summarize
            </query>

            Response:
            The theory of relativity, developed by Albert Einstein, encompasses two main theories: special relativity and general relativity. Special
            relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravitation and its
            relation to other forces of nature. The theory of relativity is based on the concept of "relative theory," as introduced by Max Planck in
            1906. It is a fundamental theory in physics that has revolutionized our understanding of the universe.
            \`
            </example>

            Everything below is the actual data you will be working with. Good luck!

            <query>
            ${question}
            </query>

            <text>
            ${doc.pageContent}
            </text>

            Make sure to answer the query in the summary.
          `);

              const document = new Document({
                pageContent: res.content as string,
                metadata: {
                  title: doc.metadata.title,
                  url: doc.metadata.url,
                },
              });

              docs.push(document);
            }),
          );

          return { query: question, docs: docs };
        } else {
          const res = await searchSearxng(question, {
            language: 'en',
            engines: this.config.activeEngines,
          });

          const documents = res.results.map(
            (result) =>
              new Document({
                pageContent: result.content,
                metadata: {
                  title: result.title,
                  url: result.url,
                  ...(result.img_src && { img_src: result.img_src }),
                },
              }),
          );

          return { query: question, docs: documents };
        }
      }),
    ]);
  }

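  // Note: the runnable names configured below ('FinalSourceRetriever' and
  // 'FinalResponseGenerator') are the values handleStream() matches on when
  // relaying stream events, so the two must stay in sync.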
  private async createAnsweringChain(
    llm: BaseChatModel,
    fileIds: string[],
    embeddings: Embeddings,
    optimizationMode: 'speed' | 'balanced' | 'quality',
  ) {
    return RunnableSequence.from([
      RunnableMap.from({
        query: (input: BasicChainInput) => input.query,
        chat_history: (input: BasicChainInput) => input.chat_history,
        context: RunnableLambda.from(async (input: BasicChainInput) => {
          const processedHistory = formatChatHistoryAsString(
            input.chat_history,
          );

          let docs: Document[] | null = null;
          let query = input.query;

          if (this.config.searchWeb) {
            const searchRetrieverChain =
              await this.createSearchRetrieverChain(llm);

            const searchRetrieverResult = await searchRetrieverChain.invoke({
              chat_history: processedHistory,
              query,
            });

            query = searchRetrieverResult.query;
            docs = searchRetrieverResult.docs;
          }

          const sortedDocs = await this.rerankDocs(
            query,
            docs ?? [],
            fileIds,
            embeddings,
            optimizationMode,
          );

          return sortedDocs;
        })
          .withConfig({
            runName: 'FinalSourceRetriever',
          })
          .pipe(this.processDocs),
      }),
      ChatPromptTemplate.fromMessages([
        ['system', this.config.responsePrompt],
        new MessagesPlaceholder('chat_history'),
        ['user', '{query}'],
      ]),
      llm,
      this.strParser,
    ]).withConfig({
      runName: 'FinalResponseGenerator',
    });
  }

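  // Assumption (inferred from the property accesses below): every file id has
  // two sidecar files under uploads/: `<id>-extracted.json` with shape
  // { title: string, contents: string[] } and `<id>-embeddings.json` with
  // shape { embeddings: number[][] }, index-aligned with `contents`.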
  private async rerankDocs(
    query: string,
    docs: Document[],
    fileIds: string[],
    embeddings: Embeddings,
    optimizationMode: 'speed' | 'balanced' | 'quality',
  ) {
    if (docs.length === 0 && fileIds.length === 0) {
      return docs;
    }

    const filesData = fileIds
      .map((file) => {
        const filePath = path.join(process.cwd(), 'uploads', file);

        const contentPath = filePath + '-extracted.json';
        const embeddingsPath = filePath + '-embeddings.json';

        const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
        const embeddingsData = JSON.parse(
          fs.readFileSync(embeddingsPath, 'utf8'),
        );

        const fileSimilaritySearchObject = content.contents.map(
          (c: string, i: number) => {
            return {
              fileName: content.title,
              content: c,
              embeddings: embeddingsData.embeddings[i],
            };
          },
        );

        return fileSimilaritySearchObject;
      })
      .flat();

    if (query.toLocaleLowerCase() === 'summarize') {
      return docs.slice(0, 15);
    }

    const docsWithContent = docs.filter(
      (doc) => doc.pageContent && doc.pageContent.length > 0,
    );

    if (optimizationMode === 'speed' || this.config.rerank === false) {
      if (filesData.length > 0) {
        const queryEmbedding = await embeddings.embedQuery(query);

        const fileDocs = filesData.map((fileData) => {
          return new Document({
            pageContent: fileData.content,
            metadata: {
              title: fileData.fileName,
              url: `File`,
            },
          });
        });

        const similarity = filesData.map((fileData, i) => {
          const sim = computeSimilarity(queryEmbedding, fileData.embeddings);

          return {
            index: i,
            similarity: sim,
          };
        });

        let sortedDocs = similarity
          .filter(
            (sim) => sim.similarity > (this.config.rerankThreshold ?? 0.3),
          )
          .sort((a, b) => b.similarity - a.similarity)
          .slice(0, 15)
          .map((sim) => fileDocs[sim.index]);

        // Leave room for web results when both sources are present.
        sortedDocs =
          docsWithContent.length > 0 ? sortedDocs.slice(0, 8) : sortedDocs;

        return [
          ...sortedDocs,
          ...docsWithContent.slice(0, 15 - sortedDocs.length),
        ];
      } else {
        return docsWithContent.slice(0, 15);
      }
    } else if (optimizationMode === 'balanced') {
      const [docEmbeddings, queryEmbedding] = await Promise.all([
        embeddings.embedDocuments(
          docsWithContent.map((doc) => doc.pageContent),
        ),
        embeddings.embedQuery(query),
      ]);

      docsWithContent.push(
        ...filesData.map((fileData) => {
          return new Document({
            pageContent: fileData.content,
            metadata: {
              title: fileData.fileName,
              url: `File`,
            },
          });
        }),
      );

      docEmbeddings.push(...filesData.map((fileData) => fileData.embeddings));

      const similarity = docEmbeddings.map((docEmbedding, i) => {
        const sim = computeSimilarity(queryEmbedding, docEmbedding);

        return {
          index: i,
          similarity: sim,
        };
      });

      const sortedDocs = similarity
        .filter((sim) => sim.similarity > (this.config.rerankThreshold ?? 0.3))
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, 15)
        .map((sim) => docsWithContent[sim.index]);

      return sortedDocs;
    }

    // 'quality' mode is not handled above; fall back to the unranked
    // documents so downstream processing never receives undefined.
    return docsWithContent.slice(0, 15);
  }

  private processDocs(docs: Document[]) {
    // Number the documents so the response prompt can cite them.
    return docs
      .map((doc, index) => `${index + 1}. ${doc.pageContent}`)
      .join('\n');
  }

  private async handleStream(
    stream: IterableReadableStream<StreamEvent>,
    emitter: eventEmitter,
  ) {
    for await (const event of stream) {
      if (
        event.event === 'on_chain_end' &&
        event.name === 'FinalSourceRetriever'
      ) {
        emitter.emit(
          'data',
          JSON.stringify({ type: 'sources', data: event.data.output }),
        );
      }
      if (
        event.event === 'on_chain_stream' &&
        event.name === 'FinalResponseGenerator'
      ) {
        emitter.emit(
          'data',
          JSON.stringify({ type: 'response', data: event.data.chunk }),
        );
      }
      if (
        event.event === 'on_chain_end' &&
        event.name === 'FinalResponseGenerator'
      ) {
        emitter.emit('end');
      }
    }
  }

  async searchAndAnswer(
    message: string,
    history: BaseMessage[],
    llm: BaseChatModel,
    embeddings: Embeddings,
    optimizationMode: 'speed' | 'balanced' | 'quality',
    fileIds: string[],
  ) {
    const emitter = new eventEmitter();

    const answeringChain = await this.createAnsweringChain(
      llm,
      fileIds,
      embeddings,
      optimizationMode,
    );

    const stream = answeringChain.streamEvents(
      {
        chat_history: history,
        query: message,
      },
      {
        version: 'v1',
      },
    );

    // Intentionally not awaited: events are relayed to the emitter in the
    // background while the emitter itself is returned to the caller at once.
    this.handleStream(stream, emitter);

    return emitter;
  }
}

export default MetaSearchAgent;
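
A minimal usage sketch for the exported agent, based only on the interface above. It assumes an OpenAI-backed model and embeddings from @langchain/openai (a dependency this file already imports from); the prompt strings, engine list, and model name are placeholders, not values from this commit:

import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
import MetaSearchAgent from './search/metaSearchAgent';

async function main() {
  // Hypothetical wiring: the prompts below are placeholders; the real
  // queryGeneratorPrompt / responsePrompt ship elsewhere in the repo.
  const agent = new MetaSearchAgent({
    searchWeb: true,
    rerank: true,
    summarizer: true,
    rerankThreshold: 0.3,
    queryGeneratorPrompt: '...', // placeholder
    responsePrompt: '...', // placeholder
    activeEngines: [],
  });

  const emitter = await agent.searchAndAnswer(
    'What is Docker?',
    [], // chat history (BaseMessage[])
    new ChatOpenAI({ modelName: 'gpt-4o-mini' }), // placeholder model
    new OpenAIEmbeddings(),
    'balanced',
    [], // uploaded file ids
  );

  // The emitter protocol mirrors handleStream(): 'data' carries JSON strings
  // of shape { type: 'sources' | 'response', data: ... }; 'end' signals
  // completion.
  emitter.on('data', (data: string) => {
    const parsed = JSON.parse(data);
    if (parsed.type === 'response') process.stdout.write(parsed.data);
  });
  emitter.on('end', () => process.stdout.write('\n'));
}

main();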