diff --git a/src/lib/search/metaSearchAgent.ts b/src/lib/search/metaSearchAgent.ts
index 4a3d817..8308936 100644
--- a/src/lib/search/metaSearchAgent.ts
+++ b/src/lib/search/metaSearchAgent.ts
@@ -6,11 +6,6 @@ import {
MessagesPlaceholder,
PromptTemplate,
} from '@langchain/core/prompts';
-import {
- RunnableLambda,
- RunnableMap,
- RunnableSequence,
-} from '@langchain/core/runnables';
import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import LineListOutputParser from '../outputParsers/listLineOutputParser';
@@ -24,6 +19,7 @@ import computeSimilarity from '../utils/computeSimilarity';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import { StreamEvent } from '@langchain/core/tracers/log_stream';
+import { EventEmitter } from 'node:events';
export interface MetaSearchAgentType {
searchAndAnswer: (
@@ -46,7 +42,7 @@ interface Config {
activeEngines: string[];
}
-type BasicChainInput = {
+type SearchInput = { // Per-request input handed to searchSources and streamAnswer: the prior chat turns plus the user's current query.
chat_history: BaseMessage[];
query: string;
};
@@ -59,235 +55,240 @@ class MetaSearchAgent implements MetaSearchAgentType {
this.config = config;
}
- private async createSearchRetrieverChain(llm: BaseChatModel) {
+  /**
+   * Turns the conversation into a standalone query and collects documents for it.
+   *
+   * The query-generator prompt is rendered with the formatted chat history and
+   * the user's query and sent to `llm`. Depending on the model output:
+   * - the parsed question is `not_needed`: returns an empty query and no documents;
+   * - the output lists links: the pages are fetched, their chunks are grouped per
+   *   URL (at most 10 chunks per group) and every group is summarized by `llm`;
+   * - otherwise: the question is sent to SearXNG with the configured engines.
+   *
+   * Side effect: sets `temperature` to 0 on the model instance that was passed
+   * in (the cast assumes the model exposes a `temperature` field).
+   *
+   * @param llm Chat model used for query generation and link summarization.
+   * @param input Chat history and the user's current query.
+   * @param emitter Not used by this method at present; kept so the call site stays unchanged.
+   * @returns The query to answer and the documents found for it.
+   */
+  private async searchSources(
+    llm: BaseChatModel,
+    input: SearchInput,
+    emitter: EventEmitter,
+  ): Promise<{ query: string; docs: Document[] }> {
(llm as unknown as ChatOpenAI).temperature = 0;
- return RunnableSequence.from([
- PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
- llm,
- this.strParser,
- RunnableLambda.from(async (input: string) => {
- const linksOutputParser = new LineListOutputParser({
- key: 'links',
- });
- const questionOutputParser = new LineOutputParser({
- key: 'question',
- });
- const links = await linksOutputParser.parse(input);
- let question = this.config.summarizer
- ? await questionOutputParser.parse(input)
- : input;
- if (question === 'not_needed') {
- return { query: '', docs: [] };
- }
- if (links.length > 0) {
- if (question.length === 0) {
- question = 'summarize';
- }
- let docs: Document[] = [];
-
- const linkDocs = await getDocumentsFromLinks({ links });
-
- const docGroups: Document[] = [];
-
- linkDocs.map((doc) => {
- const URLDocExists = docGroups.find(
- (d) =>
- d.metadata.url === doc.metadata.url &&
- d.metadata.totalDocs < 10,
- );
-
- if (!URLDocExists) {
- docGroups.push({
- ...doc,
- metadata: {
- ...doc.metadata,
- totalDocs: 1,
- },
- });
- }
-
- const docIndex = docGroups.findIndex(
- (d) =>
- d.metadata.url === doc.metadata.url &&
- d.metadata.totalDocs < 10,
- );
-
- if (docIndex !== -1) {
- docGroups[docIndex].pageContent =
- docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
- docGroups[docIndex].metadata.totalDocs += 1;
- }
- });
-
- await Promise.all(
- docGroups.map(async (doc) => {
- const res = await llm.invoke(`
- You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
- text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
- If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
-
- - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
- - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
- - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
-
- The text will be shared inside the \`text\` XML tag, and the query inside the \`query\` XML tag.
-
-
- 1. \`
- Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers.
- It was first released in 2013 and is developed by Docker, Inc. Docker is designed to make it easier to create, deploy, and run applications
- by using containers.
-
-
-
- What is Docker and how does it work?
-
-
- Response:
- Docker is a revolutionary platform-as-a-service product developed by Docker, Inc., that uses container technology to make application
- deployment more efficient. It allows developers to package their software with all necessary dependencies, making it easier to run in
- any environment. Released in 2013, Docker has transformed the way applications are built, deployed, and managed.
- \`
- 2. \`
- The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general
- relativity. However, the word "relativity" is sometimes used in reference to Galilean invariance. The term "theory of relativity" was based
- on the expression "relative theory" used by Max Planck in 1906. The theory of relativity usually encompasses two interrelated theories by
- Albert Einstein: special relativity and general relativity. Special relativity applies to all physical phenomena in the absence of gravity.
- General relativity explains the law of gravitation and its relation to other forces of nature. It applies to the cosmological and astrophysical
- realm, including astronomy.
-
-
-
- summarize
-
-
- Response:
- The theory of relativity, developed by Albert Einstein, encompasses two main theories: special relativity and general relativity. Special
- relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravitation and its
- relation to other forces of nature. The theory of relativity is based on the concept of "relative theory," as introduced by Max Planck in
- 1906. It is a fundamental theory in physics that has revolutionized our understanding of the universe.
- \`
-
-
- Everything below is the actual data you will be working with. Good luck!
-
-
- ${question}
-
-
-
- ${doc.pageContent}
-
-
- Make sure to answer the query in the summary.
- `);
-
- const document = new Document({
- pageContent: res.content as string,
- metadata: {
- title: doc.metadata.title,
- url: doc.metadata.url,
- },
- });
-
- docs.push(document);
- }),
- );
-
- return { query: question, docs: docs };
- } else {
- question = question.replace(/.*?<\/think>/g, '');
-
- const res = await searchSearxng(question, {
- language: 'en',
- engines: this.config.activeEngines,
- });
-
- const documents = res.results.map(
- (result) =>
- new Document({
- pageContent:
- result.content ||
- (this.config.activeEngines.includes('youtube')
- ? result.title
- : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
- metadata: {
- title: result.title,
- url: result.url,
- ...(result.img_src && { img_src: result.img_src }),
- },
- }),
- );
-
- return { query: question, docs: documents };
- }
- }),
- ]);
+    const chatPrompt = PromptTemplate.fromTemplate(
+      this.config.queryGeneratorPrompt,
+    );
+
+    const processedChatPrompt = await chatPrompt.invoke({
+      chat_history: formatChatHistoryAsString(input.chat_history),
+      query: input.query,
+    });
+
+    const llmRes = await llm.invoke(processedChatPrompt);
+    const messageStr = await this.strParser.invoke(llmRes);
+
+    const linksOutputParser = new LineListOutputParser({
+      key: 'links',
+    });
+
+    const questionOutputParser = new LineOutputParser({
+      key: 'question',
+    });
+
+    const links = await linksOutputParser.parse(messageStr);
+    let question = this.config.summarizer
+      ? await questionOutputParser.parse(messageStr)
+      : messageStr;
+
+    if (question === 'not_needed') {
+      return { query: '', docs: [] };
+    }
+
+    // No links in the model output: run a regular web search.
+    if (links.length === 0) {
+      question = question.replace(/.*?<\/think>/g, '');
+
+      const res = await searchSearxng(question, {
+        language: 'en',
+        engines: this.config.activeEngines,
+      });
+
+      const documents = res.results.map(
+        (result) =>
+          new Document({
+            pageContent:
+              result.content ||
+              (this.config.activeEngines.includes('youtube')
+                ? result.title
+                : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
+            metadata: {
+              title: result.title,
+              url: result.url,
+              ...(result.img_src && { img_src: result.img_src }),
+            },
+          }),
+      );
+
+      return { query: question, docs: documents };
+    }
+
+    // Links without an explicit question mean "summarize these pages".
+    if (question.length === 0) {
+      question = 'summarize';
+    }
+
+    const linkDocs = await getDocumentsFromLinks({ links });
+
+    // Merge chunks that share a URL into groups of at most 10 chunks each.
+    const docGroups: Document[] = [];
+
+    for (const doc of linkDocs) {
+      const openGroup = docGroups.find(
+        (d) =>
+          d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
+      );
+
+      if (openGroup) {
+        openGroup.pageContent = openGroup.pageContent + `\n\n` + doc.pageContent;
+        openGroup.metadata.totalDocs += 1;
+      } else {
+        // A new group already contains this chunk, so it must not be appended
+        // a second time (the earlier code did, duplicating the first chunk).
+        docGroups.push({
+          ...doc,
+          metadata: {
+            ...doc.metadata,
+            totalDocs: 1,
+          },
+        });
+      }
+    }
+
+    // Promise.all keeps the input order, so the summaries line up with
+    // docGroups regardless of which request finishes first.
+    const docs = await Promise.all(
+      docGroups.map(async (doc) => {
+        // The prompt announces `text` and `query` XML tags, so the examples and
+        // the real data are wrapped in exactly those tags.
+        const res = await llm.invoke(`
+            You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
+            text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
+            If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
+
+            - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
+            - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
+            - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
+
+            The text will be shared inside the \`text\` XML tag, and the query inside the \`query\` XML tag.
+
+
+            1. \`<text>
+            Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers.
+            It was first released in 2013 and is developed by Docker, Inc. Docker is designed to make it easier to create, deploy, and run applications
+            by using containers.
+            </text>
+
+            <query>
+            What is Docker and how does it work?
+            </query>
+
+            Response:
+            Docker is a revolutionary platform-as-a-service product developed by Docker, Inc., that uses container technology to make application
+            deployment more efficient. It allows developers to package their software with all necessary dependencies, making it easier to run in
+            any environment. Released in 2013, Docker has transformed the way applications are built, deployed, and managed.
+            \`
+            2. \`<text>
+            The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general
+            relativity. However, the word "relativity" is sometimes used in reference to Galilean invariance. The term "theory of relativity" was based
+            on the expression "relative theory" used by Max Planck in 1906. The theory of relativity usually encompasses two interrelated theories by
+            Albert Einstein: special relativity and general relativity. Special relativity applies to all physical phenomena in the absence of gravity.
+            General relativity explains the law of gravitation and its relation to other forces of nature. It applies to the cosmological and astrophysical
+            realm, including astronomy.
+            </text>
+
+            <query>
+            summarize
+            </query>
+
+            Response:
+            The theory of relativity, developed by Albert Einstein, encompasses two main theories: special relativity and general relativity. Special
+            relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravitation and its
+            relation to other forces of nature. The theory of relativity is based on the concept of "relative theory," as introduced by Max Planck in
+            1906. It is a fundamental theory in physics that has revolutionized our understanding of the universe.
+            \`
+
+
+            Everything below is the actual data you will be working with. Good luck!
+
+            <query>
+            ${question}
+            </query>
+
+            <text>
+            ${doc.pageContent}
+            </text>
+
+            Make sure to answer the query in the summary.
+          `);
+
+        return new Document({
+          pageContent: res.content as string,
+          metadata: {
+            title: doc.metadata.title,
+            url: doc.metadata.url,
+          },
+        });
+      }),
+    );
+
+    return { query: question, docs };
}
- private async createAnsweringChain(
+  /**
+   * Produces the answer for one query and reports progress on `emitter`.
+   *
+   * Steps: optionally search for sources (`config.searchWeb`), rerank the
+   * resulting documents, emit them as a `sources` message, then stream the
+   * model's answer as `response` messages and finish with an `end` event.
+   * Every `data` payload is a JSON string of the form `{ type, data }`.
+   *
+   * The caller starts this method without awaiting it, so a failure is
+   * reported as an `error` event whenever an `error` listener is registered;
+   * without one, the error is rethrown unchanged.
+   *
+   * @param llm Chat model used for the search step and the final answer.
+   * @param fileIds Passed through to `rerankDocs`.
+   * @param embeddings Passed through to `rerankDocs`.
+   * @param optimizationMode Passed through to `rerankDocs`.
+   * @param input Chat history and the user's current query.
+   * @param emitter Receives `data`, `end` and, on failure, `error` events.
+   */
+  private async streamAnswer(
llm: BaseChatModel,
fileIds: string[],
embeddings: Embeddings,
optimizationMode: 'speed' | 'balanced' | 'quality',
+    input: SearchInput,
+    emitter: EventEmitter,
) {
- return RunnableSequence.from([
- RunnableMap.from({
- query: (input: BasicChainInput) => input.query,
- chat_history: (input: BasicChainInput) => input.chat_history,
- date: () => new Date().toISOString(),
- context: RunnableLambda.from(async (input: BasicChainInput) => {
- const processedHistory = formatChatHistoryAsString(
- input.chat_history,
- );
- let docs: Document[] | null = null;
- let query = input.query;
- if (this.config.searchWeb) {
- const searchRetrieverChain =
- await this.createSearchRetrieverChain(llm);
- const searchRetrieverResult = await searchRetrieverChain.invoke({
- chat_history: processedHistory,
- query,
- });
- query = searchRetrieverResult.query;
- docs = searchRetrieverResult.docs;
- }
- const sortedDocs = await this.rerankDocs(
- query,
- docs ?? [],
- fileIds,
- embeddings,
- optimizationMode,
- );
- return sortedDocs;
- })
- .withConfig({
- runName: 'FinalSourceRetriever',
- })
- .pipe(this.processDocs),
- }),
- ChatPromptTemplate.fromMessages([
- ['system', this.config.responsePrompt],
- new MessagesPlaceholder('chat_history'),
- ['user', '{query}'],
- ]),
- llm,
- this.strParser,
- ]).withConfig({
- runName: 'FinalResponseGenerator',
- });
+    try {
+      const chatPrompt = ChatPromptTemplate.fromMessages([
+        ['system', this.config.responsePrompt],
+        new MessagesPlaceholder('chat_history'),
+        ['user', '{query}'],
+      ]);
+
+      let docs: Document[] = [];
+      let query = input.query;
+
+      if (this.config.searchWeb) {
+        const searchResults = await this.searchSources(llm, input, emitter);
+
+        query = searchResults.query;
+        docs = searchResults.docs;
+      }
+
+      const sortedDocs = await this.rerankDocs(
+        query,
+        docs,
+        fileIds,
+        embeddings,
+        optimizationMode,
+      );
+
+      // Sources are published before the first answer chunk.
+      emitter.emit(
+        'data',
+        JSON.stringify({ type: 'sources', data: sortedDocs }),
+      );
+
+      const context = this.processDocs(sortedDocs);
+
+      // The answer prompt receives the user's original query, not the rewritten search query.
+      const formattedChatPrompt = await chatPrompt.invoke({
+        query: input.query,
+        chat_history: input.chat_history,
+        date: new Date().toISOString(),
+        context,
+      });
+
+      const answerStream = await llm.stream(formattedChatPrompt);
+
+      for await (const chunk of answerStream) {
+        const messageStr = await this.strParser.invoke(chunk);
+
+        emitter.emit(
+          'data',
+          JSON.stringify({ type: 'response', data: messageStr }),
+        );
+      }
+
+      emitter.emit('end');
+    } catch (err: unknown) {
+      // Emitting 'error' with no listener would itself throw, so keep the
+      // original failure in that case.
+      if (emitter.listenerCount('error') === 0) {
+        throw err;
+      }
+
+      // NOTE(review): payload mirrors the `{ type, data }` shape of the other
+      // messages; confirm this is what the `error` listeners expect.
+      emitter.emit(
+        'error',
+        JSON.stringify({
+          type: 'error',
+          data: err instanceof Error ? err.message : String(err),
+        }),
+      );
+    }
}
private async rerankDocs(
@@ -428,39 +429,6 @@ class MetaSearchAgent implements MetaSearchAgentType {
.join('\n');
}
- private async handleStream(
- stream: AsyncGenerator,
- emitter: eventEmitter,
- ) {
- for await (const event of stream) {
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalSourceRetriever'
- ) {
- ``;
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'sources', data: event.data.output }),
- );
- }
- if (
- event.event === 'on_chain_stream' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit(
- 'data',
- JSON.stringify({ type: 'response', data: event.data.chunk }),
- );
- }
- if (
- event.event === 'on_chain_end' &&
- event.name === 'FinalResponseGenerator'
- ) {
- emitter.emit('end');
- }
- }
- }
-
async searchAndAnswer(
message: string,
history: BaseMessage[],
@@ -471,25 +439,18 @@ class MetaSearchAgent implements MetaSearchAgentType {
) {
const emitter = new eventEmitter();
- const answeringChain = await this.createAnsweringChain(
+ this.streamAnswer(
llm,
fileIds,
embeddings,
optimizationMode,
- );
-
- const stream = answeringChain.streamEvents(
{
chat_history: history,
query: message,
},
- {
- version: 'v1',
- },
+ emitter,
);
- this.handleStream(stream, emitter);
-
return emitter;
}
}