Merge branch 'feat/deep-research' into master-deep-research

2026-06-28 23:33:27 +00:00 · 2025-04-07 10:21:22 +05:30
parent e226645bc7 934fb0a23b
commit d0ba8c9038
2 changed files with 239 additions and 280 deletions
--- a/src/components/ChatWindow.tsx
+++ b/src/components/ChatWindow.tsx
@@ -363,20 +363,18 @@ const ChatWindow = ({ id }: { id?: string }) => {

      if (data.type === 'sources') {
        sources = data.data;
-        if (!added) {
-          setMessages((prevMessages) => [
-            ...prevMessages,
-            {
-              content: '',
-              messageId: data.messageId,
-              chatId: chatId!,
-              role: 'assistant',
-              sources: sources,
-              createdAt: new Date(),
-            },
-          ]);
-          added = true;
-        }
+        setMessages((prevMessages) => [
+          ...prevMessages,
+          {
+            content: '',
+            messageId: data.messageId,
+            chatId: chatId!,
+            role: 'assistant',
+            sources: sources,
+            createdAt: new Date(),
+          },
+        ]);
+        added = true;
        setMessageAppeared(true);
      }

@@ -394,20 +392,20 @@ const ChatWindow = ({ id }: { id?: string }) => {
            },
          ]);
          added = true;
+          setMessageAppeared(true);
+        } else {
+          setMessages((prev) =>
+            prev.map((message) => {
+              if (message.messageId === data.messageId) {
+                return { ...message, content: message.content + data.data };
+              }
+
+              return message;
+            }),
+          );
        }

-        setMessages((prev) =>
-          prev.map((message) => {
-            if (message.messageId === data.messageId) {
-              return { ...message, content: message.content + data.data };
-            }
-
-            return message;
-          }),
-        );
-
        recievedMessage += data.data;
-        setMessageAppeared(true);
      }

      if (data.type === 'messageEnd') {
--- a/src/lib/search/metaSearchAgent.ts
+++ b/src/lib/search/metaSearchAgent.ts
@@ -6,11 +6,6 @@ import {
  MessagesPlaceholder,
  PromptTemplate,
 } from '@langchain/core/prompts';
-import {
-  RunnableLambda,
-  RunnableMap,
-  RunnableSequence,
-} from '@langchain/core/runnables';
 import { BaseMessage } from '@langchain/core/messages';
 import { StringOutputParser } from '@langchain/core/output_parsers';
 import LineListOutputParser from '../outputParsers/listLineOutputParser';
@@ -24,6 +19,7 @@ import computeSimilarity from '../utils/computeSimilarity';
 import formatChatHistoryAsString from '../utils/formatHistory';
 import eventEmitter from 'events';
 import { StreamEvent } from '@langchain/core/tracers/log_stream';
+import { EventEmitter } from 'node:events';

 export interface MetaSearchAgentType {
  searchAndAnswer: (
@@ -47,7 +43,7 @@ interface Config {
  activeEngines: string[];
 }

-type BasicChainInput = {
+type SearchInput = {
  chat_history: BaseMessage[];
  query: string;
 };
@@ -60,237 +56,242 @@ class MetaSearchAgent implements MetaSearchAgentType {
    this.config = config;
  }

-  private async createSearchRetrieverChain(llm: BaseChatModel) {
+  private async searchSources(
+    llm: BaseChatModel,
+    input: SearchInput,
+    emitter: EventEmitter,
+  ) {
    (llm as unknown as ChatOpenAI).temperature = 0;

-    return RunnableSequence.from([
-      PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
-      llm,
-      this.strParser,
-      RunnableLambda.from(async (input: string) => {
-        const linksOutputParser = new LineListOutputParser({
-          key: 'links',
-        });
+    const chatPrompt = PromptTemplate.fromTemplate(
+      this.config.queryGeneratorPrompt,
+    );

-        const questionOutputParser = new LineOutputParser({
-          key: 'question',
-        });
+    const processedChatPrompt = await chatPrompt.invoke({
+      chat_history: formatChatHistoryAsString(input.chat_history),
+      query: input.query,
+    });

-        const links = await linksOutputParser.parse(input);
-        let question = this.config.summarizer
-          ? await questionOutputParser.parse(input)
-          : input;
+    const llmRes = await llm.invoke(processedChatPrompt);
+    const messageStr = await this.strParser.invoke(llmRes);

-        if (question === 'not_needed') {
-          return { query: '', docs: [] };
+    const linksOutputParser = new LineListOutputParser({
+      key: 'links',
+    });
+
+    const questionOutputParser = new LineOutputParser({
+      key: 'question',
+    });
+
+    const links = await linksOutputParser.parse(messageStr);
+    let question = this.config.summarizer
+      ? await questionOutputParser.parse(messageStr)
+      : messageStr;
+
+    if (question === 'not_needed') {
+      return { query: '', docs: [] };
+    }
+
+    if (links.length > 0) {
+      if (question.length === 0) {
+        question = 'summarize';
+      }
+
+      let docs: Document[] = [];
+
+      const linkDocs = await getDocumentsFromLinks({ links });
+
+      const docGroups: Document[] = [];
+
+      linkDocs.map((doc) => {
+        const URLDocExists = docGroups.find(
+          (d) =>
+            d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
+        );
+
+        if (!URLDocExists) {
+          docGroups.push({
+            ...doc,
+            metadata: {
+              ...doc.metadata,
+              totalDocs: 1,
+            },
+          });
        }

-        if (links.length > 0) {
-          if (question.length === 0) {
-            question = 'summarize';
-          }
+        const docIndex = docGroups.findIndex(
+          (d) =>
+            d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
+        );

-          let docs: Document[] = [];
-
-          const linkDocs = await getDocumentsFromLinks({ links });
-
-          const docGroups: Document[] = [];
-
-          linkDocs.map((doc) => {
-            const URLDocExists = docGroups.find(
-              (d) =>
-                d.metadata.url === doc.metadata.url &&
-                d.metadata.totalDocs < 10,
-            );
-
-            if (!URLDocExists) {
-              docGroups.push({
-                ...doc,
-                metadata: {
-                  ...doc.metadata,
-                  totalDocs: 1,
-                },
-              });
-            }
-
-            const docIndex = docGroups.findIndex(
-              (d) =>
-                d.metadata.url === doc.metadata.url &&
-                d.metadata.totalDocs < 10,
-            );
-
-            if (docIndex !== -1) {
-              docGroups[docIndex].pageContent =
-                docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
-              docGroups[docIndex].metadata.totalDocs += 1;
-            }
-          });
-
-          await Promise.all(
-            docGroups.map(async (doc) => {
-              const res = await llm.invoke(`
-            You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the 
-            text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
-            If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
-            
-            - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
-            - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
-            - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
-
-            The text will be shared inside the \`text\` XML tag, and the query inside the \`query\` XML tag.
-
-            <example>
-            1. \`<text>
-            Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers. 
-            It was first released in 2013 and is developed by Docker, Inc. Docker is designed to make it easier to create, deploy, and run applications 
-            by using containers.
-            </text>
-
-            <query>
-            What is Docker and how does it work?
-            </query>
-
-            Response:
-            Docker is a revolutionary platform-as-a-service product developed by Docker, Inc., that uses container technology to make application 
-            deployment more efficient. It allows developers to package their software with all necessary dependencies, making it easier to run in 
-            any environment. Released in 2013, Docker has transformed the way applications are built, deployed, and managed.
-            \`
-            2. \`<text>
-            The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general
-            relativity. However, the word "relativity" is sometimes used in reference to Galilean invariance. The term "theory of relativity" was based
-            on the expression "relative theory" used by Max Planck in 1906. The theory of relativity usually encompasses two interrelated theories by
-            Albert Einstein: special relativity and general relativity. Special relativity applies to all physical phenomena in the absence of gravity.
-            General relativity explains the law of gravitation and its relation to other forces of nature. It applies to the cosmological and astrophysical
-            realm, including astronomy.
-            </text>
-
-            <query>
-            summarize
-            </query>
-
-            Response:
-            The theory of relativity, developed by Albert Einstein, encompasses two main theories: special relativity and general relativity. Special
-            relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravitation and its
-            relation to other forces of nature. The theory of relativity is based on the concept of "relative theory," as introduced by Max Planck in
-            1906. It is a fundamental theory in physics that has revolutionized our understanding of the universe.
-            \`
-            </example>
-
-            Everything below is the actual data you will be working with. Good luck!
-
-            <query>
-            ${question}
-            </query>
-
-            <text>
-            ${doc.pageContent}
-            </text>
-
-            Make sure to answer the query in the summary.
-          `);
-
-              const document = new Document({
-                pageContent: res.content as string,
-                metadata: {
-                  title: doc.metadata.title,
-                  url: doc.metadata.url,
-                },
-              });
-
-              docs.push(document);
-            }),
-          );
-
-          return { query: question, docs: docs };
-        } else {
-          question = question.replace(/<think>.*?<\/think>/g, '');
-
-          const res = await searchSearxng(question, {
-            language: 'en',
-            engines: this.config.activeEngines,
-          });
-
-          const documents = res.results.map(
-            (result) =>
-              new Document({
-                pageContent:
-                  result.content ||
-                  (this.config.activeEngines.includes('youtube')
-                    ? result.title
-                    : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
-                metadata: {
-                  title: result.title,
-                  url: result.url,
-                  ...(result.img_src && { img_src: result.img_src }),
-                },
-              }),
-          );
-
-          return { query: question, docs: documents };
+        if (URLDocExists !== undefined && docIndex !== -1) { // append only to a pre-existing group; a group pushed just above already contains this doc
+          docGroups[docIndex].pageContent =
+            docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
+          docGroups[docIndex].metadata.totalDocs += 1;
        }
-      }),
-    ]);
+      });
+
+      await Promise.all(
+        docGroups.map(async (doc) => {
+          const res = await llm.invoke(`
+        You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the 
+        text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
+        If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
+        
+        - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
+        - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
+        - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
+
+        The text will be shared inside the \`text\` XML tag, and the query inside the \`query\` XML tag.
+
+        <example>
+        1. \`<text>
+        Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers. 
+        It was first released in 2013 and is developed by Docker, Inc. Docker is designed to make it easier to create, deploy, and run applications 
+        by using containers.
+        </text>
+
+        <query>
+        What is Docker and how does it work?
+        </query>
+
+        Response:
+        Docker is a revolutionary platform-as-a-service product developed by Docker, Inc., that uses container technology to make application 
+        deployment more efficient. It allows developers to package their software with all necessary dependencies, making it easier to run in 
+        any environment. Released in 2013, Docker has transformed the way applications are built, deployed, and managed.
+        \`
+        2. \`<text>
+        The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general
+        relativity. However, the word "relativity" is sometimes used in reference to Galilean invariance. The term "theory of relativity" was based
+        on the expression "relative theory" used by Max Planck in 1906. The theory of relativity usually encompasses two interrelated theories by
+        Albert Einstein: special relativity and general relativity. Special relativity applies to all physical phenomena in the absence of gravity.
+        General relativity explains the law of gravitation and its relation to other forces of nature. It applies to the cosmological and astrophysical
+        realm, including astronomy.
+        </text>
+
+        <query>
+        summarize
+        </query>
+
+        Response:
+        The theory of relativity, developed by Albert Einstein, encompasses two main theories: special relativity and general relativity. Special
+        relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravitation and its
+        relation to other forces of nature. The theory of relativity is based on the concept of "relative theory," as introduced by Max Planck in
+        1906. It is a fundamental theory in physics that has revolutionized our understanding of the universe.
+        \`
+        </example>
+
+        Everything below is the actual data you will be working with. Good luck!
+
+        <query>
+        ${question}
+        </query>
+
+        <text>
+        ${doc.pageContent}
+        </text>
+
+        Make sure to answer the query in the summary.
+      `);
+
+          const document = new Document({
+            pageContent: res.content as string,
+            metadata: {
+              title: doc.metadata.title,
+              url: doc.metadata.url,
+            },
+          });
+
+          docs.push(document);
+        }),
+      );
+
+      return { query: question, docs: docs };
+    } else {
+      question = question.replace(/<think>[\s\S]*?<\/think>/g, ''); // [\s\S] also spans newlines, so multi-line <think> blocks are removed
+
+      const res = await searchSearxng(question, {
+        language: 'en',
+        engines: this.config.activeEngines,
+      });
+
+      const documents = res.results.map(
+        (result) =>
+          new Document({
+            pageContent:
+              result.content ||
+              (this.config.activeEngines.includes('youtube')
+                ? result.title
+                : '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
+            metadata: {
+              title: result.title,
+              url: result.url,
+              ...(result.img_src && { img_src: result.img_src }),
+            },
+          }),
+      );
+
+      return { query: question, docs: documents };
+    }
  }

-  private async createAnsweringChain(
+  private async streamAnswer(
    llm: BaseChatModel,
    fileIds: string[],
    embeddings: Embeddings,
    optimizationMode: 'speed' | 'balanced' | 'quality',
    systemInstructions: string,
+    input: SearchInput,
+    emitter: EventEmitter,
  ) {
-    return RunnableSequence.from([
-      RunnableMap.from({
-        systemInstructions: () => systemInstructions,
-        query: (input: BasicChainInput) => input.query,
-        chat_history: (input: BasicChainInput) => input.chat_history,
-        date: () => new Date().toISOString(),
-        context: RunnableLambda.from(async (input: BasicChainInput) => {
-          const processedHistory = formatChatHistoryAsString(
-            input.chat_history,
-          );
+    const chatPrompt = ChatPromptTemplate.fromMessages([
+      ['system', this.config.responsePrompt],
+      new MessagesPlaceholder('chat_history'),
+      ['user', '{query}'],
+    ]);

-          let docs: Document[] | null = null;
-          let query = input.query;
+    let docs: Document[] | null = null;
+    let query = input.query;

-          if (this.config.searchWeb) {
-            const searchRetrieverChain =
-              await this.createSearchRetrieverChain(llm);
+    if (this.config.searchWeb) {
+      const searchResults = await this.searchSources(llm, input, emitter);

-            const searchRetrieverResult = await searchRetrieverChain.invoke({
-              chat_history: processedHistory,
-              query,
-            });
+      query = searchResults.query;
+      docs = searchResults.docs;
+    }

-            query = searchRetrieverResult.query;
-            docs = searchRetrieverResult.docs;
-          }
+    const sortedDocs = await this.rerankDocs(
+      query,
+      docs ?? [],
+      fileIds,
+      embeddings,
+      optimizationMode,
+    );

-          const sortedDocs = await this.rerankDocs(
-            query,
-            docs ?? [],
-            fileIds,
-            embeddings,
-            optimizationMode,
-          );
+    emitter.emit('data', JSON.stringify({ type: 'sources', data: sortedDocs }));

-          return sortedDocs;
-        })
-          .withConfig({
-            runName: 'FinalSourceRetriever',
-          })
-          .pipe(this.processDocs),
-      }),
-      ChatPromptTemplate.fromMessages([
-        ['system', this.config.responsePrompt],
-        new MessagesPlaceholder('chat_history'),
-        ['user', '{query}'],
-      ]),
-      llm,
-      this.strParser,
-    ]).withConfig({
-      runName: 'FinalResponseGenerator',
+    const context = this.processDocs(sortedDocs);
+
+    const formattedChatPrompt = await chatPrompt.invoke({
+      query: input.query,
+      chat_history: input.chat_history,
+      date: new Date().toISOString(),
+      context: context,
+      systemInstructions: systemInstructions,
    });
+
+    const llmRes = await llm.stream(formattedChatPrompt);
+
+    for await (const data of llmRes) {
+      const messageStr = await this.strParser.invoke(data);
+
+      emitter.emit(
+        'data',
+        JSON.stringify({ type: 'response', data: messageStr }),
+      );
+    }
+
+    emitter.emit('end');
  }

  private async rerankDocs(
@@ -431,39 +432,6 @@ class MetaSearchAgent implements MetaSearchAgentType {
      .join('\n');
  }

-  private async handleStream(
-    stream: AsyncGenerator<StreamEvent, any, any>,
-    emitter: eventEmitter,
-  ) {
-    for await (const event of stream) {
-      if (
-        event.event === 'on_chain_end' &&
-        event.name === 'FinalSourceRetriever'
-      ) {
-        ``;
-        emitter.emit(
-          'data',
-          JSON.stringify({ type: 'sources', data: event.data.output }),
-        );
-      }
-      if (
-        event.event === 'on_chain_stream' &&
-        event.name === 'FinalResponseGenerator'
-      ) {
-        emitter.emit(
-          'data',
-          JSON.stringify({ type: 'response', data: event.data.chunk }),
-        );
-      }
-      if (
-        event.event === 'on_chain_end' &&
-        event.name === 'FinalResponseGenerator'
-      ) {
-        emitter.emit('end');
-      }
-    }
-  }
-
  async searchAndAnswer(
    message: string,
    history: BaseMessage[],
@@ -475,26 +443,19 @@ class MetaSearchAgent implements MetaSearchAgentType {
  ) {
    const emitter = new eventEmitter();

-    const answeringChain = await this.createAnsweringChain(
+    this.streamAnswer(
      llm,
      fileIds,
      embeddings,
      optimizationMode,
      systemInstructions,
-    );
-
-    const stream = answeringChain.streamEvents(
      {
        chat_history: history,
        query: message,
      },
-      {
-        version: 'v1',
-      },
+      emitter,
    );

-    this.handleStream(stream, emitter);
-
    return emitter;
  }
 }