feat(agents): update media agents

This commit is contained in:
ItzCrazyKns
2025-11-23 19:23:42 +05:30
parent 0ac8569a9e
commit 6da6acbcd0
4 changed files with 103 additions and 95 deletions

View File

@@ -1,21 +1,17 @@
/* I don't think this can be classified as agents, but to keep the structure consistent I guess I'll keep it here */
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { ChatPromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '@/lib/utils/formatHistory';
import { BaseMessage, HumanMessage, SystemMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '@/lib/searxng';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import LineOutputParser from '@/lib/outputParsers/lineOutputParser';
import { imageSearchFewShots, imageSearchPrompt } from '@/lib/prompts/media/image';
import {
imageSearchFewShots,
imageSearchPrompt,
} from '@/lib/prompts/media/image';
import BaseLLM from '@/lib/models/base/llm';
import z from 'zod';
import { ChatTurnMessage } from '@/lib/types';
import formatChatHistoryAsString from '@/lib/utils/formatHistory';
type ImageSearchChainInput = {
chatHistory: BaseMessage[];
chatHistory: ChatTurnMessage[];
query: string;
};
@@ -23,27 +19,32 @@ type ImageSearchResult = {
img_src: string;
url: string;
title: string;
}
const outputParser = new LineOutputParser({
key: 'query',
})
};
const searchImages = async (
input: ImageSearchChainInput,
llm: BaseChatModel,
llm: BaseLLM<any>,
) => {
const chatPrompt = await ChatPromptTemplate.fromMessages([
new SystemMessage(imageSearchPrompt),
...imageSearchFewShots,
new HumanMessage(`<conversation>\n${formatChatHistoryAsString(input.chatHistory)}\n</conversation>\n<follow_up>\n${input.query}\n</follow_up>`)
]).formatMessages({})
const schema = z.object({
query: z.string().describe('The image search query.'),
});
const res = await llm.invoke(chatPrompt)
const res = await llm.generateObject<z.infer<typeof schema>>({
messages: [
{
role: 'system',
content: imageSearchPrompt,
},
...imageSearchFewShots,
{
role: 'user',
content: `<conversation>\n${formatChatHistoryAsString(input.chatHistory)}\n</conversation>\n<follow_up>\n${input.query}\n</follow_up>`,
},
],
schema: schema,
});
const query = await outputParser.invoke(res)
const searchRes = await searchSearxng(query!, {
const searchRes = await searchSearxng(res.query, {
engines: ['bing images', 'google images'],
});
@@ -62,4 +63,4 @@ const searchImages = async (
return images.slice(0, 10);
};
export default searchImages;
export default searchImages;

View File

@@ -1,13 +1,15 @@
import { ChatPromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '@/lib/utils/formatHistory';
import { BaseMessage, HumanMessage, SystemMessage } from '@langchain/core/messages';
import { searchSearxng } from '@/lib/searxng';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import LineOutputParser from '@/lib/outputParsers/lineOutputParser';
import { videoSearchFewShots, videoSearchPrompt } from '@/lib/prompts/media/videos';
import {
videoSearchFewShots,
videoSearchPrompt,
} from '@/lib/prompts/media/videos';
import { ChatTurnMessage } from '@/lib/types';
import BaseLLM from '@/lib/models/base/llm';
import z from 'zod';
type VideoSearchChainInput = {
chatHistory: BaseMessage[];
chatHistory: ChatTurnMessage[];
query: string;
};
@@ -16,39 +18,39 @@ type VideoSearchResult = {
url: string;
title: string;
iframe_src: string;
}
const outputParser = new LineOutputParser({
key: 'query',
});
};
const searchVideos = async (
input: VideoSearchChainInput,
llm: BaseChatModel,
llm: BaseLLM<any>,
) => {
const chatPrompt = await ChatPromptTemplate.fromMessages([
new SystemMessage(videoSearchPrompt),
...videoSearchFewShots,
new HumanMessage(`<conversation>${formatChatHistoryAsString(input.chatHistory)}\n</conversation>\n<follow_up>\n${input.query}\n</follow_up>`)
]).formatMessages({})
const schema = z.object({
query: z.string().describe('The video search query.'),
});
const res = await llm.invoke(chatPrompt)
const res = await llm.generateObject<z.infer<typeof schema>>({
messages: [
{
role: 'system',
content: videoSearchPrompt,
},
...videoSearchFewShots,
{
role: 'user',
content: `<conversation>\n${formatChatHistoryAsString(input.chatHistory)}\n</conversation>\n<follow_up>\n${input.query}\n</follow_up>`,
},
],
schema: schema,
});
const query = await outputParser.invoke(res)
const searchRes = await searchSearxng(query!, {
const searchRes = await searchSearxng(res.query, {
engines: ['youtube'],
});
const videos: VideoSearchResult[] = [];
searchRes.results.forEach((result) => {
if (
result.thumbnail &&
result.url &&
result.title &&
result.iframe_src
) {
if (result.thumbnail && result.url && result.title && result.iframe_src) {
videos.push({
img_src: result.thumbnail,
url: result.url,
@@ -59,7 +61,6 @@ const searchVideos = async (
});
return videos.slice(0, 10);
};
export default searchVideos;

View File

@@ -1,26 +1,29 @@
import { BaseMessageLike } from "@langchain/core/messages";
import { ChatTurnMessage } from '@/lib/types';
export const imageSearchPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images.
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
Output only the rephrased query wrapped in an XML <query> element. Do not include any explanation or additional text.
Output only the rephrased query as a JSON object with a single "query" key. Do not include any explanation or additional text.
`;
export const imageSearchFewShots: BaseMessageLike[] = [
[
'user',
'<conversation>\n</conversation>\n<follow_up>\nWhat is a cat?\n</follow_up>',
],
['assistant', '<query>A cat</query>'],
export const imageSearchFewShots: ChatTurnMessage[] = [
{
role: 'user',
content:
'<conversation>\n</conversation>\n<follow_up>\nWhat is a cat?\n</follow_up>',
},
{ role: 'assistant', content: '{"query":"A cat"}' },
[
'user',
'<conversation>\n</conversation>\n<follow_up>\nWhat is a car? How does it work?\n</follow_up>',
],
['assistant', '<query>Car working</query>'],
[
'user',
'<conversation>\n</conversation>\n<follow_up>\nHow does an AC work?\n</follow_up>',
],
['assistant', '<query>AC working</query>']
]
{
role: 'user',
content:
'<conversation>\n</conversation>\n<follow_up>\nWhat is a car? How does it work?\n</follow_up>',
},
{ role: 'assistant', content: '{"query":"Car working"}' },
{
role: 'user',
content:
'<conversation>\n</conversation>\n<follow_up>\nHow does an AC work?\n</follow_up>',
},
{ role: 'assistant', content: '{"query":"AC working"}' },
];

View File

@@ -1,25 +1,28 @@
import { BaseMessageLike } from "@langchain/core/messages";
import { ChatTurnMessage } from '@/lib/types';
export const videoSearchPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos.
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
Output only the rephrased query wrapped in an XML <query> element. Do not include any explanation or additional text.
Output only the rephrased query as a JSON object with a single "query" key. Do not include any explanation or additional text.
`;
export const videoSearchFewShots: BaseMessageLike[] = [
[
'user',
'<conversation>\n</conversation>\n<follow_up>\nHow does a car work?\n</follow_up>',
],
['assistant', '<query>How does a car work?</query>'],
[
'user',
'<conversation>\n</conversation>\n<follow_up>\nWhat is the theory of relativity?\n</follow_up>',
],
['assistant', '<query>Theory of relativity</query>'],
[
'user',
'<conversation>\n</conversation>\n<follow_up>\nHow does an AC work?\n</follow_up>',
],
['assistant', '<query>AC working</query>'],
]
export const videoSearchFewShots: ChatTurnMessage[] = [
{
role: 'user',
content:
'<conversation>\n</conversation>\n<follow_up>\nHow does a car work?\n</follow_up>',
},
{ role: 'assistant', content: '{"query":"How does a car work?"}' },
{
role: 'user',
content:
'<conversation>\n</conversation>\n<follow_up>\nWhat is the theory of relativity?\n</follow_up>',
},
{ role: 'assistant', content: '{"query":"Theory of relativity"}' },
{
role: 'user',
content:
'<conversation>\n</conversation>\n<follow_up>\nHow does an AC work?\n</follow_up>',
},
{ role: 'assistant', content: '{"query":"AC working"}' },
];