From 6da6acbcd0261215699933747a6f854f8670974e Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Sun, 23 Nov 2025 19:23:42 +0530 Subject: [PATCH] feat(agents): update media agents --- src/lib/agents/media/image.ts | 59 +++++++++++++++++---------------- src/lib/agents/media/video.ts | 57 +++++++++++++++---------------- src/lib/prompts/media/image.ts | 41 ++++++++++++----------- src/lib/prompts/media/videos.ts | 41 ++++++++++++----------- 4 files changed, 103 insertions(+), 95 deletions(-) diff --git a/src/lib/agents/media/image.ts b/src/lib/agents/media/image.ts index 648b5ce..f146824 100644 --- a/src/lib/agents/media/image.ts +++ b/src/lib/agents/media/image.ts @@ -1,21 +1,17 @@ /* I don't think can be classified as agents but to keep the structure consistent i guess ill keep it here */ -import { - RunnableSequence, - RunnableMap, - RunnableLambda, -} from '@langchain/core/runnables'; -import { ChatPromptTemplate } from '@langchain/core/prompts'; -import formatChatHistoryAsString from '@/lib/utils/formatHistory'; -import { BaseMessage, HumanMessage, SystemMessage } from '@langchain/core/messages'; -import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '@/lib/searxng'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import LineOutputParser from '@/lib/outputParsers/lineOutputParser'; -import { imageSearchFewShots, imageSearchPrompt } from '@/lib/prompts/media/image'; +import { + imageSearchFewShots, + imageSearchPrompt, +} from '@/lib/prompts/media/image'; +import BaseLLM from '@/lib/models/base/llm'; +import z from 'zod'; +import { ChatTurnMessage } from '@/lib/types'; +import formatChatHistoryAsString from '@/lib/utils/formatHistory'; type ImageSearchChainInput = { - chatHistory: BaseMessage[]; + chatHistory: ChatTurnMessage[]; query: string; }; @@ -23,27 +19,32 @@ type ImageSearchResult = { img_src: string; url: string; title: string; -} - -const outputParser = new LineOutputParser({ - key: 'query', -}) +}; const searchImages = async ( input: ImageSearchChainInput, - llm: BaseChatModel, + llm: BaseLLM, ) => { - const chatPrompt = await ChatPromptTemplate.fromMessages([ - new SystemMessage(imageSearchPrompt), - ...imageSearchFewShots, - new HumanMessage(`\n${formatChatHistoryAsString(input.chatHistory)}\n\n\n${input.query}\n`) - ]).formatMessages({}) + const schema = z.object({ + query: z.string().describe('The image search query.'), + }); - const res = await llm.invoke(chatPrompt) + const res = await llm.generateObject>({ + messages: [ + { + role: 'system', + content: imageSearchPrompt, + }, + ...imageSearchFewShots, + { + role: 'user', + content: `\n${formatChatHistoryAsString(input.chatHistory)}\n\n\n${input.query}\n`, + }, + ], + schema: schema, + }); - const query = await outputParser.invoke(res) - - const searchRes = await searchSearxng(query!, { + const searchRes = await searchSearxng(res.query, { engines: ['bing images', 'google images'], }); @@ -62,4 +63,4 @@ const searchImages = async ( return images.slice(0, 10); }; -export default searchImages; \ No newline at end of file +export default searchImages; diff --git a/src/lib/agents/media/video.ts b/src/lib/agents/media/video.ts index 60fc04f..feac720 100644 --- a/src/lib/agents/media/video.ts +++ b/src/lib/agents/media/video.ts @@ -1,13 +1,15 @@ -import { ChatPromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '@/lib/utils/formatHistory'; -import { BaseMessage, HumanMessage, SystemMessage } from '@langchain/core/messages'; import { searchSearxng } from '@/lib/searxng'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import LineOutputParser from '@/lib/outputParsers/lineOutputParser'; -import { videoSearchFewShots, videoSearchPrompt } from '@/lib/prompts/media/videos'; +import { + videoSearchFewShots, + videoSearchPrompt, +} from '@/lib/prompts/media/videos'; +import { ChatTurnMessage } from '@/lib/types'; +import BaseLLM from '@/lib/models/base/llm'; +import z from 'zod'; type VideoSearchChainInput = { - chatHistory: BaseMessage[]; + chatHistory: ChatTurnMessage[]; query: string; }; @@ -16,39 +18,39 @@ type VideoSearchResult = { url: string; title: string; iframe_src: string; -} - -const outputParser = new LineOutputParser({ - key: 'query', -}); +}; const searchVideos = async ( input: VideoSearchChainInput, - llm: BaseChatModel, + llm: BaseLLM, ) => { - const chatPrompt = await ChatPromptTemplate.fromMessages([ - new SystemMessage(videoSearchPrompt), - ...videoSearchFewShots, - new HumanMessage(`${formatChatHistoryAsString(input.chatHistory)}\n\n\n${input.query}\n`) - ]).formatMessages({}) + const schema = z.object({ + query: z.string().describe('The video search query.'), + }); - const res = await llm.invoke(chatPrompt) + const res = await llm.generateObject>({ + messages: [ + { + role: 'system', + content: videoSearchPrompt, + }, + ...videoSearchFewShots, + { + role: 'user', + content: `\n${formatChatHistoryAsString(input.chatHistory)}\n\n\n${input.query}\n`, + }, + ], + schema: schema, + }); - const query = await outputParser.invoke(res) - - const searchRes = await searchSearxng(query!, { + const searchRes = await searchSearxng(res.query, { engines: ['youtube'], }); const videos: VideoSearchResult[] = []; searchRes.results.forEach((result) => { - if ( - result.thumbnail && - result.url && - result.title && - result.iframe_src - ) { + if (result.thumbnail && result.url && result.title && result.iframe_src) { videos.push({ img_src: result.thumbnail, url: result.url, @@ -59,7 +61,6 @@ const searchVideos = async ( }); return videos.slice(0, 10); - }; export default searchVideos; diff --git a/src/lib/prompts/media/image.ts b/src/lib/prompts/media/image.ts index 5f707c1..d4584cb 100644 --- a/src/lib/prompts/media/image.ts +++ b/src/lib/prompts/media/image.ts @@ -1,26 +1,29 @@ -import { BaseMessageLike } from "@langchain/core/messages"; +import { ChatTurnMessage } from '@/lib/types'; export const imageSearchPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images. You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. -Output only the rephrased query wrapped in an XML element. Do not include any explanation or additional text. +Output only the rephrased query in query key JSON format. Do not include any explanation or additional text. `; -export const imageSearchFewShots: BaseMessageLike[] = [ - [ - 'user', - '\n\n\nWhat is a cat?\n', - ], - ['assistant', 'A cat'], +export const imageSearchFewShots: ChatTurnMessage[] = [ + { + role: 'user', + content: + '\n\n\nWhat is a cat?\n', + }, + { role: 'assistant', content: '{"query":"A cat"}' }, - [ - 'user', - '\n\n\nWhat is a car? How does it work?\n', - ], - ['assistant', 'Car working'], - [ - 'user', - '\n\n\nHow does an AC work?\n', - ], - ['assistant', 'AC working'] -] \ No newline at end of file + { + role: 'user', + content: + '\n\n\nWhat is a car? How does it work?\n', + }, + { role: 'assistant', content: '{"query":"Car working"}' }, + { + role: 'user', + content: + '\n\n\nHow does an AC work?\n', + }, + { role: 'assistant', content: '{"query":"AC working"}' }, +]; diff --git a/src/lib/prompts/media/videos.ts b/src/lib/prompts/media/videos.ts index b4a0d55..adaa7b5 100644 --- a/src/lib/prompts/media/videos.ts +++ b/src/lib/prompts/media/videos.ts @@ -1,25 +1,28 @@ -import { BaseMessageLike } from "@langchain/core/messages"; +import { ChatTurnMessage } from '@/lib/types'; export const videoSearchPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos. You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. -Output only the rephrased query wrapped in an XML element. Do not include any explanation or additional text. +Output only the rephrased query in query key JSON format. Do not include any explanation or additional text. `; -export const videoSearchFewShots: BaseMessageLike[] = [ - [ - 'user', - '\n\n\nHow does a car work?\n', - ], - ['assistant', 'How does a car work?'], - [ - 'user', - '\n\n\nWhat is the theory of relativity?\n', - ], - ['assistant', 'Theory of relativity'], - [ - 'user', - '\n\n\nHow does an AC work?\n', - ], - ['assistant', 'AC working'], -] \ No newline at end of file +export const videoSearchFewShots: ChatTurnMessage[] = [ + { + role: 'user', + content: + '\n\n\nHow does a car work?\n', + }, + { role: 'assistant', content: '{"query":"How does a car work?"}' }, + { + role: 'user', + content: + '\n\n\nWhat is the theory of relativity?\n', + }, + { role: 'assistant', content: '{"query":"Theory of relativity"}' }, + { + role: 'user', + content: + '\n\n\nHow does an AC work?\n', + }, + { role: 'assistant', content: '{"query":"AC working"}' }, +];