From 33b736e1e830bead1e803e2829f290d344d677e5 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Thu, 13 Nov 2025 11:51:13 +0530 Subject: [PATCH] feat(app): migrate image search chain --- src/app/api/images/route.ts | 20 +++--- src/lib/agents/media/image.ts | 114 +++++++++++---------------------- src/lib/prompts/media/image.ts | 26 ++++++++ 3 files changed, 72 insertions(+), 88 deletions(-) create mode 100644 src/lib/prompts/media/image.ts diff --git a/src/app/api/images/route.ts b/src/app/api/images/route.ts index 71d679e..bc62a1d 100644 --- a/src/app/api/images/route.ts +++ b/src/app/api/images/route.ts @@ -1,4 +1,4 @@ -import handleImageSearch from '@/lib/agents/media/image'; +import searchImages from '@/lib/agents/media/image'; import ModelRegistry from '@/lib/models/registry'; import { ModelWithProvider } from '@/lib/models/types'; import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; @@ -13,6 +13,13 @@ export const POST = async (req: Request) => { try { const body: ImageSearchBody = await req.json(); + const registry = new ModelRegistry(); + + const llm = await registry.loadChatModel( + body.chatModel.providerId, + body.chatModel.key, + ); + const chatHistory = body.chatHistory .map((msg: any) => { if (msg.role === 'user') { @@ -23,16 +30,9 @@ export const POST = async (req: Request) => { }) .filter((msg) => msg !== undefined) as BaseMessage[]; - const registry = new ModelRegistry(); - - const llm = await registry.loadChatModel( - body.chatModel.providerId, - body.chatModel.key, - ); - - const images = await handleImageSearch( + const images = await searchImages( { - chat_history: chatHistory, + chatHistory: chatHistory, query: body.query, }, llm, diff --git a/src/lib/agents/media/image.ts b/src/lib/agents/media/image.ts index 2dd719b..648b5ce 100644 --- a/src/lib/agents/media/image.ts +++ b/src/lib/agents/media/image.ts @@ -7,101 +7,59 @@ import { } from '@langchain/core/runnables'; import { ChatPromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '@/lib/utils/formatHistory'; -import { BaseMessage } from '@langchain/core/messages'; +import { BaseMessage, HumanMessage, SystemMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '@/lib/searxng'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import LineOutputParser from '@/lib/outputParsers/lineOutputParser'; - -const imageSearchChainPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images. -You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. -Output only the rephrased query wrapped in an XML element. Do not include any explanation or additional text. -`; +import { imageSearchFewShots, imageSearchPrompt } from '@/lib/prompts/media/image'; type ImageSearchChainInput = { - chat_history: BaseMessage[]; + chatHistory: BaseMessage[]; query: string; }; -interface ImageSearchResult { +type ImageSearchResult = { img_src: string; url: string; title: string; } -const strParser = new StringOutputParser(); +const outputParser = new LineOutputParser({ + key: 'query', +}) -const createImageSearchChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - RunnableMap.from({ - chat_history: (input: ImageSearchChainInput) => { - return formatChatHistoryAsString(input.chat_history); - }, - query: (input: ImageSearchChainInput) => { - return input.query; - }, - }), - ChatPromptTemplate.fromMessages([ - ['system', imageSearchChainPrompt], - [ - 'user', - '\n\n\nWhat is a cat?\n', - ], - ['assistant', 'A cat'], - - [ - 'user', - '\n\n\nWhat is a car? How does it work?\n', - ], - ['assistant', 'Car working'], - [ - 'user', - '\n\n\nHow does an AC work?\n', - ], - ['assistant', 'AC working'], - [ - 'user', - '{chat_history}\n\n{query}\n', - ], - ]), - llm, - strParser, - RunnableLambda.from(async (input: string) => { - const queryParser = new LineOutputParser({ - key: 'query', - }); - - return await queryParser.parse(input); - }), - RunnableLambda.from(async (input: string) => { - const res = await searchSearxng(input, { - engines: ['bing images', 'google images'], - }); - - const images: ImageSearchResult[] = []; - - res.results.forEach((result) => { - if (result.img_src && result.url && result.title) { - images.push({ - img_src: result.img_src, - url: result.url, - title: result.title, - }); - } - }); - - return images.slice(0, 10); - }), - ]); -}; - -const handleImageSearch = ( +const searchImages = async ( input: ImageSearchChainInput, llm: BaseChatModel, ) => { - const imageSearchChain = createImageSearchChain(llm); - return imageSearchChain.invoke(input); + const chatPrompt = await ChatPromptTemplate.fromMessages([ + new SystemMessage(imageSearchPrompt), + ...imageSearchFewShots, + new HumanMessage(`\n${formatChatHistoryAsString(input.chatHistory)}\n\n\n${input.query}\n`) + ]).formatMessages({}) + + const res = await llm.invoke(chatPrompt) + + const query = await outputParser.invoke(res) + + const searchRes = await searchSearxng(query!, { + engines: ['bing images', 'google images'], + }); + + const images: ImageSearchResult[] = []; + + searchRes.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + images.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + }); + } + }); + + return images.slice(0, 10); }; -export default handleImageSearch; \ No newline at end of file +export default searchImages; \ No newline at end of file diff --git a/src/lib/prompts/media/image.ts b/src/lib/prompts/media/image.ts new file mode 100644 index 0000000..5f707c1 --- /dev/null +++ b/src/lib/prompts/media/image.ts @@ -0,0 +1,26 @@ +import { BaseMessageLike } from "@langchain/core/messages"; + +export const imageSearchPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images. +You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. +Output only the rephrased query wrapped in an XML element. Do not include any explanation or additional text. +`; + +export const imageSearchFewShots: BaseMessageLike[] = [ + [ + 'user', + '\n\n\nWhat is a cat?\n', + ], + ['assistant', 'A cat'], + + [ + 'user', + '\n\n\nWhat is a car? How does it work?\n', + ], + ['assistant', 'Car working'], + [ + 'user', + '\n\n\nHow does an AC work?\n', + ], + ['assistant', 'AC working'] +] \ No newline at end of file