feat(app): add image & video search functionality

2025-12-05 03:08:14 +00:00 · 2025-03-19 13:38:40 +05:30
parent d1e9361665
commit 1130746f5d
6 changed files with 393 additions and 42 deletions
--- a/ui/app/api/images/route.ts
+++ b/ui/app/api/images/route.ts
@@ -0,0 +1,101 @@
+import handleImageSearch from '@/lib/chains/imageSearchAgent';
+import {
+  getCustomOpenaiApiKey,
+  getCustomOpenaiApiUrl,
+  getCustomOpenaiModelName,
+} from '@/lib/config';
+import { getAvailableChatModelProviders } from '@/lib/providers';
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { AIMessage, HumanMessage } from '@langchain/core/messages';
+import { ChatOpenAI } from '@langchain/openai';
+
+/** Provider/model pair selecting which chat model to use. */
+interface ChatModel {
+  provider: string;
+  model: string;
+}
+
+/** JSON body expected by the image search endpoint. */
+interface ImageSearchBody {
+  query: string;
+  chatHistory: any[];
+  chatModel?: ChatModel;
+}
+
+/**
+ * Handles an image search request.
+ *
+ * Converts the posted chat history into LangChain messages, resolves the chat
+ * model, and passes both to `handleImageSearch`.
+ *
+ * Responds with `{ images }` (200), `{ error }` (400) when no chat model can be
+ * resolved, or `{ message }` (500) when anything throws.
+ */
+export const POST = async (req: Request) => {
+  try {
+    const body: ImageSearchBody = await req.json();
+
+    // A missing or non-array history is treated as empty; entries whose role
+    // is neither 'user' nor 'assistant' are dropped.
+    const chatHistory = (Array.isArray(body.chatHistory) ? body.chatHistory : [])
+      .map((msg: any) => {
+        if (msg.role === 'user') {
+          return new HumanMessage(msg.content);
+        } else if (msg.role === 'assistant') {
+          return new AIMessage(msg.content);
+        }
+      })
+      .filter((msg) => msg !== undefined);
+
+    const chatModelProviders = await getAvailableChatModelProviders();
+
+    // Fall back to the first available provider/model when none is requested.
+    const chatModelProvider =
+      chatModelProviders[
+        body.chatModel?.provider || Object.keys(chatModelProviders)[0]
+      ];
+    // Guard the lookup: an unknown provider leaves `chatModelProvider`
+    // undefined, and indexing it would throw and surface as a 500.
+    const chatModel = chatModelProvider
+      ? chatModelProvider[
+          body.chatModel?.model || Object.keys(chatModelProvider)[0]
+        ]
+      : undefined;
+
+    let llm: BaseChatModel | undefined;
+
+    if (body.chatModel?.provider === 'custom_openai') {
+      // Built from the getCustomOpenai* config values, not the provider map.
+      llm = new ChatOpenAI({
+        openAIApiKey: getCustomOpenaiApiKey(),
+        modelName: getCustomOpenaiModelName(),
+        temperature: 0.7,
+        configuration: {
+          baseURL: getCustomOpenaiApiUrl(),
+        },
+      });
+    } else if (chatModelProvider && chatModel) {
+      llm = chatModel.model;
+    }
+
+    if (!llm) {
+      return Response.json({ error: 'Invalid chat model' }, { status: 400 });
+    }
+
+    const images = await handleImageSearch(
+      {
+        chat_history: chatHistory,
+        query: body.query,
+      },
+      llm,
+    );
+
+    return Response.json({ images }, { status: 200 });
+  } catch (err) {
+    console.error(`An error ocurred while searching images: ${err}`);
+    return Response.json(
+      { message: 'An error ocurred while searching images' },
+      { status: 500 },
+    );
+  }
+};
--- a/ui/app/api/videos/route.ts
+++ b/ui/app/api/videos/route.ts
@@ -0,0 +1,101 @@
+import handleVideoSearch from '@/lib/chains/videoSearchAgent';
+import {
+  getCustomOpenaiApiKey,
+  getCustomOpenaiApiUrl,
+  getCustomOpenaiModelName,
+} from '@/lib/config';
+import { getAvailableChatModelProviders } from '@/lib/providers';
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { AIMessage, HumanMessage } from '@langchain/core/messages';
+import { ChatOpenAI } from '@langchain/openai';
+
+/** Provider/model pair selecting which chat model to use. */
+interface ChatModel {
+  provider: string;
+  model: string;
+}
+
+/** JSON body expected by the video search endpoint. */
+interface VideoSearchBody {
+  query: string;
+  chatHistory: any[];
+  chatModel?: ChatModel;
+}
+
+/**
+ * Handles a video search request.
+ *
+ * Converts the posted chat history into LangChain messages, resolves the chat
+ * model, and passes both to `handleVideoSearch`.
+ *
+ * Responds with `{ videos }` (200), `{ error }` (400) when no chat model can be
+ * resolved, or `{ message }` (500) when anything throws.
+ */
+export const POST = async (req: Request) => {
+  try {
+    const body: VideoSearchBody = await req.json();
+
+    // A missing or non-array history is treated as empty; entries whose role
+    // is neither 'user' nor 'assistant' are dropped.
+    const chatHistory = (Array.isArray(body.chatHistory) ? body.chatHistory : [])
+      .map((msg: any) => {
+        if (msg.role === 'user') {
+          return new HumanMessage(msg.content);
+        } else if (msg.role === 'assistant') {
+          return new AIMessage(msg.content);
+        }
+      })
+      .filter((msg) => msg !== undefined);
+
+    const chatModelProviders = await getAvailableChatModelProviders();
+
+    // Fall back to the first available provider/model when none is requested.
+    const chatModelProvider =
+      chatModelProviders[
+        body.chatModel?.provider || Object.keys(chatModelProviders)[0]
+      ];
+    // Guard the lookup: an unknown provider leaves `chatModelProvider`
+    // undefined, and indexing it would throw and surface as a 500.
+    const chatModel = chatModelProvider
+      ? chatModelProvider[
+          body.chatModel?.model || Object.keys(chatModelProvider)[0]
+        ]
+      : undefined;
+
+    let llm: BaseChatModel | undefined;
+
+    if (body.chatModel?.provider === 'custom_openai') {
+      // Built from the getCustomOpenai* config values, not the provider map.
+      llm = new ChatOpenAI({
+        openAIApiKey: getCustomOpenaiApiKey(),
+        modelName: getCustomOpenaiModelName(),
+        temperature: 0.7,
+        configuration: {
+          baseURL: getCustomOpenaiApiUrl(),
+        },
+      });
+    } else if (chatModelProvider && chatModel) {
+      llm = chatModel.model;
+    }
+
+    if (!llm) {
+      return Response.json({ error: 'Invalid chat model' }, { status: 400 });
+    }
+
+    const videos = await handleVideoSearch(
+      {
+        chat_history: chatHistory,
+        query: body.query,
+      },
+      llm,
+    );
+
+    return Response.json({ videos }, { status: 200 });
+  } catch (err) {
+    console.error(`An error ocurred while searching videos: ${err}`);
+    return Response.json(
+      { message: 'An error ocurred while searching videos' },
+      { status: 500 },
+    );
+  }
+};
--- a/ui/components/SearchImages.tsx
+++ b/ui/components/SearchImages.tsx
@@ -14,9 +14,11 @@ type Image = {
 const SearchImages = ({
  query,
  chatHistory,
+  messageId,
 }: {
  query: string;
  chatHistory: Message[];
+  messageId: string;
 }) => {
  const [images, setImages] = useState<Image[] | null>(null);
  const [loading, setLoading] = useState(false);
@@ -27,7 +29,7 @@ const SearchImages = ({
    <>
      {!loading && images === null && (
        <button
-          id="search-images"
+          id={`search-images-${messageId}`}
          onClick={async () => {
            setLoading(true);

@@ -37,27 +39,24 @@ const SearchImages = ({
            const customOpenAIBaseURL = localStorage.getItem('openAIBaseURL');
            const customOpenAIKey = localStorage.getItem('openAIApiKey');

-            const res = await fetch(
-              `${process.env.NEXT_PUBLIC_API_URL}/images`,
-              {
-                method: 'POST',
-                headers: {
-                  'Content-Type': 'application/json',
-                },
-                body: JSON.stringify({
-                  query: query,
-                  chatHistory: chatHistory,
-                  chatModel: {
-                    provider: chatModelProvider,
-                    model: chatModel,
-                    ...(chatModelProvider === 'custom_openai' && {
-                      customOpenAIBaseURL: customOpenAIBaseURL,
-                      customOpenAIKey: customOpenAIKey,
-                    }),
-                  },
-                }),
+            const res = await fetch(`/api/images`, {
+              method: 'POST',
+              headers: {
+                'Content-Type': 'application/json',
              },
-            );
+              body: JSON.stringify({
+                query: query,
+                chatHistory: chatHistory,
+                chatModel: {
+                  provider: chatModelProvider,
+                  model: chatModel,
+                  ...(chatModelProvider === 'custom_openai' && {
+                    customOpenAIBaseURL: customOpenAIBaseURL,
+                    customOpenAIKey: customOpenAIKey,
+                  }),
+                },
+              }),
+            });

            const data = await res.json();

--- a/ui/components/SearchVideos.tsx
+++ b/ui/components/SearchVideos.tsx
@@ -27,9 +27,11 @@ declare module 'yet-another-react-lightbox' {
 const Searchvideos = ({
  query,
  chatHistory,
+  messageId,
 }: {
  query: string;
  chatHistory: Message[];
+  messageId: string;
 }) => {
  const [videos, setVideos] = useState<Video[] | null>(null);
  const [loading, setLoading] = useState(false);
@@ -42,7 +44,7 @@ const Searchvideos = ({
    <>
      {!loading && videos === null && (
        <button
-          id="search-videos"
+          id={`search-videos-${messageId}`}
          onClick={async () => {
            setLoading(true);

@@ -52,27 +54,24 @@ const Searchvideos = ({
            const customOpenAIBaseURL = localStorage.getItem('openAIBaseURL');
            const customOpenAIKey = localStorage.getItem('openAIApiKey');

-            const res = await fetch(
-              `${process.env.NEXT_PUBLIC_API_URL}/videos`,
-              {
-                method: 'POST',
-                headers: {
-                  'Content-Type': 'application/json',
-                },
-                body: JSON.stringify({
-                  query: query,
-                  chatHistory: chatHistory,
-                  chatModel: {
-                    provider: chatModelProvider,
-                    model: chatModel,
-                    ...(chatModelProvider === 'custom_openai' && {
-                      customOpenAIBaseURL: customOpenAIBaseURL,
-                      customOpenAIKey: customOpenAIKey,
-                    }),
-                  },
-                }),
+            const res = await fetch(`/api/videos`, {
+              method: 'POST',
+              headers: {
+                'Content-Type': 'application/json',
              },
-            );
+              body: JSON.stringify({
+                query: query,
+                chatHistory: chatHistory,
+                chatModel: {
+                  provider: chatModelProvider,
+                  model: chatModel,
+                  ...(chatModelProvider === 'custom_openai' && {
+                    customOpenAIBaseURL: customOpenAIBaseURL,
+                    customOpenAIKey: customOpenAIKey,
+                  }),
+                },
+              }),
+            });

            const data = await res.json();

--- a/ui/lib/chains/imageSearchAgent.ts
+++ b/ui/lib/chains/imageSearchAgent.ts
@@ -0,0 +1,89 @@
+import {
+  RunnableSequence,
+  RunnableMap,
+  RunnableLambda,
+} from '@langchain/core/runnables';
+import { PromptTemplate } from '@langchain/core/prompts';
+import formatChatHistoryAsString from '../utils/formatHistory';
+import { BaseMessage } from '@langchain/core/messages';
+import { StringOutputParser } from '@langchain/core/output_parsers';
+import { searchSearxng } from '../searxng';
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+
+// Prompt that has the LLM rewrite the follow-up question as a standalone
+// image search query; filled with {chat_history} and {query}.
+const imageSearchChainPrompt = `
+You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images.
+You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
+
+Example:
+1. Follow up question: What is a cat?
+Rephrased: A cat
+
+2. Follow up question: What is a car? How does it works?
+Rephrased: Car working
+
+3. Follow up question: How does an AC work?
+Rephrased: AC working
+
+Conversation:
+{chat_history}
+
+Follow up question: {query}
+Rephrased question:
+`;
+
+/** Input to the image search chain: prior messages plus the new query. */
+type ImageSearchChainInput = {
+  chat_history: BaseMessage[];
+  query: string;
+};
+
+/** Shape of each image entry the chain returns. */
+interface ImageSearchResult {
+  img_src: string;
+  url: string;
+  title: string;
+}
+
+const strParser = new StringOutputParser();
+
+/**
+ * Builds the image search chain for the given model: format the history,
+ * fill the prompt, run the LLM, parse its output to a string, then call
+ * `searchSearxng` with that string and keep at most 10 complete results.
+ */
+const createImageSearchChain = (llm: BaseChatModel) =>
+  RunnableSequence.from([
+    RunnableMap.from({
+      chat_history: (input: ImageSearchChainInput) =>
+        formatChatHistoryAsString(input.chat_history),
+      query: (input: ImageSearchChainInput) => input.query,
+    }),
+    PromptTemplate.fromTemplate(imageSearchChainPrompt),
+    llm,
+    strParser,
+    RunnableLambda.from(async (rephrasedQuery: string) => {
+      const { results } = await searchSearxng(rephrasedQuery, {
+        engines: ['bing images', 'google images'],
+      });
+
+      const images: ImageSearchResult[] = [];
+
+      for (const { img_src, url, title } of results) {
+        // Only keep hits that carry all three fields.
+        if (!img_src || !url || !title) continue;
+        images.push({ img_src, url, title });
+      }
+
+      return images.slice(0, 10);
+    }),
+  ]);
+
+/** Builds a fresh chain for `llm` and invokes it with `input`. */
+const handleImageSearch = (
+  input: ImageSearchChainInput,
+  llm: BaseChatModel,
+) => createImageSearchChain(llm).invoke(input);
+
+export default handleImageSearch;
--- a/ui/lib/chains/videoSearchAgent.ts
+++ b/ui/lib/chains/videoSearchAgent.ts
@@ -0,0 +1,90 @@
+import {
+  RunnableSequence,
+  RunnableMap,
+  RunnableLambda,
+} from '@langchain/core/runnables';
+import { PromptTemplate } from '@langchain/core/prompts';
+import formatChatHistoryAsString from '../utils/formatHistory';
+import { BaseMessage } from '@langchain/core/messages';
+import { StringOutputParser } from '@langchain/core/output_parsers';
+import { searchSearxng } from '../searxng';
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+
+// Prompt that has the LLM rewrite the follow-up question as a standalone
+// video search query; filled with {chat_history} and {query}.
+const videoSearchChainPrompt = `
+  You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos.
+  You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
+  
+  Example:
+  1. Follow up question: How does a car work?
+  Rephrased: How does a car work?
+  
+  2. Follow up question: What is the theory of relativity?
+  Rephrased: What is theory of relativity
+  
+  3. Follow up question: How does an AC work?
+  Rephrased: How does an AC work
+  
+  Conversation:
+  {chat_history}
+  
+  Follow up question: {query}
+  Rephrased question:
+  `;
+
+/** Input to the video search chain: prior messages plus the new query. */
+type VideoSearchChainInput = {
+  chat_history: BaseMessage[];
+  query: string;
+};
+
+/** Shape of each video entry the chain returns. */
+interface VideoSearchResult {
+  img_src: string;
+  url: string;
+  title: string;
+  iframe_src: string;
+}
+
+const strParser = new StringOutputParser();
+
+/**
+ * Builds the video search chain for the given model: format the history,
+ * fill the prompt, run the LLM, parse its output to a string, then call
+ * `searchSearxng` with that string and keep at most 10 complete results.
+ */
+const createVideoSearchChain = (llm: BaseChatModel) =>
+  RunnableSequence.from([
+    RunnableMap.from({
+      chat_history: (input: VideoSearchChainInput) =>
+        formatChatHistoryAsString(input.chat_history),
+      query: (input: VideoSearchChainInput) => input.query,
+    }),
+    PromptTemplate.fromTemplate(videoSearchChainPrompt),
+    llm,
+    strParser,
+    RunnableLambda.from(async (rephrasedQuery: string) => {
+      const { results } = await searchSearxng(rephrasedQuery, {
+        engines: ['youtube'],
+      });
+
+      const videos: VideoSearchResult[] = [];
+
+      for (const { thumbnail, url, title, iframe_src } of results) {
+        // Only keep hits that carry all four fields; thumbnail becomes img_src.
+        if (!thumbnail || !url || !title || !iframe_src) continue;
+        videos.push({ img_src: thumbnail, url, title, iframe_src });
+      }
+
+      return videos.slice(0, 10);
+    }),
+  ]);
+
+/** Builds a fresh chain for `llm` and invokes it with `input`. */
+const handleVideoSearch = (
+  input: VideoSearchChainInput,
+  llm: BaseChatModel,
+) => createVideoSearchChain(llm).invoke(input);
+
+export default handleVideoSearch;