From ca86a7e3584854445d850070591255bb69c1113a Mon Sep 17 00:00:00 2001 From: HadiCherkaoui Date: Fri, 28 Feb 2025 13:11:00 +0100 Subject: [PATCH] add brave and bing as search engines --- sample.config.toml | 2 +- src/chains/imageSearchAgent.ts | 35 +++++++--- src/chains/videoSearchAgent.ts | 27 ++++++-- src/lib/searchEngines/bing.ts | 116 +++++++++++++++++++++++++++++++++ src/lib/searchEngines/brave.ts | 96 +++++++++++++++++++++++++++ src/routes/discover.ts | 15 +++++ src/search/metaSearchAgent.ts | 10 ++- 7 files changed, 284 insertions(+), 17 deletions(-) create mode 100644 src/lib/searchEngines/bing.ts create mode 100644 src/lib/searchEngines/brave.ts diff --git a/sample.config.toml b/sample.config.toml index c993294..2294a41 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -2,7 +2,7 @@ PORT = 3001 # Port to run the server on SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m") -SEARCH_ENGINE_BACKEND = "google" # "google" | "searxng" | "ddg" | "bing" | "brave" +SEARCH_ENGINE_BACKEND = "searxng" # "google" | "searxng" | "bing" | "brave" [MODELS.OPENAI] API_KEY = "" diff --git a/src/chains/imageSearchAgent.ts b/src/chains/imageSearchAgent.ts index 920fdf6..8b35de0 100644 --- a/src/chains/imageSearchAgent.ts +++ b/src/chains/imageSearchAgent.ts @@ -9,6 +9,7 @@ import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; import { getSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; @@ -46,16 +47,15 @@ async function performImageSearch(query: string) { case 'google': { const googleResult = await searchGooglePSE(query); images = 
googleResult.results.map((result) => { - if (result.img_src && result.url && result.title) { - return { - img_src: result.img_src, - url: result.url, - title: result.title, - source: result.displayLink - }; - } - }) - .filter(Boolean); + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.displayLink + }; + } + }).filter(Boolean); break; } @@ -76,6 +76,21 @@ async function performImageSearch(query: string) { break; } + case 'brave': { + const braveResult = await searchBraveAPI(query); + images = braveResult.results.map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.url + }; + } + }).filter(Boolean); + break; + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/chains/videoSearchAgent.ts b/src/chains/videoSearchAgent.ts index b16fb44..f1f2dfb 100644 --- a/src/chains/videoSearchAgent.ts +++ b/src/chains/videoSearchAgent.ts @@ -9,26 +9,27 @@ import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; import { getSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; const VideoSearchChainPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos. You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. - + Example: 1. Follow up question: How does a car work? Rephrased: How does a car work? - + 2. 
Follow up question: What is the theory of relativity? Rephrased: What is theory of relativity - + 3. Follow up question: How does an AC work? Rephrased: How does an AC work - + Conversation: {chat_history} - + Follow up question: {query} Rephrased question: `; @@ -84,6 +85,22 @@ async function performVideoSearch(query: string) { break; } + case 'brave': { + const braveResult = await searchBraveAPI(youtubeQuery); + braveResult.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + const videoId = new URL(result.url).searchParams.get('v'); + videos.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + iframe_src: videoId ? `https://www.youtube.com/embed/${videoId}` : null + }); + } + }); + break; + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/lib/searchEngines/bing.ts b/src/lib/searchEngines/bing.ts new file mode 100644 index 0000000..c03c82d --- /dev/null +++ b/src/lib/searchEngines/bing.ts @@ -0,0 +1,116 @@ +import axios from 'axios'; +import { getBingSubscriptionKey } from '../../config'; + +interface BingAPISearchResult { + _type: string; + name: string; + url: string; + displayUrl: string; + snippet?: string; + dateLastCrawled?: string; + thumbnailUrl?: string; + contentUrl?: string; + hostPageUrl?: string; + width?: number; + height?: number; + accentColor?: string; + contentSize?: string; + datePublished?: string; + encodingFormat?: string; + hostPageDisplayUrl?: string; + id?: string; + isLicensed?: boolean; + isFamilyFriendly?: boolean; + language?: string; + mediaUrl?: string; + motionThumbnailUrl?: string; + publisher?: string; + viewCount?: number; + webSearchUrl?: string; + primaryImageOfPage?: { + thumbnailUrl?: string; + width?: number; + height?: number; + }; + provider?: Array<{ + name: string; + _type: string; + }>; + video?: { + allowHttpsEmbed?: boolean; + embedHtml?: string; + allowMobileEmbed?: boolean; + viewCount?: number; + }; + image?: { + 
thumbnail?: { + contentUrl?: string; + width?: number; + height?: number; + }; + imageInsightsToken?: string; + imageId?: string; + }; + metatags?: Array<{ + [key: string]: string; + 'og:type'?: string; + 'og:image'?: string; + 'og:video'?: string; + }>; + mentions?: Array<{ + name: string; + }>; + entity?: { + entityPresentationInfo?: { + entityTypeHints?: string[]; + }; + }; +} + + +export const searchBingAPI = async (query: string) => { + try { + const bingApiKey = await getBingSubscriptionKey(); + const url = new URL(`https://api.cognitive.microsoft.com/bing/v7.0/search`); + url.searchParams.append('q', query); + url.searchParams.append('responseFilter', 'Webpages,Images,Videos,News'); + + const res = await axios.get(url.toString(), { + headers: { + 'Ocp-Apim-Subscription-Key': bingApiKey, + 'Accept': 'application/json' + } + }); + + if (res.data.error) { + throw new Error(`Bing API Error: ${res.data.error.message}`); + } + + const originalres = res.data; + const webResults = originalres.webPages?.value || []; + const imageResults = originalres.images?.value || []; + const videoResults = originalres.videos?.value || []; + + const results = webResults.map((item: any) => ({ + title: item.name, + url: item.url, + content: item.snippet, + img_src: item.primaryImageOfPage?.thumbnailUrl + || imageResults.find((img: any) => img.hostPageUrl === item.url)?.thumbnailUrl + || videoResults.find((vid: any) => vid.hostPageUrl === item.url)?.thumbnailUrl, + ...(item.video && { + videoData: { + duration: item.video.duration, + embedUrl: item.video.embedHtml?.match(/src="(.*?)"/)?.[1] + } + }) + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? 
JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`Bing API Error: ${errorMessage}`); + } +}; diff --git a/src/lib/searchEngines/brave.ts b/src/lib/searchEngines/brave.ts new file mode 100644 index 0000000..3d2ed68 --- /dev/null +++ b/src/lib/searchEngines/brave.ts @@ -0,0 +1,96 @@ +import axios from 'axios'; +import { getBraveApiKey } from '../../config'; + +interface BraveSearchResult { + title: string; + url: string; + content?: string; + img_src?: string; + age?: string; + family_friendly?: boolean; + language?: string; + video?: { + embedUrl?: string; + duration?: string; + }; + rating?: { + value: number; + scale: number; + }; + products?: Array<{ + name: string; + price?: string; + }>; + recipe?: { + ingredients?: string[]; + cookTime?: string; + }; + meta?: { + fetched?: string; + lastCrawled?: string; + }; +} + +export const searchBraveAPI = async ( + query: string, + numResults: number = 20 +): Promise<{ results: BraveSearchResult[]; originalres: any }> => { + try { + const braveApiKey = await getBraveApiKey(); + const url = new URL(`https://api.search.brave.com/res/v1/web/search`); + + url.searchParams.append('q', query); + url.searchParams.append('count', numResults.toString()); + + const res = await axios.get(url.toString(), { + headers: { + 'X-Subscription-Token': braveApiKey, + 'Accept': 'application/json' + } + }); + + if (res.data.error) { + throw new Error(`Brave API Error: ${res.data.error.message}`); + } + + const originalres = res.data; + const webResults = originalres.web?.results || []; + + const results: BraveSearchResult[] = webResults.map((item: any) => ({ + title: item.title, + url: item.url, + content: item.description, + img_src: item.thumbnail?.src || item.deep_results?.images?.[0]?.src, + age: item.age, + family_friendly: item.family_friendly, + language: item.language, + video: item.video ? 
{ + embedUrl: item.video.embed_url, + duration: item.video.duration + } : undefined, + rating: item.rating ? { + value: item.rating.value, + scale: item.rating.scale_max + } : undefined, + products: item.deep_results?.product_cluster?.map((p: any) => ({ + name: p.name, + price: p.price + })), + recipe: item.recipe ? { + ingredients: item.recipe.ingredients, + cookTime: item.recipe.cook_time + } : undefined, + meta: { + fetched: item.meta?.fetched, + lastCrawled: item.meta?.last_crawled + } + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`Brave API Error: ${errorMessage}`); + } +}; diff --git a/src/routes/discover.ts b/src/routes/discover.ts index 0f47b97..bb57c85 100644 --- a/src/routes/discover.ts +++ b/src/routes/discover.ts @@ -1,6 +1,7 @@ import express from 'express'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; import { getSearchEngineBackend } from '../config'; import logger from '../utils/logger'; @@ -42,6 +43,20 @@ async function performSearch(query: string, site: string) { return searxResult.results; } + case 'brave': { + const braveResult = await searchBraveAPI(query); + return braveResult.results.map(item => ({ + title: item.title, + url: item.url, + content: item.content, + thumbnail: item.img_src, + img_src: item.img_src, + iframe_src: null, + author: item.meta?.fetched || site, + publishedDate: item.meta?.lastCrawled + })); + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/search/metaSearchAgent.ts b/src/search/metaSearchAgent.ts index 1db9af6..4969f8d 100644 --- a/src/search/metaSearchAgent.ts +++ b/src/search/metaSearchAgent.ts @@ -19,6 +19,8 @@ import { getDocumentsFromLinks } from 
'../utils/documents'; import { Document } from 'langchain/document'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBingAPI } from '../lib/searchEngines/bing'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; import { getSearchEngineBackend } from '../config'; import path from 'path'; import fs from 'fs'; @@ -134,7 +136,7 @@ class MetaSearchAgent implements MetaSearchAgentType { You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query. If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary. - + - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague. - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query. - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format. @@ -219,6 +221,12 @@ class MetaSearchAgent implements MetaSearchAgentType { case 'google': res = await searchGooglePSE(question); break; + case 'bing': + res = await searchBingAPI(question); + break; + case 'brave': + res = await searchBraveAPI(question); + break; default: throw new Error(`Unknown search engine ${searchEngine}`); }