diff --git a/sample.config.toml b/sample.config.toml index c993294..3690d70 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -2,7 +2,7 @@ PORT = 3001 # Port to run the server on SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m") -SEARCH_ENGINE_BACKEND = "google" # "google" | "searxng" | "ddg" | "bing" | "brave" +SEARCH_ENGINE_BACKEND = "searxng" # "google" | "searxng" | "bing" | "brave" | "yacy" [MODELS.OPENAI] API_KEY = "" @@ -35,3 +35,6 @@ SUBSCRIPTION_KEY = "" [SEARCH_ENGINES.BRAVE] API_KEY = "" + +[SEARCH_ENGINES.YACY] +ENDPOINT = "" \ No newline at end of file diff --git a/src/chains/imageSearchAgent.ts b/src/chains/imageSearchAgent.ts index 920fdf6..04fb108 100644 --- a/src/chains/imageSearchAgent.ts +++ b/src/chains/imageSearchAgent.ts @@ -9,6 +9,8 @@ import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; @@ -46,16 +48,15 @@ async function performImageSearch(query: string) { case 'google': { const googleResult = await searchGooglePSE(query); images = googleResult.results.map((result) => { - if (result.img_src && result.url && result.title) { - return { - img_src: result.img_src, - url: result.url, - title: result.title, - source: result.displayLink - }; - } - }) - .filter(Boolean); + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.displayLink + }; + } + }).filter(Boolean); break; } @@ -76,6 +77,36 @@ async function performImageSearch(query: string) { break; } + case 'brave': { + const braveResult = await searchBraveAPI(query); + images = braveResult.results.map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.url + }; + } + }).filter(Boolean); + break; + } + + case 'yacy': { + const yacyResult = await searchYaCy(query); + images = yacyResult.results.map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.url + } + } + }).filter(Boolean); + break; + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/chains/videoSearchAgent.ts b/src/chains/videoSearchAgent.ts index b16fb44..1fcbc13 100644 --- a/src/chains/videoSearchAgent.ts +++ b/src/chains/videoSearchAgent.ts @@ -9,26 +9,28 @@ import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; const VideoSearchChainPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos. You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. - + Example: 1. Follow up question: How does a car work? Rephrased: How does a car work? - + 2. Follow up question: What is the theory of relativity? Rephrased: What is theory of relativity - + 3. Follow up question: How does an AC work? Rephrased: How does an AC work - + Conversation: {chat_history} - + Follow up question: {query} Rephrased question: `; @@ -84,6 +86,28 @@ async function performVideoSearch(query: string) { break; } + case 'brave': { + const braveResult = await searchBraveAPI(youtubeQuery); + braveResult.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + const videoId = new URL(result.url).searchParams.get('v'); + videos.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + iframe_src: videoId ? `https://www.youtube.com/embed/${videoId}` : null + }); + } + }); + break; + } + + case 'yacy': { + console.log('Not available for yacy'); + videos = []; + break; + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/config.ts b/src/config.ts index 58d8f17..1bffd49 100644 --- a/src/config.ts +++ b/src/config.ts @@ -47,6 +47,9 @@ interface Config { BRAVE: { API_KEY: string; }; + YACY: { + ENDPOINT: string; + }; }; } @@ -84,6 +87,8 @@ export const getBraveApiKey = () => loadConfig().SEARCH_ENGINES.BRAVE.API_KEY; export const getBingSubscriptionKey = () => loadConfig().SEARCH_ENGINES.BING.SUBSCRIPTION_KEY; +export const getYacyJsonEndpoint = () => loadConfig().SEARCH_ENGINES.YACY.ENDPOINT; + export const getSearxngApiEndpoint = () => process.env.SEARXNG_API_URL || loadConfig().SEARCH_ENGINES.SEARXNG.ENDPOINT; diff --git a/src/lib/searchEngines/bing.ts b/src/lib/searchEngines/bing.ts new file mode 100644 index 0000000..c03c82d --- /dev/null +++ b/src/lib/searchEngines/bing.ts @@ -0,0 +1,116 @@ +import axios from 'axios'; +import { getBingSubscriptionKey } from '../../config'; + +interface BingAPISearchResult { + _type: string; + name: string; + url: string; + displayUrl: string; + snippet?: string; + dateLastCrawled?: string; + thumbnailUrl?: string; + contentUrl?: string; + hostPageUrl?: string; + width?: number; + height?: number; + accentColor?: string; + contentSize?: string; + datePublished?: string; + encodingFormat?: string; + hostPageDisplayUrl?: string; + id?: string; + isLicensed?: boolean; + isFamilyFriendly?: boolean; + language?: string; + mediaUrl?: string; + motionThumbnailUrl?: string; + publisher?: string; + viewCount?: number; + webSearchUrl?: string; + primaryImageOfPage?: { + thumbnailUrl?: string; + width?: number; + height?: number; + }; + provider?: Array<{ + name: string; + _type: string; + }>; + video?: { + allowHttpsEmbed?: boolean; + embedHtml?: string; + allowMobileEmbed?: boolean; + viewCount?: number; + }; + image?: { + thumbnail?: { + contentUrl?: string; + width?: number; + height?: number; + }; + imageInsightsToken?: string; + imageId?: string; + }; + metatags?: Array<{ + [key: string]: string; + 'og:type'?: string; + 'og:image'?: string; + 'og:video'?: string; + }>; + mentions?: Array<{ + name: string; + }>; + entity?: { + entityPresentationInfo?: { + entityTypeHints?: string[]; + }; + }; +} + + +export const searchBingAPI = async (query: string) => { + try { + const bingApiKey = await getBingSubscriptionKey(); + const url = new URL(`https://api.cognitive.microsoft.com/bing/v7.0/search`); + url.searchParams.append('q', query); + url.searchParams.append('responseFilter', 'Webpages,Images,Videos,News'); + + const res = await axios.get(url.toString(), { + headers: { + 'Ocp-Apim-Subscription-Key': bingApiKey, + 'Accept': 'application/json' + } + }); + + if (res.data.error) { + throw new Error(`Bing API Error: ${res.data.error.message}`); + } + + const originalres = res.data; + const webResults = originalres.webPages?.value || []; + const imageResults = originalres.images?.value || []; + const videoResults = originalres.videos?.value || []; + + const results = webResults.map((item: any) => ({ + title: item.name, + url: item.url, + content: item.snippet, + img_src: item.primaryImageOfPage?.thumbnailUrl + || imageResults.find((img: any) => img.hostPageUrl === item.url)?.thumbnailUrl + || videoResults.find((vid: any) => vid.hostPageUrl === item.url)?.thumbnailUrl, + ...(item.video && { + videoData: { + duration: item.video.duration, + embedUrl: item.video.embedHtml?.match(/src="(.*?)"/)?.[1] + } + }) + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`Bing API Error: ${errorMessage}`); + } +}; diff --git a/src/lib/searchEngines/brave.ts b/src/lib/searchEngines/brave.ts new file mode 100644 index 0000000..3d2ed68 --- /dev/null +++ b/src/lib/searchEngines/brave.ts @@ -0,0 +1,96 @@ +import axios from 'axios'; +import { getBraveApiKey } from '../../config'; + +interface BraveSearchResult { + title: string; + url: string; + content?: string; + img_src?: string; + age?: string; + family_friendly?: boolean; + language?: string; + video?: { + embedUrl?: string; + duration?: string; + }; + rating?: { + value: number; + scale: number; + }; + products?: Array<{ + name: string; + price?: string; + }>; + recipe?: { + ingredients?: string[]; + cookTime?: string; + }; + meta?: { + fetched?: string; + lastCrawled?: string; + }; +} + +export const searchBraveAPI = async ( + query: string, + numResults: number = 20 +): Promise<{ results: BraveSearchResult[]; originalres: any }> => { + try { + const braveApiKey = await getBraveApiKey(); + const url = new URL(`https://api.search.brave.com/res/v1/web/search`); + + url.searchParams.append('q', query); + url.searchParams.append('count', numResults.toString()); + + const res = await axios.get(url.toString(), { + headers: { + 'X-Subscription-Token': braveApiKey, + 'Accept': 'application/json' + } + }); + + if (res.data.error) { + throw new Error(`Brave API Error: ${res.data.error.message}`); + } + + const originalres = res.data; + const webResults = originalres.web?.results || []; + + const results: BraveSearchResult[] = webResults.map((item: any) => ({ + title: item.title, + url: item.url, + content: item.description, + img_src: item.thumbnail?.src || item.deep_results?.images?.[0]?.src, + age: item.age, + family_friendly: item.family_friendly, + language: item.language, + video: item.video ? { + embedUrl: item.video.embed_url, + duration: item.video.duration + } : undefined, + rating: item.rating ? { + value: item.rating.value, + scale: item.rating.scale_max + } : undefined, + products: item.deep_results?.product_cluster?.map((p: any) => ({ + name: p.name, + price: p.price + })), + recipe: item.recipe ? { + ingredients: item.recipe.ingredients, + cookTime: item.recipe.cook_time + } : undefined, + meta: { + fetched: item.meta?.fetched, + lastCrawled: item.meta?.last_crawled + } + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`Brave API Error: ${errorMessage}`); + } +}; diff --git a/src/lib/searchEngines/yacy.ts b/src/lib/searchEngines/yacy.ts new file mode 100644 index 0000000..909ca9b --- /dev/null +++ b/src/lib/searchEngines/yacy.ts @@ -0,0 +1,83 @@ +import axios from 'axios'; +import { getYacyJsonEndpoint } from '../../config'; + +interface YaCySearchResult { + channels: { + title: string; + description: string; + link: string; + image: { + url: string; + title: string; + link: string; + }; + startIndex: string; + itemsPerPage: string; + searchTerms: string; + items: { + title: string; + link: string; + code: string; + description: string; + pubDate: string; + image?: string; + size: string; + sizename: string; + guid: string; + faviconUrl: string; + host: string; + path: string; + file: string; + urlhash: string; + ranking: string; + }[]; + navigation: { + facetname: string; + displayname: string; + type: string; + min: string; + max: string; + mean: string; + elements: { + name: string; + count: string; + modifier: string; + url: string; + }[]; + }[]; + }[]; +} + + +export const searchYaCy = async ( + query: string, + numResults: number = 20 +) => { + try { + const yacyBaseUrl = getYacyJsonEndpoint(); + + const url = new URL(`${yacyBaseUrl}/yacysearch.json`); + url.searchParams.append('query', query); + url.searchParams.append('count', numResults.toString()); + + const res = await axios.get(url.toString()); + + const originalres = res.data as YaCySearchResult; + + const results = originalres.channels[0].items.map(item => ({ + title: item.title, + url: item.link, + content: item.description, + img_src: item.image || null, + pubDate: item.pubDate, + host: item.host, + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`YaCy Error: ${errorMessage}`); + } +}; diff --git a/src/routes/discover.ts b/src/routes/discover.ts index 0f47b97..ecf3099 100644 --- a/src/routes/discover.ts +++ b/src/routes/discover.ts @@ -1,6 +1,8 @@ import express from 'express'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import logger from '../utils/logger'; @@ -42,6 +44,34 @@ async function performSearch(query: string, site: string) { return searxResult.results; } + case 'brave': { + const braveResult = await searchBraveAPI(query); + return braveResult.results.map(item => ({ + title: item.title, + url: item.url, + content: item.content, + thumbnail: item.img_src, + img_src: item.img_src, + iframe_src: null, + author: item.meta?.fetched || site, + publishedDate: item.meta?.lastCrawled + })); + } + + case 'yacy': { + const yacyResult = await searchYaCy(query); + return yacyResult.results.map((item) => ({ + title: item.title, + url: item.url, + content: item.content, + thumbnail: item.img_src, + img_src: item.img_src, + iframe_src: null, + author: item?.host || site, + publishedDate: item?.pubDate + })) + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/search/metaSearchAgent.ts b/src/search/metaSearchAgent.ts index 1db9af6..3c1c150 100644 --- a/src/search/metaSearchAgent.ts +++ b/src/search/metaSearchAgent.ts @@ -19,6 +19,9 @@ import { getDocumentsFromLinks } from '../utils/documents'; import { Document } from 'langchain/document'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBingAPI } from '../lib/searchEngines/bing'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import path from 'path'; import fs from 'fs'; @@ -134,7 +137,7 @@ class MetaSearchAgent implements MetaSearchAgentType { You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query. If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary. - + - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague. - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query. - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format. @@ -219,6 +222,15 @@ class MetaSearchAgent implements MetaSearchAgentType { case 'google': res = await searchGooglePSE(question); break; + case 'bing': + res = await searchBingAPI(question); + break; + case 'brave': + res = await searchBraveAPI(question); + break; + case 'yacy': + res = await searchYaCy(question); + break; default: throw new Error(`Unknown search engine ${searchEngine}`); }