From af4b97b7663490ca4ebcdb38a8ee5938e001d0f4 Mon Sep 17 00:00:00 2001 From: HadiCherkaoui Date: Fri, 28 Feb 2025 14:46:24 +0100 Subject: [PATCH] add yacy --- sample.config.toml | 5 +- src/chains/imageSearchAgent.ts | 16 +++++++ src/chains/videoSearchAgent.ts | 7 +++ src/config.ts | 5 ++ src/lib/searchEngines/yacy.ts | 83 ++++++++++++++++++++++++++++++++++ src/routes/discover.ts | 15 ++++++ src/search/metaSearchAgent.ts | 4 ++ 7 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 src/lib/searchEngines/yacy.ts diff --git a/sample.config.toml b/sample.config.toml index 2294a41..3690d70 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -2,7 +2,7 @@ PORT = 3001 # Port to run the server on SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m") -SEARCH_ENGINE_BACKEND = "searxng" # "google" | "searxng" | "bing" | "brave" +SEARCH_ENGINE_BACKEND = "searxng" # "google" | "searxng" | "bing" | "brave" | "yacy" [MODELS.OPENAI] API_KEY = "" @@ -35,3 +35,6 @@ SUBSCRIPTION_KEY = "" [SEARCH_ENGINES.BRAVE] API_KEY = "" + +[SEARCH_ENGINES.YACY] +ENDPOINT = "" \ No newline at end of file diff --git a/src/chains/imageSearchAgent.ts b/src/chains/imageSearchAgent.ts index 8b35de0..04fb108 100644 --- a/src/chains/imageSearchAgent.ts +++ b/src/chains/imageSearchAgent.ts @@ -10,6 +10,7 @@ import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; @@ -91,6 +92,21 @@ async function performImageSearch(query: string) { break; } + case 'yacy': { + const yacyResult = await searchYaCy(query); 
+ images = yacyResult.results.map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.url + } + } + }).filter(Boolean); + break; + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/chains/videoSearchAgent.ts b/src/chains/videoSearchAgent.ts index f1f2dfb..1fcbc13 100644 --- a/src/chains/videoSearchAgent.ts +++ b/src/chains/videoSearchAgent.ts @@ -10,6 +10,7 @@ import { StringOutputParser } from '@langchain/core/output_parsers'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; @@ -101,6 +102,12 @@ async function performVideoSearch(query: string) { break; } + case 'yacy': { + console.log('Not available for yacy'); + videos = []; + break; + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/config.ts b/src/config.ts index 58d8f17..1bffd49 100644 --- a/src/config.ts +++ b/src/config.ts @@ -47,6 +47,9 @@ interface Config { BRAVE: { API_KEY: string; }; + YACY: { + ENDPOINT: string; + }; }; } @@ -84,6 +87,8 @@ export const getBraveApiKey = () => loadConfig().SEARCH_ENGINES.BRAVE.API_KEY; export const getBingSubscriptionKey = () => loadConfig().SEARCH_ENGINES.BING.SUBSCRIPTION_KEY; +export const getYacyJsonEndpoint = () => loadConfig().SEARCH_ENGINES.YACY.ENDPOINT; + export const getSearxngApiEndpoint = () => process.env.SEARXNG_API_URL || loadConfig().SEARCH_ENGINES.SEARXNG.ENDPOINT; diff --git a/src/lib/searchEngines/yacy.ts b/src/lib/searchEngines/yacy.ts new file mode 100644 index 0000000..909ca9b --- /dev/null +++ b/src/lib/searchEngines/yacy.ts @@ -0,0 
+1,112 @@
+import axios from 'axios';
+import { getYacyJsonEndpoint } from '../../config';
+
+/**
+ * Fields of the `yacysearch.json` response body that this module declares.
+ * Every leaf value is typed as a string, including numeric-looking ones such
+ * as `startIndex` and `ranking`.
+ */
+interface YaCySearchResult {
+  channels: {
+    title: string;
+    description: string;
+    link: string;
+    image: {
+      url: string;
+      title: string;
+      link: string;
+    };
+    startIndex: string;
+    itemsPerPage: string;
+    searchTerms: string;
+    items: {
+      title: string;
+      link: string;
+      code: string;
+      description: string;
+      pubDate: string;
+      image?: string;
+      size: string;
+      sizename: string;
+      guid: string;
+      faviconUrl: string;
+      host: string;
+      path: string;
+      file: string;
+      urlhash: string;
+      ranking: string;
+    }[];
+    navigation: {
+      facetname: string;
+      displayname: string;
+      type: string;
+      min: string;
+      max: string;
+      mean: string;
+      elements: {
+        name: string;
+        count: string;
+        modifier: string;
+        url: string;
+      }[];
+    }[];
+  }[];
+}
+
+/**
+ * Turns whatever was thrown during a YaCy request into a printable message.
+ * A response body attached to an axios error takes precedence over the
+ * error's own `message`; anything else becomes 'Unknown error'.
+ */
+const describeYaCyError = (error: unknown): string => {
+  if (axios.isAxiosError(error) && error.response?.data) {
+    return JSON.stringify(error.response.data, null, 2);
+  }
+  if (error instanceof Error && error.message) {
+    return error.message;
+  }
+  return 'Unknown error';
+};
+
+/**
+ * Queries the configured YaCy instance through its `yacysearch.json` endpoint.
+ *
+ * @param query - Search terms, sent as the `query` parameter.
+ * @param numResults - Value sent as the `count` parameter (defaults to 20).
+ * @returns `results`, one normalized entry per item of the first channel (empty
+ *   when the response has no channel or no items), and `originalres`, the
+ *   response body as received.
+ * @throws Error whose message starts with `YaCy Error: ` when the endpoint is
+ *   not configured or the request fails.
+ */
+export const searchYaCy = async (query: string, numResults: number = 20) => {
+  try {
+    // Strip trailing slashes so "http://host/" does not yield "//yacysearch.json".
+    const yacyBaseUrl = (getYacyJsonEndpoint() ?? '').trim().replace(/\/+$/, '');
+    if (!yacyBaseUrl) {
+      throw new Error('SEARCH_ENGINES.YACY.ENDPOINT is not set');
+    }
+
+    const url = new URL(`${yacyBaseUrl}/yacysearch.json`);
+    url.searchParams.set('query', query);
+    url.searchParams.set('count', numResults.toString());
+
+    const res = await axios.get<YaCySearchResult>(url.toString());
+    const originalres = res.data;
+
+    // A body without channels/items is treated as "no hits" rather than a crash.
+    const items = originalres?.channels?.[0]?.items ?? [];
+    const results = items.map((item) => ({
+      title: item.title,
+      url: item.link,
+      content: item.description,
+      img_src: item.image || null, // '' and undefined both mean "no image"
+      pubDate: item.pubDate,
+      host: item.host,
+    }));
+
+    return { results, originalres };
+  } catch (error: unknown) {
+    throw new Error(`YaCy Error: ${describeYaCyError(error)}`);
+  }
+};
diff --git a/src/routes/discover.ts b/src/routes/discover.ts index bb57c85..ecf3099 100644 --- a/src/routes/discover.ts +++ b/src/routes/discover.ts @@ -2,6 +2,7 @@ import express from 'express'; import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import logger from '../utils/logger'; @@ -57,6 +58,20 @@ async function performSearch(query: string, site: string) { })); } + case 'yacy': { + const yacyResult = await searchYaCy(query); + return yacyResult.results.map((item) => ({ + title: item.title, + url: item.url, + content: item.content, + thumbnail: item.img_src, + img_src: item.img_src, + iframe_src: null, + author: item?.host || site, + publishedDate: item?.pubDate + })) + } + default: throw new Error(`Unknown search engine ${searchEngine}`); } diff --git a/src/search/metaSearchAgent.ts b/src/search/metaSearchAgent.ts index 4969f8d..3c1c150 100644 --- a/src/search/metaSearchAgent.ts +++ b/src/search/metaSearchAgent.ts @@ -21,6 +21,7 @@ import { searchSearxng } from '../lib/searchEngines/searxng'; import { searchGooglePSE } from '../lib/searchEngines/google_pse'; import { searchBingAPI } from '../lib/searchEngines/bing'; import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; import { getSearchEngineBackend } from '../config'; import path from 'path'; import fs from 'fs'; @@ -227,6 +228,9 @@ class MetaSearchAgent implements MetaSearchAgentType { case 'brave': res = await searchBraveAPI(question); break; + case 'yacy': + res = await searchYaCy(question); + break; default: throw new Error(`Unknown search engine 
${searchEngine}`); }