fix sample messup

This commit is contained in:
Lars Erhardt
2025-02-28 14:58:16 +01:00
9 changed files with 417 additions and 22 deletions

View File

@ -2,12 +2,7 @@
PORT = 3001 # Port to run the server on
SIMILARITY_MEASURE = "cosine" # "cosine" or "dot"
KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m")
SEARCH_ENGINE_BACKEND = "searxng" # "google" | "searxng" | "ddg" | "bing" | "brave"
[KEYCLOAK]
URL = ""
REALM = ""
CLIENT_ID = ""
SEARCH_ENGINE_BACKEND = "searxng" # "google" | "searxng" | "bing" | "brave" | "yacy"
[MODELS.OPENAI]
API_KEY = ""
@ -40,3 +35,6 @@ SUBSCRIPTION_KEY = ""
[SEARCH_ENGINES.BRAVE]
API_KEY = ""
[SEARCH_ENGINES.YACY]
ENDPOINT = ""

View File

@ -9,6 +9,8 @@ import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchYaCy } from '../lib/searchEngines/yacy';
import { getSearchEngineBackend } from '../config';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
@ -46,16 +48,15 @@ async function performImageSearch(query: string) {
case 'google': {
const googleResult = await searchGooglePSE(query);
images = googleResult.results.map((result) => {
if (result.img_src && result.url && result.title) {
return {
img_src: result.img_src,
url: result.url,
title: result.title,
source: result.displayLink
};
}
})
.filter(Boolean);
if (result.img_src && result.url && result.title) {
return {
img_src: result.img_src,
url: result.url,
title: result.title,
source: result.displayLink
};
}
}).filter(Boolean);
break;
}
@ -76,6 +77,36 @@ async function performImageSearch(query: string) {
break;
}
case 'brave': {
const braveResult = await searchBraveAPI(query);
images = braveResult.results.map((result) => {
if (result.img_src && result.url && result.title) {
return {
img_src: result.img_src,
url: result.url,
title: result.title,
source: result.url
};
}
}).filter(Boolean);
break;
}
case 'yacy': {
const yacyResult = await searchYaCy(query);
images = yacyResult.results.map((result) => {
if (result.img_src && result.url && result.title) {
return {
img_src: result.img_src,
url: result.url,
title: result.title,
source: result.url
}
}
}).filter(Boolean);
break;
}
default:
throw new Error(`Unknown search engine ${searchEngine}`);
}

View File

@ -9,26 +9,28 @@ import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchYaCy } from '../lib/searchEngines/yacy';
import { getSearchEngineBackend } from '../config';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
const VideoSearchChainPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos.
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
Example:
1. Follow up question: How does a car work?
Rephrased: How does a car work?
2. Follow up question: What is the theory of relativity?
Rephrased: What is theory of relativity
3. Follow up question: How does an AC work?
Rephrased: How does an AC work
Conversation:
{chat_history}
Follow up question: {query}
Rephrased question:
`;
@ -84,6 +86,28 @@ async function performVideoSearch(query: string) {
break;
}
case 'brave': {
const braveResult = await searchBraveAPI(youtubeQuery);
braveResult.results.forEach((result) => {
if (result.img_src && result.url && result.title) {
const videoId = new URL(result.url).searchParams.get('v');
videos.push({
img_src: result.img_src,
url: result.url,
title: result.title,
iframe_src: videoId ? `https://www.youtube.com/embed/${videoId}` : null
});
}
});
break;
}
case 'yacy': {
console.log('Not available for yacy');
videos = [];
break;
}
default:
throw new Error(`Unknown search engine ${searchEngine}`);
}

View File

@ -52,6 +52,9 @@ interface Config {
BRAVE: {
API_KEY: string;
};
YACY: {
ENDPOINT: string;
};
};
}
@ -91,6 +94,8 @@ export const getBraveApiKey = () => loadConfig().SEARCH_ENGINES.BRAVE.API_KEY;
export const getBingSubscriptionKey = () =>
loadConfig().SEARCH_ENGINES.BING.SUBSCRIPTION_KEY;
export const getYacyJsonEndpoint = () => loadConfig().SEARCH_ENGINES.YACY.ENDPOINT;
export const getSearxngApiEndpoint = () =>
process.env.SEARXNG_API_URL || loadConfig().SEARCH_ENGINES.SEARXNG.ENDPOINT;

View File

@ -0,0 +1,116 @@
import axios from 'axios';
import { getBingSubscriptionKey } from '../../config';
interface BingAPISearchResult {
_type: string;
name: string;
url: string;
displayUrl: string;
snippet?: string;
dateLastCrawled?: string;
thumbnailUrl?: string;
contentUrl?: string;
hostPageUrl?: string;
width?: number;
height?: number;
accentColor?: string;
contentSize?: string;
datePublished?: string;
encodingFormat?: string;
hostPageDisplayUrl?: string;
id?: string;
isLicensed?: boolean;
isFamilyFriendly?: boolean;
language?: string;
mediaUrl?: string;
motionThumbnailUrl?: string;
publisher?: string;
viewCount?: number;
webSearchUrl?: string;
primaryImageOfPage?: {
thumbnailUrl?: string;
width?: number;
height?: number;
};
provider?: Array<{
name: string;
_type: string;
}>;
video?: {
allowHttpsEmbed?: boolean;
embedHtml?: string;
allowMobileEmbed?: boolean;
viewCount?: number;
};
image?: {
thumbnail?: {
contentUrl?: string;
width?: number;
height?: number;
};
imageInsightsToken?: string;
imageId?: string;
};
metatags?: Array<{
[key: string]: string;
'og:type'?: string;
'og:image'?: string;
'og:video'?: string;
}>;
mentions?: Array<{
name: string;
}>;
entity?: {
entityPresentationInfo?: {
entityTypeHints?: string[];
};
};
}
export const searchBingAPI = async (query: string) => {
try {
const bingApiKey = await getBingSubscriptionKey();
const url = new URL(`https://api.cognitive.microsoft.com/bing/v7.0/search`);
url.searchParams.append('q', query);
url.searchParams.append('responseFilter', 'Webpages,Images,Videos,News');
const res = await axios.get(url.toString(), {
headers: {
'Ocp-Apim-Subscription-Key': bingApiKey,
'Accept': 'application/json'
}
});
if (res.data.error) {
throw new Error(`Bing API Error: ${res.data.error.message}`);
}
const originalres = res.data;
const webResults = originalres.webPages?.value || [];
const imageResults = originalres.images?.value || [];
const videoResults = originalres.videos?.value || [];
const results = webResults.map((item: any) => ({
title: item.name,
url: item.url,
content: item.snippet,
img_src: item.primaryImageOfPage?.thumbnailUrl
|| imageResults.find((img: any) => img.hostPageUrl === item.url)?.thumbnailUrl
|| videoResults.find((vid: any) => vid.hostPageUrl === item.url)?.thumbnailUrl,
...(item.video && {
videoData: {
duration: item.video.duration,
embedUrl: item.video.embedHtml?.match(/src="(.*?)"/)?.[1]
}
})
}));
return { results, originalres };
} catch (error) {
const errorMessage = error.response?.data
? JSON.stringify(error.response.data, null, 2)
: error.message || 'Unknown error';
throw new Error(`Bing API Error: ${errorMessage}`);
}
};

View File

@ -0,0 +1,96 @@
import axios from 'axios';
import { getBraveApiKey } from '../../config';
interface BraveSearchResult {
title: string;
url: string;
content?: string;
img_src?: string;
age?: string;
family_friendly?: boolean;
language?: string;
video?: {
embedUrl?: string;
duration?: string;
};
rating?: {
value: number;
scale: number;
};
products?: Array<{
name: string;
price?: string;
}>;
recipe?: {
ingredients?: string[];
cookTime?: string;
};
meta?: {
fetched?: string;
lastCrawled?: string;
};
}
export const searchBraveAPI = async (
query: string,
numResults: number = 20
): Promise<{ results: BraveSearchResult[]; originalres: any }> => {
try {
const braveApiKey = await getBraveApiKey();
const url = new URL(`https://api.search.brave.com/res/v1/web/search`);
url.searchParams.append('q', query);
url.searchParams.append('count', numResults.toString());
const res = await axios.get(url.toString(), {
headers: {
'X-Subscription-Token': braveApiKey,
'Accept': 'application/json'
}
});
if (res.data.error) {
throw new Error(`Brave API Error: ${res.data.error.message}`);
}
const originalres = res.data;
const webResults = originalres.web?.results || [];
const results: BraveSearchResult[] = webResults.map((item: any) => ({
title: item.title,
url: item.url,
content: item.description,
img_src: item.thumbnail?.src || item.deep_results?.images?.[0]?.src,
age: item.age,
family_friendly: item.family_friendly,
language: item.language,
video: item.video ? {
embedUrl: item.video.embed_url,
duration: item.video.duration
} : undefined,
rating: item.rating ? {
value: item.rating.value,
scale: item.rating.scale_max
} : undefined,
products: item.deep_results?.product_cluster?.map((p: any) => ({
name: p.name,
price: p.price
})),
recipe: item.recipe ? {
ingredients: item.recipe.ingredients,
cookTime: item.recipe.cook_time
} : undefined,
meta: {
fetched: item.meta?.fetched,
lastCrawled: item.meta?.last_crawled
}
}));
return { results, originalres };
} catch (error) {
const errorMessage = error.response?.data
? JSON.stringify(error.response.data, null, 2)
: error.message || 'Unknown error';
throw new Error(`Brave API Error: ${errorMessage}`);
}
};

View File

@ -0,0 +1,83 @@
import axios from 'axios';
import { getYacyJsonEndpoint } from '../../config';
interface YaCySearchResult {
channels: {
title: string;
description: string;
link: string;
image: {
url: string;
title: string;
link: string;
};
startIndex: string;
itemsPerPage: string;
searchTerms: string;
items: {
title: string;
link: string;
code: string;
description: string;
pubDate: string;
image?: string;
size: string;
sizename: string;
guid: string;
faviconUrl: string;
host: string;
path: string;
file: string;
urlhash: string;
ranking: string;
}[];
navigation: {
facetname: string;
displayname: string;
type: string;
min: string;
max: string;
mean: string;
elements: {
name: string;
count: string;
modifier: string;
url: string;
}[];
}[];
}[];
}
export const searchYaCy = async (
query: string,
numResults: number = 20
) => {
try {
const yacyBaseUrl = getYacyJsonEndpoint();
const url = new URL(`${yacyBaseUrl}/yacysearch.json`);
url.searchParams.append('query', query);
url.searchParams.append('count', numResults.toString());
const res = await axios.get(url.toString());
const originalres = res.data as YaCySearchResult;
const results = originalres.channels[0].items.map(item => ({
title: item.title,
url: item.link,
content: item.description,
img_src: item.image || null,
pubDate: item.pubDate,
host: item.host,
}));
return { results, originalres };
} catch (error) {
const errorMessage = error.response?.data
? JSON.stringify(error.response.data, null, 2)
: error.message || 'Unknown error';
throw new Error(`YaCy Error: ${errorMessage}`);
}
};

View File

@ -1,6 +1,8 @@
import express from 'express';
import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchYaCy } from '../lib/searchEngines/yacy';
import { getSearchEngineBackend } from '../config';
import logger from '../utils/logger';
@ -42,6 +44,34 @@ async function performSearch(query: string, site: string) {
return searxResult.results;
}
case 'brave': {
const braveResult = await searchBraveAPI(query);
return braveResult.results.map(item => ({
title: item.title,
url: item.url,
content: item.content,
thumbnail: item.img_src,
img_src: item.img_src,
iframe_src: null,
author: item.meta?.fetched || site,
publishedDate: item.meta?.lastCrawled
}));
}
case 'yacy': {
const yacyResult = await searchYaCy(query);
return yacyResult.results.map((item) => ({
title: item.title,
url: item.url,
content: item.content,
thumbnail: item.img_src,
img_src: item.img_src,
iframe_src: null,
author: item?.host || site,
publishedDate: item?.pubDate
}))
}
default:
throw new Error(`Unknown search engine ${searchEngine}`);
}

View File

@ -19,6 +19,9 @@ import { getDocumentsFromLinks } from '../utils/documents';
import { Document } from 'langchain/document';
import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBingAPI } from '../lib/searchEngines/bing';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchYaCy } from '../lib/searchEngines/yacy';
import { getSearchEngineBackend } from '../config';
import path from 'path';
import fs from 'fs';
@ -134,7 +137,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
- **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
- **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
- **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
@ -219,6 +222,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
case 'google':
res = await searchGooglePSE(question);
break;
case 'bing':
res = await searchBingAPI(question);
break;
case 'brave':
res = await searchBraveAPI(question);
break;
case 'yacy':
res = await searchYaCy(question);
break;
default:
throw new Error(`Unknown search engine ${searchEngine}`);
}