Compare commits

...

31 Commits

Author SHA1 Message Date
ItzCrazyKns
59ab10110a Merge branch 'master' of https://github.com/HadiCherkaoui/Perplexica into pr/658 2025-03-02 15:35:14 +05:30
ItzCrazyKns
10f9cd2f79 feat(app): lint & beautify 2025-03-02 15:35:08 +05:30
Hadi Cherkaoui
82e1dd73b0 add config to ui 2025-03-02 11:00:56 +01:00
Hadi Cherkaoui
728b499281 Merge branch 'ItzCrazyKns:master' into master 2025-03-01 18:16:08 +01:00
Hadi Cherkaoui
5a4dafc753 Update yarn.lock 2025-03-01 18:12:40 +01:00
Hadi Cherkaoui
4ac99786f0 Update yarn.lock 2025-03-01 18:11:34 +01:00
Hadi Cherkaoui
1224281278 Update yarn.lock 2025-03-01 18:09:57 +01:00
Hadi Cherkaoui
3daae29a5d Update package.json 2025-03-01 18:05:28 +01:00
Hadi Cherkaoui
50bcaa13f2 Revert "Delete package.json"
This reverts commit 31e4abf068d5922e389d1f9cd5dd3d7d1db0b6a8.
2025-03-01 17:58:54 +01:00
Hadi Cherkaoui
31e4abf068 Delete package.json 2025-03-01 17:57:40 +01:00
Hadi Cherkaoui
fd6e701cf0 Delete docs/installation/DEVELOPMENT.md 2025-03-01 17:49:59 +01:00
Hadi Cherkaoui
89880a2555 Delete .dev directory 2025-03-01 17:48:54 +01:00
ItzCrazyKns
07776d8699 feat(config): remove key cloak config functions 2025-03-01 22:13:38 +05:30
Hadi Cherkaoui
32fb6ac131 Update config.ts 2025-03-01 14:16:39 +01:00
Hadi Cherkaoui
99137d95e7 Update docker-compose.yaml 2025-03-01 14:15:44 +01:00
Hadi Cherkaoui
490a8db538 Update sample.config.toml 2025-03-01 14:15:21 +01:00
HadiCherkaoui
aba702c51b make it possible to configure multiple engines 2025-02-28 15:50:59 +01:00
Hadi Cherkaoui
89a6e7fbb1 Merge pull request #11 from HadiCherkaoui/add-bing
add bing
2025-02-28 15:17:40 +01:00
HadiCherkaoui
f19d2e3a97 add bing 2025-02-28 15:16:54 +01:00
Lars Erhardt
4a7ca8fc68 Add keycloak config params to sample config 2025-02-28 15:07:50 +01:00
Lars Erhardt
3d642f2539 fix sample messup 2025-02-28 14:58:16 +01:00
Lars Erhardt
aa91d3bc60 Add config params for keycloak 2025-02-28 14:56:33 +01:00
Lars Erhardt
93c5ed46f6 Merge pull request #9 from HadiCherkaoui/add-bing
Added brave, yacy and changed back the default search engine to searxng
2025-02-28 14:49:58 +01:00
HadiCherkaoui
af4b97b766 add yacy 2025-02-28 14:46:24 +01:00
HadiCherkaoui
ca86a7e358 add brave as a search engine 2025-02-28 13:11:00 +01:00
Lars Erhardt
99351fc2a6 Added multiple search backends
Add support for multiple search engines/google as a search engine
2025-02-28 09:33:50 +01:00
HadiCherkaoui
7a816efc04 fix google pse
i tested everything, also tested if invalid api key and such and regression tested searxng which still works
2025-02-28 09:13:11 +01:00
Hadi Cherkaoui
4d41243108 added multi search engine support (didnt test) WIP 2025-02-27 21:32:26 +01:00
Hadi Cherkaoui
6c218b5fee move searxng.ts and update imports 2025-02-27 18:24:54 +01:00
Hadi Cherkaoui
1c1f31e23a Update .gitignore 2025-02-27 18:21:36 +01:00
Hadi Cherkaoui
5b15bcfe17 Add config for multiple search engines 2025-02-27 18:21:00 +01:00
19 changed files with 1125 additions and 91 deletions

4
.gitignore vendored
View File

@ -2,6 +2,7 @@
node_modules/ node_modules/
npm-debug.log npm-debug.log
yarn-error.log yarn-error.log
package-lock.json
# Build output # Build output
/.next/ /.next/
@ -37,3 +38,6 @@ Thumbs.db
# Db # Db
db.sqlite db.sqlite
/searxng /searxng
# Dev
docker-compose-dev.yaml

View File

@ -2,7 +2,6 @@
[![Discord](https://dcbadge.vercel.app/api/server/26aArMy8tT?style=flat&compact=true)](https://discord.gg/26aArMy8tT) [![Discord](https://dcbadge.vercel.app/api/server/26aArMy8tT?style=flat&compact=true)](https://discord.gg/26aArMy8tT)
![preview](.assets/perplexica-screenshot.png?) ![preview](.assets/perplexica-screenshot.png?)
## Table of Contents <!-- omit in toc --> ## Table of Contents <!-- omit in toc -->

View File

@ -4,7 +4,7 @@ services:
volumes: volumes:
- ./searxng:/etc/searxng:rw - ./searxng:/etc/searxng:rw
ports: ports:
- 4000:8080 - '4000:8080'
networks: networks:
- perplexica-network - perplexica-network
restart: unless-stopped restart: unless-stopped
@ -19,7 +19,7 @@ services:
depends_on: depends_on:
- searxng - searxng
ports: ports:
- 3001:3001 - '3001:3001'
volumes: volumes:
- backend-dbstore:/home/perplexica/data - backend-dbstore:/home/perplexica/data
- uploads:/home/perplexica/uploads - uploads:/home/perplexica/uploads
@ -41,7 +41,7 @@ services:
depends_on: depends_on:
- perplexica-backend - perplexica-backend
ports: ports:
- 3000:3000 - '3000:3000'
networks: networks:
- perplexica-network - perplexica-network
restart: unless-stopped restart: unless-stopped

View File

@ -30,8 +30,8 @@
"@iarna/toml": "^2.2.5", "@iarna/toml": "^2.2.5",
"@langchain/anthropic": "^0.2.3", "@langchain/anthropic": "^0.2.3",
"@langchain/community": "^0.2.16", "@langchain/community": "^0.2.16",
"@langchain/openai": "^0.0.25",
"@langchain/google-genai": "^0.0.23", "@langchain/google-genai": "^0.0.23",
"@langchain/openai": "^0.0.25",
"@xenova/transformers": "^2.17.1", "@xenova/transformers": "^2.17.1",
"axios": "^1.6.8", "axios": "^1.6.8",
"better-sqlite3": "^11.0.0", "better-sqlite3": "^11.0.0",

View File

@ -3,6 +3,12 @@ PORT = 3001 # Port to run the server on
SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" SIMILARITY_MEASURE = "cosine" # "cosine" or "dot"
KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m") KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m")
[SEARCH_ENGINE_BACKENDS] # "google" | "searxng" | "bing" | "brave" | "yacy"
SEARCH = "searxng"
IMAGE = "searxng"
VIDEO = "searxng"
NEWS = "searxng"
[MODELS.OPENAI] [MODELS.OPENAI]
API_KEY = "" API_KEY = ""
@ -22,5 +28,18 @@ API_URL = ""
[MODELS.OLLAMA] [MODELS.OLLAMA]
API_URL = "" # Ollama API URL - http://host.docker.internal:11434 API_URL = "" # Ollama API URL - http://host.docker.internal:11434
[API_ENDPOINTS] [SEARCH_ENGINES.GOOGLE]
SEARXNG = "http://localhost:32768" # SearxNG API URL API_KEY = ""
CSE_ID = ""
[SEARCH_ENGINES.SEARXNG]
ENDPOINT = ""
[SEARCH_ENGINES.BING]
SUBSCRIPTION_KEY = ""
[SEARCH_ENGINES.BRAVE]
API_KEY = ""
[SEARCH_ENGINES.YACY]
ENDPOINT = ""

View File

@ -15,3 +15,5 @@ server:
engines: engines:
- name: wolframalpha - name: wolframalpha
disabled: false disabled: false
- name: qwant
disabled: true

View File

@ -7,7 +7,12 @@ import { PromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '../utils/formatHistory'; import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages'; import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers'; import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../lib/searxng'; import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchYaCy } from '../lib/searchEngines/yacy';
import { searchBingAPI } from '../lib/searchEngines/bing';
import { getImageSearchEngineBackend } from '../config';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
const imageSearchChainPrompt = ` const imageSearchChainPrompt = `
@ -36,6 +41,103 @@ type ImageSearchChainInput = {
query: string; query: string;
}; };
/**
 * Reshapes engine results into image entries, dropping every result that
 * lacks an image source, a page URL or a title.
 *
 * @param results - Normalized results returned by one of the search engine clients.
 * @param getSource - Optional picker for the `source` field. When omitted the
 *   entries carry no `source` key at all (the shape used for SearxNG).
 * @returns Entries of the form `{ img_src, url, title[, source] }`.
 */
const collectImages = (
  results: any[],
  getSource?: (result: any) => string | undefined,
) =>
  results
    .filter((result) => result.img_src && result.url && result.title)
    .map((result) => {
      const image = {
        img_src: result.img_src,
        url: result.url,
        title: result.title,
      };
      return getSource ? { ...image, source: getSource(result) } : image;
    });

/**
 * Runs an image search against the backend selected by
 * `getImageSearchEngineBackend()` and returns the usable image entries.
 *
 * @param query - The search query.
 * @returns Image entries; every entry has `img_src`, `url` and `title`, and all
 *   backends except SearxNG also set `source`.
 * @throws Error when the configured backend name is not recognised, plus any
 *   error raised by the selected search engine client.
 */
async function performImageSearch(query: string) {
  const searchEngine = getImageSearchEngineBackend();
  const pageUrlAsSource = (result: any) => result.url;

  switch (searchEngine) {
    case 'google': {
      const { results } = await searchGooglePSE(query);
      // The normalized Google results do not currently expose `displayLink`,
      // so fall back to the page URL instead of emitting `source: undefined`.
      return collectImages(
        results,
        (result) => result.displayLink || result.url,
      );
    }

    case 'searxng': {
      const { results } = await searchSearxng(query, {
        engines: ['google images', 'bing images'],
        pageno: 1,
      });
      return collectImages(results);
    }

    case 'brave': {
      const { results } = await searchBraveAPI(query);
      return collectImages(results, pageUrlAsSource);
    }

    case 'yacy': {
      const { results } = await searchYaCy(query);
      return collectImages(results, pageUrlAsSource);
    }

    case 'bing': {
      const { results } = await searchBingAPI(query);
      return collectImages(results, pageUrlAsSource);
    }

    default:
      throw new Error(`Unknown search engine ${searchEngine}`);
  }
}
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
const createImageSearchChain = (llm: BaseChatModel) => { const createImageSearchChain = (llm: BaseChatModel) => {
@ -52,22 +154,7 @@ const createImageSearchChain = (llm: BaseChatModel) => {
llm, llm,
strParser, strParser,
RunnableLambda.from(async (input: string) => { RunnableLambda.from(async (input: string) => {
const res = await searchSearxng(input, { const images = await performImageSearch(input);
engines: ['bing images', 'google images'],
});
const images = [];
res.results.forEach((result) => {
if (result.img_src && result.url && result.title) {
images.push({
img_src: result.img_src,
url: result.url,
title: result.title,
});
}
});
return images.slice(0, 10); return images.slice(0, 10);
}), }),
]); ]);

View File

@ -7,26 +7,30 @@ import { PromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '../utils/formatHistory'; import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages'; import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers'; import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../lib/searxng'; import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchBingAPI } from '../lib/searchEngines/bing';
import { getVideoSearchEngineBackend } from '../config';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
const VideoSearchChainPrompt = ` const VideoSearchChainPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos. You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos.
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
Example: Example:
1. Follow up question: How does a car work? 1. Follow up question: How does a car work?
Rephrased: How does a car work? Rephrased: How does a car work?
2. Follow up question: What is the theory of relativity? 2. Follow up question: What is the theory of relativity?
Rephrased: What is theory of relativity Rephrased: What is theory of relativity
3. Follow up question: How does an AC work? 3. Follow up question: How does an AC work?
Rephrased: How does an AC work Rephrased: How does an AC work
Conversation: Conversation:
{chat_history} {chat_history}
Follow up question: {query} Follow up question: {query}
Rephrased question: Rephrased question:
`; `;
@ -38,6 +42,102 @@ type VideoSearchChainInput = {
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
/**
 * Builds a YouTube embed URL from a watch-page URL.
 *
 * @param pageUrl - URL of the result page.
 * @returns The embed URL, or `null` when the URL has no `v` query parameter or
 *   cannot be parsed.
 */
const youtubeEmbedUrl = (pageUrl: string): string | null => {
  try {
    const videoId = new URL(pageUrl).searchParams.get('v');
    return videoId ? `https://www.youtube.com/embed/${videoId}` : null;
  } catch {
    // A single malformed result URL must not abort the whole search.
    return null;
  }
};

/**
 * Reshapes web results from a `site:youtube.com` query into video entries,
 * dropping every result that lacks an image, a URL or a title.
 *
 * @param results - Normalized results returned by a search engine client.
 * @returns Entries of the form `{ img_src, url, title, iframe_src }`, where
 *   `iframe_src` may be `null`.
 */
const collectYoutubeVideos = (results: any[]) =>
  results
    .filter((result) => result.img_src && result.url && result.title)
    .map((result) => ({
      img_src: result.img_src,
      url: result.url,
      title: result.title,
      iframe_src: youtubeEmbedUrl(result.url),
    }));

/**
 * Runs a video search against the backend selected by
 * `getVideoSearchEngineBackend()`.
 *
 * Google, Brave and Bing are queried with ` site:youtube.com` appended to the
 * query; SearxNG is queried through its `youtube` engine; YaCy returns no videos.
 *
 * @param query - The search query.
 * @returns Video entries `{ img_src, url, title, iframe_src }`.
 * @throws Error when the configured backend name is not recognised, plus any
 *   error raised by the selected search engine client.
 */
async function performVideoSearch(query: string) {
  const searchEngine = getVideoSearchEngineBackend();
  const youtubeQuery = `${query} site:youtube.com`;

  switch (searchEngine) {
    case 'google': {
      const { results } = await searchGooglePSE(youtubeQuery);
      return collectYoutubeVideos(results);
    }

    case 'searxng': {
      const { results } = await searchSearxng(query, {
        engines: ['youtube'],
      });
      // SearxNG results already carry a thumbnail and an embeddable iframe URL;
      // results missing either one are skipped.
      return results
        .filter(
          (result) =>
            result.thumbnail &&
            result.url &&
            result.title &&
            result.iframe_src,
        )
        .map((result) => ({
          img_src: result.thumbnail,
          url: result.url,
          title: result.title,
          iframe_src: result.iframe_src,
        }));
    }

    case 'brave': {
      const { results } = await searchBraveAPI(youtubeQuery);
      return collectYoutubeVideos(results);
    }

    case 'yacy': {
      console.log('Not available for yacy');
      return [];
    }

    case 'bing': {
      const { results } = await searchBingAPI(youtubeQuery);
      return collectYoutubeVideos(results);
    }

    default:
      throw new Error(`Unknown search engine ${searchEngine}`);
  }
}
const createVideoSearchChain = (llm: BaseChatModel) => { const createVideoSearchChain = (llm: BaseChatModel) => {
return RunnableSequence.from([ return RunnableSequence.from([
RunnableMap.from({ RunnableMap.from({
@ -52,28 +152,7 @@ const createVideoSearchChain = (llm: BaseChatModel) => {
llm, llm,
strParser, strParser,
RunnableLambda.from(async (input: string) => { RunnableLambda.from(async (input: string) => {
const res = await searchSearxng(input, { const videos = await performVideoSearch(input);
engines: ['youtube'],
});
const videos = [];
res.results.forEach((result) => {
if (
result.thumbnail &&
result.url &&
result.title &&
result.iframe_src
) {
videos.push({
img_src: result.thumbnail,
url: result.url,
title: result.title,
iframe_src: result.iframe_src,
});
}
});
return videos.slice(0, 10); return videos.slice(0, 10);
}), }),
]); ]);

View File

@ -10,6 +10,12 @@ interface Config {
SIMILARITY_MEASURE: string; SIMILARITY_MEASURE: string;
KEEP_ALIVE: string; KEEP_ALIVE: string;
}; };
SEARCH_ENGINE_BACKENDS: {
SEARCH: string;
IMAGE: string;
VIDEO: string;
NEWS: string;
};
MODELS: { MODELS: {
OPENAI: { OPENAI: {
API_KEY: string; API_KEY: string;
@ -32,8 +38,23 @@ interface Config {
MODEL_NAME: string; MODEL_NAME: string;
}; };
}; };
API_ENDPOINTS: { SEARCH_ENGINES: {
SEARXNG: string; GOOGLE: {
API_KEY: string;
CSE_ID: string;
};
SEARXNG: {
ENDPOINT: string;
};
BING: {
SUBSCRIPTION_KEY: string;
};
BRAVE: {
API_KEY: string;
};
YACY: {
ENDPOINT: string;
};
}; };
} }
@ -61,8 +82,32 @@ export const getAnthropicApiKey = () => loadConfig().MODELS.ANTHROPIC.API_KEY;
export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY; export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY;
// Backend selectors. The optional chaining keeps configs without a
// [SEARCH_ENGINE_BACKENDS] table (e.g. files written before this section
// existed) from crashing with a TypeError; they fall back to "searxng",
// the value shipped in the sample config.

/** Backend used for general web search; "searxng" when unset. */
export const getSearchEngineBackend = () =>
  loadConfig().SEARCH_ENGINE_BACKENDS?.SEARCH || 'searxng';

/** Backend used for image search; falls back to the general search backend. */
export const getImageSearchEngineBackend = () =>
  loadConfig().SEARCH_ENGINE_BACKENDS?.IMAGE || getSearchEngineBackend();

/** Backend used for video search; falls back to the general search backend. */
export const getVideoSearchEngineBackend = () =>
  loadConfig().SEARCH_ENGINE_BACKENDS?.VIDEO || getSearchEngineBackend();

/** Backend used for news search; falls back to the general search backend. */
export const getNewsSearchEngineBackend = () =>
  loadConfig().SEARCH_ENGINE_BACKENDS?.NEWS || getSearchEngineBackend();
/** Reads `SEARCH_ENGINES.GOOGLE.API_KEY` from the loaded config. */
export const getGoogleApiKey = () => {
  const { GOOGLE } = loadConfig().SEARCH_ENGINES;
  return GOOGLE.API_KEY;
};

/** Reads `SEARCH_ENGINES.GOOGLE.CSE_ID` from the loaded config. */
export const getGoogleCseId = () => {
  const { GOOGLE } = loadConfig().SEARCH_ENGINES;
  return GOOGLE.CSE_ID;
};

/** Reads `SEARCH_ENGINES.BRAVE.API_KEY` from the loaded config. */
export const getBraveApiKey = () => {
  const { BRAVE } = loadConfig().SEARCH_ENGINES;
  return BRAVE.API_KEY;
};

/** Reads `SEARCH_ENGINES.BING.SUBSCRIPTION_KEY` from the loaded config. */
export const getBingSubscriptionKey = () => {
  const { BING } = loadConfig().SEARCH_ENGINES;
  return BING.SUBSCRIPTION_KEY;
};

/** Reads `SEARCH_ENGINES.YACY.ENDPOINT` from the loaded config. */
export const getYacyJsonEndpoint = () => {
  const { YACY } = loadConfig().SEARCH_ENGINES;
  return YACY.ENDPOINT;
};
export const getSearxngApiEndpoint = () => export const getSearxngApiEndpoint = () =>
process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG; process.env.SEARXNG_API_URL || loadConfig().SEARCH_ENGINES.SEARXNG.ENDPOINT;
export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL; export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL;

View File

@ -0,0 +1,105 @@
import axios from 'axios';
import { getBingSubscriptionKey } from '../../config';
/**
 * Type applied to each entry of `webPages.value` when searchBingAPI maps the
 * response into normalized results.
 *
 * searchBingAPI reads only `name`, `url`, `snippet`,
 * `primaryImageOfPage.thumbnailUrl`, `video`, `publisher` and `datePublished`;
 * the remaining fields are declared but not used in this file.
 * NOTE(review): field names presumably mirror the Bing Web Search v7 response
 * objects — verify against the API reference.
 */
interface BingAPISearchResult {
_type: string;
// Mapped to the normalized `title`.
name: string;
// Mapped to the normalized `url`; also matched against `hostPageUrl` of image/video answers.
url: string;
displayUrl: string;
// Mapped to the normalized `content`.
snippet?: string;
dateLastCrawled?: string;
thumbnailUrl?: string;
contentUrl?: string;
hostPageUrl?: string;
width?: number;
height?: number;
accentColor?: string;
contentSize?: string;
// Copied into the normalized result only when `video` is present.
datePublished?: string;
encodingFormat?: string;
hostPageDisplayUrl?: string;
id?: string;
isLicensed?: boolean;
isFamilyFriendly?: boolean;
language?: string;
mediaUrl?: string;
motionThumbnailUrl?: string;
// Copied into the normalized result only when `video` is present.
publisher?: string;
viewCount?: number;
webSearchUrl?: string;
// First choice for the normalized `img_src`.
primaryImageOfPage?: {
thumbnailUrl?: string;
width?: number;
height?: number;
};
// When present, `duration` and the `src` attribute extracted from `embedHtml` become `videoData`.
video?: {
allowHttpsEmbed?: boolean;
embedHtml?: string;
allowMobileEmbed?: boolean;
viewCount?: number;
duration?: string;
};
image?: {
thumbnail?: {
contentUrl?: string;
width?: number;
height?: number;
};
imageInsightsToken?: string;
imageId?: string;
};
}
/**
 * Queries the Bing Web Search v7 endpoint and normalizes the web page answers.
 *
 * The request asks for web pages, images and videos; image and video answers
 * are only used to find a thumbnail for a web page with the same host page URL.
 *
 * @param query - The search query.
 * @returns `results`: one `{ title, url, content, img_src }` entry per web page
 *   (plus `videoData`, `publisher`, `datePublished` when the item has `video`);
 *   `originalres`: the raw response body.
 * @throws Error with a single "Bing API Error: " prefix for any failure
 *   (HTTP error, error payload, or mapping error).
 */
export const searchBingAPI = async (query: string) => {
  try {
    const bingApiKey = getBingSubscriptionKey();
    const url = new URL(`https://api.cognitive.microsoft.com/bing/v7.0/search`);
    url.searchParams.append('q', query);
    url.searchParams.append('responseFilter', 'Webpages,Images,Videos');

    const res = await axios.get(url.toString(), {
      headers: {
        'Ocp-Apim-Subscription-Key': bingApiKey,
        Accept: 'application/json',
      },
    });

    if (res.data.error) {
      // Thrown without the prefix: the catch below adds it exactly once.
      throw new Error(res.data.error.message);
    }

    const originalres = res.data;

    // Extract web, image, and video results
    const webResults = originalres.webPages?.value || [];
    const imageResults = originalres.images?.value || [];
    const videoResults = originalres.videos?.value || [];

    const results = webResults.map((item: BingAPISearchResult) => ({
      title: item.name,
      url: item.url,
      content: item.snippet,
      // Thumbnail preference: the page's own primary image, then an image
      // answer hosted on this page, then a video answer hosted on this page.
      img_src:
        item.primaryImageOfPage?.thumbnailUrl ||
        imageResults.find((img: any) => img.hostPageUrl === item.url)
          ?.thumbnailUrl ||
        videoResults.find((vid: any) => vid.hostPageUrl === item.url)
          ?.thumbnailUrl,
      ...(item.video && {
        videoData: {
          duration: item.video.duration,
          // Pull the iframe `src` out of the embed HTML snippet.
          embedUrl: item.video.embedHtml?.match(/src="(.*?)"/)?.[1],
        },
        publisher: item.publisher,
        datePublished: item.datePublished,
      }),
    }));

    return { results, originalres };
  } catch (error: any) {
    // Prefer the HTTP response body when the client attached one.
    const errorMessage = error.response?.data
      ? JSON.stringify(error.response.data, null, 2)
      : error.message || 'Unknown error';
    throw new Error(`Bing API Error: ${errorMessage}`);
  }
};

View File

@ -0,0 +1,102 @@
import axios from 'axios';
import { getBraveApiKey } from '../../config';
/**
 * Normalized result entry produced by searchBraveAPI for each item of the
 * response's `web.results` array.
 */
interface BraveSearchResult {
title: string;
url: string;
// Filled from the raw item's `description`.
content?: string;
// Filled from the raw item's `thumbnail.src`, else the first `deep_results.images` entry's `src`.
img_src?: string;
age?: string;
family_friendly?: boolean;
language?: string;
// Present only when the raw item has `video`; `embedUrl` comes from `embed_url`.
video?: {
embedUrl?: string;
duration?: string;
};
// Present only when the raw item has `rating`; `scale` comes from `scale_max`.
rating?: {
value: number;
scale: number;
};
// Built from the raw item's `deep_results.product_cluster`, when present.
products?: Array<{
name: string;
price?: string;
}>;
// Present only when the raw item has `recipe`; `cookTime` comes from `cook_time`.
recipe?: {
ingredients?: string[];
cookTime?: string;
};
// searchBraveAPI always sets this object; `lastCrawled` comes from `meta.last_crawled`.
meta?: {
fetched?: string;
lastCrawled?: string;
};
}
/**
 * Queries the Brave web search endpoint and normalizes `web.results`.
 *
 * @param query - The search query.
 * @param numResults - Value sent as the `count` query parameter (default 20).
 * @returns `results`: normalized entries; `originalres`: the raw response body.
 * @throws Error with a single "Brave API Error: " prefix for any failure
 *   (HTTP error, error payload, or mapping error).
 */
export const searchBraveAPI = async (
  query: string,
  numResults: number = 20,
): Promise<{ results: BraveSearchResult[]; originalres: any }> => {
  try {
    const braveApiKey = getBraveApiKey();
    const url = new URL(`https://api.search.brave.com/res/v1/web/search`);
    url.searchParams.append('q', query);
    url.searchParams.append('count', numResults.toString());

    const res = await axios.get(url.toString(), {
      headers: {
        'X-Subscription-Token': braveApiKey,
        Accept: 'application/json',
      },
    });

    if (res.data.error) {
      // Thrown without the prefix: the catch below adds it exactly once.
      throw new Error(res.data.error.message);
    }

    const originalres = res.data;
    const webResults = originalres.web?.results || [];

    const results: BraveSearchResult[] = webResults.map((item: any) => ({
      title: item.title,
      url: item.url,
      content: item.description,
      // Prefer the item's own thumbnail, else the first deep-result image.
      img_src: item.thumbnail?.src || item.deep_results?.images?.[0]?.src,
      age: item.age,
      family_friendly: item.family_friendly,
      language: item.language,
      video: item.video
        ? {
            embedUrl: item.video.embed_url,
            duration: item.video.duration,
          }
        : undefined,
      rating: item.rating
        ? {
            value: item.rating.value,
            scale: item.rating.scale_max,
          }
        : undefined,
      products: item.deep_results?.product_cluster?.map((p: any) => ({
        name: p.name,
        price: p.price,
      })),
      recipe: item.recipe
        ? {
            ingredients: item.recipe.ingredients,
            cookTime: item.recipe.cook_time,
          }
        : undefined,
      meta: {
        fetched: item.meta?.fetched,
        lastCrawled: item.meta?.last_crawled,
      },
    }));

    return { results, originalres };
  } catch (error: any) {
    // Prefer the HTTP response body when the client attached one.
    const errorMessage = error.response?.data
      ? JSON.stringify(error.response.data, null, 2)
      : error.message || 'Unknown error';
    throw new Error(`Brave API Error: ${errorMessage}`);
  }
};

View File

@ -0,0 +1,85 @@
import axios from 'axios';
import { getGoogleApiKey, getGoogleCseId } from '../../config';
/**
 * Type applied to each entry of the response's `items` array when
 * searchGooglePSE maps it into normalized results.
 *
 * searchGooglePSE reads only `title`, `link`, `snippet`, `pagemap.cse_image`,
 * `pagemap.cse_thumbnail`, `pagemap.videoobject` and `image.thumbnailLink`.
 * NOTE(review): the remaining fields presumably mirror the Custom Search JSON
 * API result object — verify against the API reference.
 */
interface GooglePSESearchResult {
kind: string;
title: string;
htmlTitle: string;
// Mapped to the normalized `url`.
link: string;
displayLink: string;
// Mapped to the normalized `content`.
snippet?: string;
htmlSnippet?: string;
cacheId?: string;
formattedUrl: string;
htmlFormattedUrl: string;
pagemap?: {
// Indexed as an array by searchGooglePSE: element 0's `duration` and `embedurl` are read.
videoobject: any;
// Second choice for the normalized `img_src` (first element's `src`).
cse_thumbnail?: Array<{
src: string;
width: string;
height: string;
}>;
metatags?: Array<{
[key: string]: string;
author?: string;
}>;
// First choice for the normalized `img_src` (first element's `src`).
cse_image?: Array<{
src: string;
}>;
};
fileFormat?: string;
// `thumbnailLink` is the last fallback for the normalized `img_src`.
image?: {
contextLink: string;
thumbnailLink: string;
};
mime?: string;
labels?: Array<{
name: string;
displayName: string;
}>;
}
/**
 * Queries the Google Custom Search JSON API and normalizes the returned items.
 *
 * @param query - The search query.
 * @returns `results`: one `{ title, url, content, img_src }` entry per item
 *   (plus `videoData` when the item's pagemap has a video object);
 *   `originalres`: the raw `items` array, empty when the search had no hits.
 * @throws Error with a single "Google PSE Error: " prefix for any failure
 *   (HTTP error, error payload, or mapping error).
 */
export const searchGooglePSE = async (query: string) => {
  try {
    const googleApiKey = getGoogleApiKey();
    const googleCseID = getGoogleCseId();

    const url = new URL(`https://www.googleapis.com/customsearch/v1`);
    url.searchParams.append('q', query);
    url.searchParams.append('cx', googleCseID);
    url.searchParams.append('key', googleApiKey);

    const res = await axios.get(url.toString());

    if (res.data.error) {
      // Thrown without the prefix: the catch below adds it exactly once.
      throw new Error(res.data.error.message);
    }

    // The API leaves `items` out of the response when nothing matched; treat
    // that as an empty result list instead of failing on `undefined.map`.
    const originalres = res.data.items ?? [];

    const results = originalres.map((item: GooglePSESearchResult) => ({
      title: item.title,
      url: item.link,
      content: item.snippet,
      // Thumbnail preference: page image, then page thumbnail, then image-result thumbnail.
      img_src:
        item.pagemap?.cse_image?.[0]?.src ||
        item.pagemap?.cse_thumbnail?.[0]?.src ||
        item.image?.thumbnailLink,
      ...(item.pagemap?.videoobject?.[0] && {
        videoData: {
          duration: item.pagemap.videoobject[0].duration,
          embedUrl: item.pagemap.videoobject[0].embedurl,
        },
      }),
    }));

    return { results, originalres };
  } catch (error: any) {
    // Prefer the HTTP response body when the client attached one.
    const errorMessage = error.response?.data
      ? JSON.stringify(error.response.data, null, 2)
      : error.message || 'Unknown error';
    throw new Error(`Google PSE Error: ${errorMessage}`);
  }
};

View File

@ -1,5 +1,5 @@
import axios from 'axios'; import axios from 'axios';
import { getSearxngApiEndpoint } from '../config'; import { getSearxngApiEndpoint } from '../../config';
interface SearxngSearchOptions { interface SearxngSearchOptions {
categories?: string[]; categories?: string[];

View File

@ -0,0 +1,79 @@
import axios from 'axios';
import { getYacyJsonEndpoint } from '../../config';
/**
 * Type asserted on the body returned by the `yacysearch.json` request in
 * searchYaCy.
 *
 * searchYaCy reads only the first channel's `items`, and from each item only
 * `title`, `link`, `description`, `image`, `pubDate` and `host`.
 * NOTE(review): all numeric-looking fields are declared as strings here —
 * confirm against an actual YaCy response.
 */
interface YaCySearchResult {
channels: {
title: string;
description: string;
link: string;
image: {
url: string;
title: string;
link: string;
};
startIndex: string;
itemsPerPage: string;
searchTerms: string;
// The individual hits; mapped to normalized results by searchYaCy.
items: {
title: string;
// Mapped to the normalized `url`.
link: string;
code: string;
// Mapped to the normalized `content`.
description: string;
pubDate: string;
// Mapped to the normalized `img_src` (null when absent).
image?: string;
size: string;
sizename: string;
guid: string;
faviconUrl: string;
host: string;
path: string;
file: string;
urlhash: string;
ranking: string;
}[];
navigation: {
facetname: string;
displayname: string;
type: string;
min: string;
max: string;
mean: string;
elements: {
name: string;
count: string;
modifier: string;
url: string;
}[];
}[];
}[];
}
/**
 * Queries a YaCy instance's `yacysearch.json` endpoint and normalizes the
 * items of the first channel.
 *
 * @param query - The search query.
 * @param numResults - Value sent as the `count` query parameter (default 20).
 * @returns `results`: `{ title, url, content, img_src, pubDate, host }` entries
 *   (empty when the response has no channel or no items);
 *   `originalres`: the raw response body.
 * @throws Error prefixed with "YaCy Error: " for any failure, including a
 *   missing endpoint configuration.
 */
export const searchYaCy = async (query: string, numResults: number = 20) => {
  try {
    const yacyBaseUrl = getYacyJsonEndpoint();
    if (!yacyBaseUrl) {
      // Fail with a readable message rather than the URL parser's "Invalid URL".
      throw new Error('endpoint is not configured');
    }

    // Strip trailing slashes so "http://host/" does not become "http://host//yacysearch.json".
    const url = new URL(`${yacyBaseUrl.replace(/\/+$/, '')}/yacysearch.json`);
    url.searchParams.append('query', query);
    url.searchParams.append('count', numResults.toString());

    const res = await axios.get(url.toString());
    const originalres = res.data as YaCySearchResult;

    // Guard against a response without channels or without items.
    const items = originalres.channels?.[0]?.items ?? [];
    const results = items.map((item) => ({
      title: item.title,
      url: item.link,
      content: item.description,
      img_src: item.image || null,
      pubDate: item.pubDate,
      host: item.host,
    }));

    return { results, originalres };
  } catch (error: any) {
    // Prefer the HTTP response body when the client attached one.
    const errorMessage = error.response?.data
      ? JSON.stringify(error.response.data, null, 2)
      : error.message || 'Unknown error';
    throw new Error(`YaCy Error: ${errorMessage}`);
  }
};

View File

@ -13,6 +13,16 @@ import {
getCustomOpenaiApiUrl, getCustomOpenaiApiUrl,
getCustomOpenaiApiKey, getCustomOpenaiApiKey,
getCustomOpenaiModelName, getCustomOpenaiModelName,
getSearchEngineBackend,
getImageSearchEngineBackend,
getVideoSearchEngineBackend,
getNewsSearchEngineBackend,
getSearxngApiEndpoint,
getGoogleApiKey,
getGoogleCseId,
getBingSubscriptionKey,
getBraveApiKey,
getYacyJsonEndpoint,
} from '../config'; } from '../config';
import logger from '../utils/logger'; import logger from '../utils/logger';
@ -60,6 +70,21 @@ router.get('/', async (_, res) => {
config['customOpenaiApiUrl'] = getCustomOpenaiApiUrl(); config['customOpenaiApiUrl'] = getCustomOpenaiApiUrl();
config['customOpenaiApiKey'] = getCustomOpenaiApiKey(); config['customOpenaiApiKey'] = getCustomOpenaiApiKey();
config['customOpenaiModelName'] = getCustomOpenaiModelName(); config['customOpenaiModelName'] = getCustomOpenaiModelName();
// Add search engine configuration
config['searchEngineBackends'] = {
search: getSearchEngineBackend(),
image: getImageSearchEngineBackend(),
video: getVideoSearchEngineBackend(),
news: getNewsSearchEngineBackend(),
};
config['searxngEndpoint'] = getSearxngApiEndpoint();
config['googleApiKey'] = getGoogleApiKey();
config['googleCseId'] = getGoogleCseId();
config['bingSubscriptionKey'] = getBingSubscriptionKey();
config['braveApiKey'] = getBraveApiKey();
config['yacyEndpoint'] = getYacyJsonEndpoint();
res.status(200).json(config); res.status(200).json(config);
} catch (err: any) { } catch (err: any) {
@ -94,6 +119,30 @@ router.post('/', async (req, res) => {
MODEL_NAME: config.customOpenaiModelName, MODEL_NAME: config.customOpenaiModelName,
}, },
}, },
SEARCH_ENGINE_BACKENDS: config.searchEngineBackends ? {
SEARCH: config.searchEngineBackends.search,
IMAGE: config.searchEngineBackends.image,
VIDEO: config.searchEngineBackends.video,
NEWS: config.searchEngineBackends.news,
} : undefined,
SEARCH_ENGINES: {
GOOGLE: {
API_KEY: config.googleApiKey,
CSE_ID: config.googleCseId,
},
SEARXNG: {
ENDPOINT: config.searxngEndpoint,
},
BING: {
SUBSCRIPTION_KEY: config.bingSubscriptionKey,
},
BRAVE: {
API_KEY: config.braveApiKey,
},
YACY: {
ENDPOINT: config.yacyEndpoint,
},
},
}; };
updateConfig(updatedConfig); updateConfig(updatedConfig);

View File

@ -1,42 +1,125 @@
import express from 'express'; import express from 'express';
import { searchSearxng } from '../lib/searxng'; import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchYaCy } from '../lib/searchEngines/yacy';
import { searchBingAPI } from '../lib/searchEngines/bing';
import { getNewsSearchEngineBackend } from '../config';
import logger from '../utils/logger'; import logger from '../utils/logger';
const router = express.Router(); const router = express.Router();
/**
 * Runs one news query against the backend selected by
 * `getNewsSearchEngineBackend()` and returns entries in a common shape.
 *
 * SearxNG results are returned as delivered by the client; every other backend
 * is reshaped to `{ title, url, content, thumbnail, img_src, iframe_src,
 * author, publishedDate }`, with `iframe_src` always `null`.
 *
 * @param query - The search query (callers pass `site:<site> <topic>`).
 * @param site - Site name used as the `author` fallback.
 * @throws Error when the configured backend name is not recognised, plus any
 *   error raised by the selected search engine client.
 */
async function performSearch(query: string, site: string) {
  const backend = getNewsSearchEngineBackend();

  // Shared shape for the engines whose normalized results expose
  // title/url/content/img_src directly.
  const toEntry = (entry: any, author: any, publishedDate: any) => ({
    title: entry.title,
    url: entry.url,
    content: entry.content,
    thumbnail: entry.img_src,
    img_src: entry.img_src,
    iframe_src: null,
    author,
    publishedDate,
  });

  if (backend === 'google') {
    const { originalres } = await searchGooglePSE(query);
    return originalres.map((item) => {
      const meta = item.pagemap?.metatags?.[0];
      // First defined candidate wins; thumbnail and img_src share it.
      const firstImage = [
        item.pagemap?.cse_image?.[0]?.src,
        item.pagemap?.cse_thumbnail?.[0]?.src,
        meta?.['og:image'],
        meta?.['twitter:image'],
        meta?.['image'],
      ].filter(Boolean)[0];

      return {
        title: item.title,
        url: item.link,
        content: item.snippet,
        thumbnail: firstImage,
        img_src: firstImage,
        iframe_src: null,
        author: meta?.['og:site_name'] || site,
        publishedDate: meta?.['article:published_time'],
      };
    });
  }

  if (backend === 'searxng') {
    const response = await searchSearxng(query, {
      engines: ['bing news'],
      pageno: 1,
    });
    return response.results;
  }

  if (backend === 'brave') {
    const { results } = await searchBraveAPI(query);
    return results.map((item) =>
      toEntry(item, item.meta?.fetched || site, item.meta?.lastCrawled),
    );
  }

  if (backend === 'yacy') {
    const { results } = await searchYaCy(query);
    return results.map((item) =>
      toEntry(item, item?.host || site, item?.pubDate),
    );
  }

  if (backend === 'bing') {
    const { results } = await searchBingAPI(query);
    return results.map((item) =>
      toEntry(item, item?.publisher || site, item?.datePublished),
    );
  }

  throw new Error(`Unknown search engine ${backend}`);
}
router.get('/', async (req, res) => { router.get('/', async (req, res) => {
try { try {
const queries = [
{ site: 'businessinsider.com', topic: 'AI' },
{ site: 'www.exchangewire.com', topic: 'AI' },
{ site: 'yahoo.com', topic: 'AI' },
{ site: 'businessinsider.com', topic: 'tech' },
{ site: 'www.exchangewire.com', topic: 'tech' },
{ site: 'yahoo.com', topic: 'tech' },
];
const data = ( const data = (
await Promise.all([ await Promise.all(
searchSearxng('site:businessinsider.com AI', { queries.map(async ({ site, topic }) => {
engines: ['bing news'], try {
pageno: 1, const query = `site:${site} ${topic}`;
return await performSearch(query, site);
} catch (error) {
logger.error(`Error searching ${site}: ${error.message}`);
return [];
}
}), }),
searchSearxng('site:www.exchangewire.com AI', { )
engines: ['bing news'],
pageno: 1,
}),
searchSearxng('site:yahoo.com AI', {
engines: ['bing news'],
pageno: 1,
}),
searchSearxng('site:businessinsider.com tech', {
engines: ['bing news'],
pageno: 1,
}),
searchSearxng('site:www.exchangewire.com tech', {
engines: ['bing news'],
pageno: 1,
}),
searchSearxng('site:yahoo.com tech', {
engines: ['bing news'],
pageno: 1,
}),
])
) )
.map((result) => result.results)
.flat() .flat()
.sort(() => Math.random() - 0.5); .sort(() => Math.random() - 0.5)
.filter((item) => item.title && item.url && item.content);
return res.json({ blogs: data }); return res.json({ blogs: data });
} catch (err: any) { } catch (err: any) {

View File

@ -17,7 +17,12 @@ import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
import LineOutputParser from '../lib/outputParsers/lineOutputParser'; import LineOutputParser from '../lib/outputParsers/lineOutputParser';
import { getDocumentsFromLinks } from '../utils/documents'; import { getDocumentsFromLinks } from '../utils/documents';
import { Document } from 'langchain/document'; import { Document } from 'langchain/document';
import { searchSearxng } from '../lib/searxng'; import { searchSearxng } from '../lib/searchEngines/searxng';
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
import { searchBingAPI } from '../lib/searchEngines/bing';
import { searchBraveAPI } from '../lib/searchEngines/brave';
import { searchYaCy } from '../lib/searchEngines/yacy';
import { getSearchEngineBackend } from '../config';
import path from 'path'; import path from 'path';
import fs from 'fs'; import fs from 'fs';
import computeSimilarity from '../utils/computeSimilarity'; import computeSimilarity from '../utils/computeSimilarity';
@ -132,7 +137,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query. text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary. If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
- **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague. - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
- **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query. - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
- **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format. - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
@ -203,10 +208,37 @@ class MetaSearchAgent implements MetaSearchAgentType {
return { query: question, docs: docs }; return { query: question, docs: docs };
} else { } else {
const res = await searchSearxng(question, { const searchEngine = getSearchEngineBackend();
language: 'en',
engines: this.config.activeEngines, let res;
}); switch (searchEngine) {
case 'searxng':
res = await searchSearxng(question, {
language: 'en',
engines: this.config.activeEngines,
});
break;
case 'google':
res = await searchGooglePSE(question);
break;
case 'bing':
res = await searchBingAPI(question);
break;
case 'brave':
res = await searchBraveAPI(question);
break;
case 'yacy':
res = await searchYaCy(question);
break;
default:
throw new Error(`Unknown search engine ${searchEngine}`);
}
if (!res?.results) {
throw new Error(
`No results found for search engine: ${searchEngine}`,
);
}
const documents = res.results.map( const documents = res.results.map(
(result) => (result) =>

View File

@ -23,6 +23,18 @@ interface SettingsType {
customOpenaiApiKey: string; customOpenaiApiKey: string;
customOpenaiApiUrl: string; customOpenaiApiUrl: string;
customOpenaiModelName: string; customOpenaiModelName: string;
// Backend id selected per search type. The settings page offers 'searxng',
// 'google', 'bing', 'brave' and (default search only) 'yacy'.
searchEngineBackends: {
search: string;
// For image/video/news the settings page uses '' for "Use Default Search Engine".
image: string;
video: string;
news: string;
};
// Endpoint / credential fields for the individual backends, edited as plain
// text inputs on the settings page.
searxngEndpoint: string;
googleApiKey: string;
googleCseId: string;
bingSubscriptionKey: string;
braveApiKey: string;
yacyEndpoint: string;
} }
interface InputProps extends React.InputHTMLAttributes<HTMLInputElement> { interface InputProps extends React.InputHTMLAttributes<HTMLInputElement> {
@ -112,6 +124,12 @@ const Page = () => {
const [automaticImageSearch, setAutomaticImageSearch] = useState(false); const [automaticImageSearch, setAutomaticImageSearch] = useState(false);
const [automaticVideoSearch, setAutomaticVideoSearch] = useState(false); const [automaticVideoSearch, setAutomaticVideoSearch] = useState(false);
const [savingStates, setSavingStates] = useState<Record<string, boolean>>({}); const [savingStates, setSavingStates] = useState<Record<string, boolean>>({});
// Backend chosen for each search type, bound to the selects in the
// "Search Engine Settings" section. Every field starts as '' and is filled in
// from the fetched config when that config contains searchEngineBackends.
const [searchEngineBackends, setSearchEngineBackends] = useState({
search: '',
image: '',
video: '',
news: '',
});
useEffect(() => { useEffect(() => {
const fetchConfig = async () => { const fetchConfig = async () => {
@ -125,6 +143,16 @@ const Page = () => {
const data = (await res.json()) as SettingsType; const data = (await res.json()) as SettingsType;
setConfig(data); setConfig(data);
// Seed the backend selects from the fetched config when it carries a selection
if (data.searchEngineBackends) {
  const { search, image, video, news } = data.searchEngineBackends;
  // Missing or empty entries are normalised to '' so the selects stay controlled.
  setSearchEngineBackends({
    search: search || '',
    image: image || '',
    video: video || '',
    news: news || '',
  });
}
const chatModelProvidersKeys = Object.keys(data.chatModelProviders || {}); const chatModelProvidersKeys = Object.keys(data.chatModelProviders || {});
const embeddingModelProvidersKeys = Object.keys( const embeddingModelProvidersKeys = Object.keys(
data.embeddingModelProviders || {}, data.embeddingModelProviders || {},
@ -331,6 +359,8 @@ const Page = () => {
localStorage.setItem('embeddingModelProvider', value); localStorage.setItem('embeddingModelProvider', value);
} else if (key === 'embeddingModel') { } else if (key === 'embeddingModel') {
localStorage.setItem('embeddingModel', value); localStorage.setItem('embeddingModel', value);
} else if (key === 'searchEngineBackends') {
// For this key the settings selects pass the whole backends object
// ({ search, image, video, news }). Storage.setItem() converts its argument to
// a string, so passing the object directly would persist the literal text
// "[object Object]". Serialise it explicitly; a value that is already a string
// is stored unchanged.
localStorage.setItem(
  'searchEngineBackends',
  typeof value === 'string' ? value : JSON.stringify(value),
);
} }
} catch (err) { } catch (err) {
console.error('Failed to save:', err); console.error('Failed to save:', err);
@ -793,6 +823,234 @@ const Page = () => {
</div> </div>
</div> </div>
</SettingsSection> </SettingsSection>
<SettingsSection title="Search Engine Settings">
<div className="flex flex-col space-y-4">
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Default Search Engine
</p>
<Select
// Primary backend. Unlike the image/video/news selects below, this list has
// no empty "Use Default Search Engine" entry and is the only one offering YaCy.
value={searchEngineBackends.search}
onChange={(e) => {
const value = e.target.value;
// Update the controlled select's state, then hand saveConfig the whole
// backends object (this render's state with `search` overridden).
setSearchEngineBackends((prev) => ({
...prev,
search: value,
}));
saveConfig('searchEngineBackends', {
...searchEngineBackends,
search: value,
});
}}
options={[
{ value: 'searxng', label: 'SearXNG' },
{ value: 'google', label: 'Google' },
{ value: 'bing', label: 'Bing' },
{ value: 'brave', label: 'Brave' },
{ value: 'yacy', label: 'YaCy' },
]}
/>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Image Search Engine
</p>
<Select
value={searchEngineBackends.image}
onChange={(e) => {
const value = e.target.value;
// Same pattern as the default select, overriding `image`.
setSearchEngineBackends((prev) => ({
...prev,
image: value,
}));
saveConfig('searchEngineBackends', {
...searchEngineBackends,
image: value,
});
}}
options={[
// '' is the "Use Default Search Engine" choice.
{ value: '', label: 'Use Default Search Engine' },
{ value: 'searxng', label: 'SearXNG' },
{ value: 'google', label: 'Google' },
{ value: 'bing', label: 'Bing' },
{ value: 'brave', label: 'Brave' },
]}
/>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Video Search Engine
</p>
<Select
value={searchEngineBackends.video}
onChange={(e) => {
const value = e.target.value;
// Same pattern as the default select, overriding `video`.
setSearchEngineBackends((prev) => ({
...prev,
video: value,
}));
saveConfig('searchEngineBackends', {
...searchEngineBackends,
video: value,
});
}}
options={[
// '' is the "Use Default Search Engine" choice.
{ value: '', label: 'Use Default Search Engine' },
{ value: 'searxng', label: 'SearXNG' },
{ value: 'google', label: 'Google' },
{ value: 'bing', label: 'Bing' },
{ value: 'brave', label: 'Brave' },
]}
/>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
News Search Engine
</p>
<Select
value={searchEngineBackends.news}
onChange={(e) => {
const value = e.target.value;
// Same pattern as the default select, overriding `news`.
setSearchEngineBackends((prev) => ({
...prev,
news: value,
}));
saveConfig('searchEngineBackends', {
...searchEngineBackends,
news: value,
});
}}
options={[
// '' is the "Use Default Search Engine" choice.
{ value: '', label: 'Use Default Search Engine' },
{ value: 'searxng', label: 'SearXNG' },
{ value: 'google', label: 'Google' },
{ value: 'bing', label: 'Bing' },
{ value: 'brave', label: 'Brave' },
]}
/>
</div>
<div className="pt-4 border-t border-light-200 dark:border-dark-200">
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
SearXNG Endpoint
</p>
<Input
// First of the backend connection fields; the wrapper above draws the
// divider between the selects and these inputs.
type="text"
placeholder="SearXNG API Endpoint"
value={config.searxngEndpoint || ''}
isSaving={savingStates['searxngEndpoint']}
onChange={(e) => {
// Editing only updates local config state; saveConfig is called through
// onSave. `prev!` asserts the previous config is non-null.
setConfig((prev) => ({
...prev!,
searxngEndpoint: e.target.value,
}));
}}
onSave={(value) => saveConfig('searxngEndpoint', value)}
/>
</div>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Google API Key
</p>
<Input
// NOTE(review): rendered as a plain text input, so the key is visible on screen.
type="text"
placeholder="Google API Key"
value={config.googleApiKey || ''}
isSaving={savingStates['googleApiKey']}
onChange={(e) => {
setConfig((prev) => ({
...prev!,
googleApiKey: e.target.value,
}));
}}
onSave={(value) => saveConfig('googleApiKey', value)}
/>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Google CSE ID
</p>
<Input
type="text"
placeholder="Google Custom Search Engine ID"
value={config.googleCseId || ''}
isSaving={savingStates['googleCseId']}
onChange={(e) => {
setConfig((prev) => ({
...prev!,
googleCseId: e.target.value,
}));
}}
onSave={(value) => saveConfig('googleCseId', value)}
/>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Bing Subscription Key
</p>
<Input
type="text"
placeholder="Bing Subscription Key"
value={config.bingSubscriptionKey || ''}
isSaving={savingStates['bingSubscriptionKey']}
onChange={(e) => {
setConfig((prev) => ({
...prev!,
bingSubscriptionKey: e.target.value,
}));
}}
onSave={(value) => saveConfig('bingSubscriptionKey', value)}
/>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
Brave API Key
</p>
<Input
type="text"
placeholder="Brave API Key"
value={config.braveApiKey || ''}
isSaving={savingStates['braveApiKey']}
onChange={(e) => {
setConfig((prev) => ({
...prev!,
braveApiKey: e.target.value,
}));
}}
onSave={(value) => saveConfig('braveApiKey', value)}
/>
</div>
<div className="flex flex-col space-y-1">
<p className="text-black/70 dark:text-white/70 text-sm">
YaCy Endpoint
</p>
<Input
type="text"
placeholder="YaCy API Endpoint"
value={config.yacyEndpoint || ''}
isSaving={savingStates['yacyEndpoint']}
onChange={(e) => {
setConfig((prev) => ({
...prev!,
yacyEndpoint: e.target.value,
}));
}}
onSave={(value) => saveConfig('yacyEndpoint', value)}
/>
</div>
</div>
</SettingsSection>
</div> </div>
) )
)} )}

View File

@ -68,7 +68,13 @@ const MessageBox = ({
return ( return (
<div> <div>
{message.role === 'user' && ( {message.role === 'user' && (
<div className={cn('w-full', messageIndex === 0 ? 'pt-16' : 'pt-8', 'break-words')}> <div
className={cn(
'w-full',
messageIndex === 0 ? 'pt-16' : 'pt-8',
'break-words',
)}
>
<h2 className="text-black dark:text-white font-medium text-3xl lg:w-9/12"> <h2 className="text-black dark:text-white font-medium text-3xl lg:w-9/12">
{message.content} {message.content}
</h2> </h2>