mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-09-16 22:31:32 +00:00
Compare commits
28 Commits
feat/azure
...
728b499281
Author | SHA1 | Date | |
---|---|---|---|
|
728b499281 | ||
|
5a4dafc753 | ||
|
4ac99786f0 | ||
|
1224281278 | ||
|
3daae29a5d | ||
|
50bcaa13f2 | ||
|
31e4abf068 | ||
|
fd6e701cf0 | ||
|
89880a2555 | ||
|
07776d8699 | ||
|
32fb6ac131 | ||
|
99137d95e7 | ||
|
490a8db538 | ||
|
aba702c51b | ||
|
89a6e7fbb1 | ||
|
f19d2e3a97 | ||
|
4a7ca8fc68 | ||
|
3d642f2539 | ||
|
aa91d3bc60 | ||
|
93c5ed46f6 | ||
|
af4b97b766 | ||
|
ca86a7e358 | ||
|
99351fc2a6 | ||
|
7a816efc04 | ||
|
4d41243108 | ||
|
6c218b5fee | ||
|
1c1f31e23a | ||
|
5b15bcfe17 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -2,6 +2,7 @@
|
||||
node_modules/
|
||||
npm-debug.log
|
||||
yarn-error.log
|
||||
package-lock.json
|
||||
|
||||
# Build output
|
||||
/.next/
|
||||
@@ -37,3 +38,6 @@ Thumbs.db
|
||||
# Db
|
||||
db.sqlite
|
||||
/searxng
|
||||
|
||||
# Dev
|
||||
docker-compose-dev.yaml
|
||||
|
@@ -4,7 +4,7 @@ services:
|
||||
volumes:
|
||||
- ./searxng:/etc/searxng:rw
|
||||
ports:
|
||||
- 4000:8080
|
||||
- "4000:8080"
|
||||
networks:
|
||||
- perplexica-network
|
||||
restart: unless-stopped
|
||||
@@ -19,7 +19,7 @@ services:
|
||||
depends_on:
|
||||
- searxng
|
||||
ports:
|
||||
- 3001:3001
|
||||
- "3001:3001"
|
||||
volumes:
|
||||
- backend-dbstore:/home/perplexica/data
|
||||
- uploads:/home/perplexica/uploads
|
||||
@@ -41,7 +41,7 @@ services:
|
||||
depends_on:
|
||||
- perplexica-backend
|
||||
ports:
|
||||
- 3000:3000
|
||||
- "3000:3000"
|
||||
networks:
|
||||
- perplexica-network
|
||||
restart: unless-stopped
|
||||
|
@@ -30,8 +30,8 @@
|
||||
"@iarna/toml": "^2.2.5",
|
||||
"@langchain/anthropic": "^0.2.3",
|
||||
"@langchain/community": "^0.2.16",
|
||||
"@langchain/openai": "^0.0.25",
|
||||
"@langchain/google-genai": "^0.0.23",
|
||||
"@langchain/openai": "^0.0.25",
|
||||
"@xenova/transformers": "^2.17.1",
|
||||
"axios": "^1.6.8",
|
||||
"better-sqlite3": "^11.0.0",
|
||||
|
@@ -3,6 +3,12 @@ PORT = 3001 # Port to run the server on
|
||||
SIMILARITY_MEASURE = "cosine" # "cosine" or "dot"
|
||||
KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m")
|
||||
|
||||
[SEARCH_ENGINE_BACKENDS] # "google" | "searxng" | "bing" | "brave" | "yacy"
|
||||
SEARCH = "searxng"
|
||||
IMAGE = "searxng"
|
||||
VIDEO = "searxng"
|
||||
NEWS = "searxng"
|
||||
|
||||
[MODELS.OPENAI]
|
||||
API_KEY = ""
|
||||
|
||||
@@ -22,5 +28,18 @@ API_URL = ""
|
||||
[MODELS.OLLAMA]
|
||||
API_URL = "" # Ollama API URL - http://host.docker.internal:11434
|
||||
|
||||
[API_ENDPOINTS]
|
||||
SEARXNG = "http://localhost:32768" # SearxNG API URL
|
||||
[SEARCH_ENGINES.GOOGLE]
|
||||
API_KEY = ""
|
||||
CSE_ID = ""
|
||||
|
||||
[SEARCH_ENGINES.SEARXNG]
|
||||
ENDPOINT = ""
|
||||
|
||||
[SEARCH_ENGINES.BING]
|
||||
SUBSCRIPTION_KEY = ""
|
||||
|
||||
[SEARCH_ENGINES.BRAVE]
|
||||
API_KEY = ""
|
||||
|
||||
[SEARCH_ENGINES.YACY]
|
||||
ENDPOINT = ""
|
||||
|
@@ -15,3 +15,5 @@ server:
|
||||
engines:
|
||||
- name: wolframalpha
|
||||
disabled: false
|
||||
- name: qwant
|
||||
disabled: true
|
||||
|
@@ -7,7 +7,12 @@ import { PromptTemplate } from '@langchain/core/prompts';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import { BaseMessage } from '@langchain/core/messages';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { searchSearxng } from '../lib/searxng';
|
||||
import { searchSearxng } from '../lib/searchEngines/searxng';
|
||||
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
|
||||
import { searchBraveAPI } from '../lib/searchEngines/brave';
|
||||
import { searchYaCy } from '../lib/searchEngines/yacy';
|
||||
import { searchBingAPI } from '../lib/searchEngines/bing';
|
||||
import { getImageSearchEngineBackend } from '../config';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
|
||||
const imageSearchChainPrompt = `
|
||||
@@ -36,6 +41,95 @@ type ImageSearchChainInput = {
|
||||
query: string;
|
||||
};
|
||||
|
||||
async function performImageSearch(query: string) {
|
||||
const searchEngine = getImageSearchEngineBackend();
|
||||
let images = [];
|
||||
|
||||
switch (searchEngine) {
|
||||
case 'google': {
|
||||
const googleResult = await searchGooglePSE(query);
|
||||
images = googleResult.results.map((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
return {
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
source: result.displayLink
|
||||
};
|
||||
}
|
||||
}).filter(Boolean);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'searxng': {
|
||||
const searxResult = await searchSearxng(query, {
|
||||
engines: ['google images', 'bing images'],
|
||||
pageno: 1,
|
||||
});
|
||||
searxResult.results.forEach((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
images.push({
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
});
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case 'brave': {
|
||||
const braveResult = await searchBraveAPI(query);
|
||||
images = braveResult.results.map((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
return {
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
source: result.url
|
||||
};
|
||||
}
|
||||
}).filter(Boolean);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'yacy': {
|
||||
const yacyResult = await searchYaCy(query);
|
||||
images = yacyResult.results.map((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
return {
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
source: result.url
|
||||
}
|
||||
}
|
||||
}).filter(Boolean);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'bing': {
|
||||
const bingResult = await searchBingAPI(query);
|
||||
images = bingResult.results.map((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
return {
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
source: result.url
|
||||
}
|
||||
}
|
||||
}).filter(Boolean);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
throw new Error(`Unknown search engine ${searchEngine}`);
|
||||
}
|
||||
|
||||
return images;
|
||||
}
|
||||
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const createImageSearchChain = (llm: BaseChatModel) => {
|
||||
@@ -52,22 +146,7 @@ const createImageSearchChain = (llm: BaseChatModel) => {
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
const res = await searchSearxng(input, {
|
||||
engines: ['bing images', 'google images'],
|
||||
});
|
||||
|
||||
const images = [];
|
||||
|
||||
res.results.forEach((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
images.push({
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
const images = await performImageSearch(input);
|
||||
return images.slice(0, 10);
|
||||
}),
|
||||
]);
|
||||
|
@@ -7,26 +7,30 @@ import { PromptTemplate } from '@langchain/core/prompts';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import { BaseMessage } from '@langchain/core/messages';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { searchSearxng } from '../lib/searxng';
|
||||
import { searchSearxng } from '../lib/searchEngines/searxng';
|
||||
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
|
||||
import { searchBraveAPI } from '../lib/searchEngines/brave';
|
||||
import { searchBingAPI } from '../lib/searchEngines/bing';
|
||||
import { getVideoSearchEngineBackend } from '../config';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
|
||||
const VideoSearchChainPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos.
|
||||
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
|
||||
|
||||
|
||||
Example:
|
||||
1. Follow up question: How does a car work?
|
||||
Rephrased: How does a car work?
|
||||
|
||||
|
||||
2. Follow up question: What is the theory of relativity?
|
||||
Rephrased: What is theory of relativity
|
||||
|
||||
|
||||
3. Follow up question: How does an AC work?
|
||||
Rephrased: How does an AC work
|
||||
|
||||
|
||||
Conversation:
|
||||
{chat_history}
|
||||
|
||||
|
||||
Follow up question: {query}
|
||||
Rephrased question:
|
||||
`;
|
||||
@@ -38,6 +42,95 @@ type VideoSearchChainInput = {
|
||||
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
async function performVideoSearch(query: string) {
|
||||
const searchEngine = getVideoSearchEngineBackend();
|
||||
const youtubeQuery = `${query} site:youtube.com`;
|
||||
let videos = [];
|
||||
|
||||
switch (searchEngine) {
|
||||
case 'google': {
|
||||
const googleResult = await searchGooglePSE(youtubeQuery);
|
||||
googleResult.results.forEach((result) => { // Use .results instead of .originalres
|
||||
if (result.img_src && result.url && result.title) {
|
||||
const videoId = new URL(result.url).searchParams.get('v');
|
||||
videos.push({
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
iframe_src: videoId ? `https://www.youtube.com/embed/${videoId}` : null
|
||||
});
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case 'searxng': {
|
||||
const searxResult = await searchSearxng(query, {
|
||||
engines: ['youtube'],
|
||||
});
|
||||
searxResult.results.forEach((result) => {
|
||||
if (
|
||||
result.thumbnail &&
|
||||
result.url &&
|
||||
result.title &&
|
||||
result.iframe_src
|
||||
) {
|
||||
videos.push({
|
||||
img_src: result.thumbnail,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
iframe_src: result.iframe_src,
|
||||
});
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case 'brave': {
|
||||
const braveResult = await searchBraveAPI(youtubeQuery);
|
||||
braveResult.results.forEach((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
const videoId = new URL(result.url).searchParams.get('v');
|
||||
videos.push({
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
iframe_src: videoId ? `https://www.youtube.com/embed/${videoId}` : null
|
||||
});
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case 'yacy': {
|
||||
console.log('Not available for yacy');
|
||||
videos = [];
|
||||
break;
|
||||
}
|
||||
|
||||
case 'bing': {
|
||||
const bingResult = await searchBingAPI(youtubeQuery);
|
||||
bingResult.results.forEach((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
const videoId = new URL(result.url).searchParams.get('v');
|
||||
videos.push({
|
||||
img_src: result.img_src,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
iframe_src: videoId ? `https://www.youtube.com/embed/${videoId}` : null
|
||||
});
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
throw new Error(`Unknown search engine ${searchEngine}`);
|
||||
}
|
||||
|
||||
return videos;
|
||||
}
|
||||
|
||||
const createVideoSearchChain = (llm: BaseChatModel) => {
|
||||
return RunnableSequence.from([
|
||||
RunnableMap.from({
|
||||
@@ -52,28 +145,7 @@ const createVideoSearchChain = (llm: BaseChatModel) => {
|
||||
llm,
|
||||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
const res = await searchSearxng(input, {
|
||||
engines: ['youtube'],
|
||||
});
|
||||
|
||||
const videos = [];
|
||||
|
||||
res.results.forEach((result) => {
|
||||
if (
|
||||
result.thumbnail &&
|
||||
result.url &&
|
||||
result.title &&
|
||||
result.iframe_src
|
||||
) {
|
||||
videos.push({
|
||||
img_src: result.thumbnail,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
iframe_src: result.iframe_src,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
const videos = await performVideoSearch(input);
|
||||
return videos.slice(0, 10);
|
||||
}),
|
||||
]);
|
||||
@@ -87,4 +159,4 @@ const handleVideoSearch = (
|
||||
return VideoSearchChain.invoke(input);
|
||||
};
|
||||
|
||||
export default handleVideoSearch;
|
||||
export default handleVideoSearch;
|
@@ -10,6 +10,12 @@ interface Config {
|
||||
SIMILARITY_MEASURE: string;
|
||||
KEEP_ALIVE: string;
|
||||
};
|
||||
SEARCH_ENGINE_BACKENDS: {
|
||||
SEARCH: string;
|
||||
IMAGE: string;
|
||||
VIDEO: string;
|
||||
NEWS: string;
|
||||
}
|
||||
MODELS: {
|
||||
OPENAI: {
|
||||
API_KEY: string;
|
||||
@@ -32,8 +38,23 @@ interface Config {
|
||||
MODEL_NAME: string;
|
||||
};
|
||||
};
|
||||
API_ENDPOINTS: {
|
||||
SEARXNG: string;
|
||||
SEARCH_ENGINES: {
|
||||
GOOGLE: {
|
||||
API_KEY: string;
|
||||
CSE_ID: string;
|
||||
};
|
||||
SEARXNG: {
|
||||
ENDPOINT: string;
|
||||
};
|
||||
BING: {
|
||||
SUBSCRIPTION_KEY: string;
|
||||
};
|
||||
BRAVE: {
|
||||
API_KEY: string;
|
||||
};
|
||||
YACY: {
|
||||
ENDPOINT: string;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -61,8 +82,31 @@ export const getAnthropicApiKey = () => loadConfig().MODELS.ANTHROPIC.API_KEY;
|
||||
|
||||
export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY;
|
||||
|
||||
export const getSearchEngineBackend = () =>
|
||||
loadConfig().SEARCH_ENGINE_BACKENDS.SEARCH;
|
||||
|
||||
export const getImageSearchEngineBackend = () =>
|
||||
loadConfig().SEARCH_ENGINE_BACKENDS.IMAGE || getSearchEngineBackend();
|
||||
|
||||
export const getVideoSearchEngineBackend = () =>
|
||||
loadConfig().SEARCH_ENGINE_BACKENDS.VIDEO || getSearchEngineBackend();
|
||||
|
||||
export const getNewsSearchEngineBackend = () =>
|
||||
loadConfig().SEARCH_ENGINE_BACKENDS.NEWS || getSearchEngineBackend();
|
||||
|
||||
export const getGoogleApiKey = () => loadConfig().SEARCH_ENGINES.GOOGLE.API_KEY;
|
||||
|
||||
export const getGoogleCseId = () => loadConfig().SEARCH_ENGINES.GOOGLE.CSE_ID;
|
||||
|
||||
export const getBraveApiKey = () => loadConfig().SEARCH_ENGINES.BRAVE.API_KEY;
|
||||
|
||||
export const getBingSubscriptionKey = () =>
|
||||
loadConfig().SEARCH_ENGINES.BING.SUBSCRIPTION_KEY;
|
||||
|
||||
export const getYacyJsonEndpoint = () => loadConfig().SEARCH_ENGINES.YACY.ENDPOINT;
|
||||
|
||||
export const getSearxngApiEndpoint = () =>
|
||||
process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG;
|
||||
process.env.SEARXNG_API_URL || loadConfig().SEARCH_ENGINES.SEARXNG.ENDPOINT;
|
||||
|
||||
export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL;
|
||||
|
||||
|
102
src/lib/searchEngines/bing.ts
Normal file
102
src/lib/searchEngines/bing.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
import axios from 'axios';
|
||||
import { getBingSubscriptionKey } from '../../config';
|
||||
|
||||
interface BingAPISearchResult {
|
||||
_type: string;
|
||||
name: string;
|
||||
url: string;
|
||||
displayUrl: string;
|
||||
snippet?: string;
|
||||
dateLastCrawled?: string;
|
||||
thumbnailUrl?: string;
|
||||
contentUrl?: string;
|
||||
hostPageUrl?: string;
|
||||
width?: number;
|
||||
height?: number;
|
||||
accentColor?: string;
|
||||
contentSize?: string;
|
||||
datePublished?: string;
|
||||
encodingFormat?: string;
|
||||
hostPageDisplayUrl?: string;
|
||||
id?: string;
|
||||
isLicensed?: boolean;
|
||||
isFamilyFriendly?: boolean;
|
||||
language?: string;
|
||||
mediaUrl?: string;
|
||||
motionThumbnailUrl?: string;
|
||||
publisher?: string;
|
||||
viewCount?: number;
|
||||
webSearchUrl?: string;
|
||||
primaryImageOfPage?: {
|
||||
thumbnailUrl?: string;
|
||||
width?: number;
|
||||
height?: number;
|
||||
};
|
||||
video?: {
|
||||
allowHttpsEmbed?: boolean;
|
||||
embedHtml?: string;
|
||||
allowMobileEmbed?: boolean;
|
||||
viewCount?: number;
|
||||
duration?: string;
|
||||
};
|
||||
image?: {
|
||||
thumbnail?: {
|
||||
contentUrl?: string;
|
||||
width?: number;
|
||||
height?: number;
|
||||
};
|
||||
imageInsightsToken?: string;
|
||||
imageId?: string;
|
||||
};
|
||||
}
|
||||
|
||||
export const searchBingAPI = async (query: string) => {
|
||||
try {
|
||||
const bingApiKey = await getBingSubscriptionKey();
|
||||
const url = new URL(`https://api.cognitive.microsoft.com/bing/v7.0/search`);
|
||||
url.searchParams.append('q', query);
|
||||
url.searchParams.append('responseFilter', 'Webpages,Images,Videos');
|
||||
|
||||
const res = await axios.get(url.toString(), {
|
||||
headers: {
|
||||
'Ocp-Apim-Subscription-Key': bingApiKey,
|
||||
'Accept': 'application/json'
|
||||
}
|
||||
});
|
||||
|
||||
if (res.data.error) {
|
||||
throw new Error(`Bing API Error: ${res.data.error.message}`);
|
||||
}
|
||||
|
||||
const originalres = res.data;
|
||||
|
||||
// Extract web, image, and video results
|
||||
const webResults = originalres.webPages?.value || [];
|
||||
const imageResults = originalres.images?.value || [];
|
||||
const videoResults = originalres.videos?.value || [];
|
||||
|
||||
const results = webResults.map((item: BingAPISearchResult) => ({
|
||||
title: item.name,
|
||||
url: item.url,
|
||||
content: item.snippet,
|
||||
img_src: item.primaryImageOfPage?.thumbnailUrl
|
||||
|| imageResults.find((img: any) => img.hostPageUrl === item.url)?.thumbnailUrl
|
||||
|| videoResults.find((vid: any) => vid.hostPageUrl === item.url)?.thumbnailUrl,
|
||||
...(item.video && {
|
||||
videoData: {
|
||||
duration: item.video.duration,
|
||||
embedUrl: item.video.embedHtml?.match(/src="(.*?)"/)?.[1]
|
||||
},
|
||||
publisher: item.publisher,
|
||||
datePublished: item.datePublished
|
||||
})
|
||||
}));
|
||||
|
||||
return { results, originalres };
|
||||
} catch (error) {
|
||||
const errorMessage = error.response?.data
|
||||
? JSON.stringify(error.response.data, null, 2)
|
||||
: error.message || 'Unknown error';
|
||||
throw new Error(`Bing API Error: ${errorMessage}`);
|
||||
}
|
||||
};
|
96
src/lib/searchEngines/brave.ts
Normal file
96
src/lib/searchEngines/brave.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import axios from 'axios';
|
||||
import { getBraveApiKey } from '../../config';
|
||||
|
||||
interface BraveSearchResult {
|
||||
title: string;
|
||||
url: string;
|
||||
content?: string;
|
||||
img_src?: string;
|
||||
age?: string;
|
||||
family_friendly?: boolean;
|
||||
language?: string;
|
||||
video?: {
|
||||
embedUrl?: string;
|
||||
duration?: string;
|
||||
};
|
||||
rating?: {
|
||||
value: number;
|
||||
scale: number;
|
||||
};
|
||||
products?: Array<{
|
||||
name: string;
|
||||
price?: string;
|
||||
}>;
|
||||
recipe?: {
|
||||
ingredients?: string[];
|
||||
cookTime?: string;
|
||||
};
|
||||
meta?: {
|
||||
fetched?: string;
|
||||
lastCrawled?: string;
|
||||
};
|
||||
}
|
||||
|
||||
export const searchBraveAPI = async (
|
||||
query: string,
|
||||
numResults: number = 20
|
||||
): Promise<{ results: BraveSearchResult[]; originalres: any }> => {
|
||||
try {
|
||||
const braveApiKey = await getBraveApiKey();
|
||||
const url = new URL(`https://api.search.brave.com/res/v1/web/search`);
|
||||
|
||||
url.searchParams.append('q', query);
|
||||
url.searchParams.append('count', numResults.toString());
|
||||
|
||||
const res = await axios.get(url.toString(), {
|
||||
headers: {
|
||||
'X-Subscription-Token': braveApiKey,
|
||||
'Accept': 'application/json'
|
||||
}
|
||||
});
|
||||
|
||||
if (res.data.error) {
|
||||
throw new Error(`Brave API Error: ${res.data.error.message}`);
|
||||
}
|
||||
|
||||
const originalres = res.data;
|
||||
const webResults = originalres.web?.results || [];
|
||||
|
||||
const results: BraveSearchResult[] = webResults.map((item: any) => ({
|
||||
title: item.title,
|
||||
url: item.url,
|
||||
content: item.description,
|
||||
img_src: item.thumbnail?.src || item.deep_results?.images?.[0]?.src,
|
||||
age: item.age,
|
||||
family_friendly: item.family_friendly,
|
||||
language: item.language,
|
||||
video: item.video ? {
|
||||
embedUrl: item.video.embed_url,
|
||||
duration: item.video.duration
|
||||
} : undefined,
|
||||
rating: item.rating ? {
|
||||
value: item.rating.value,
|
||||
scale: item.rating.scale_max
|
||||
} : undefined,
|
||||
products: item.deep_results?.product_cluster?.map((p: any) => ({
|
||||
name: p.name,
|
||||
price: p.price
|
||||
})),
|
||||
recipe: item.recipe ? {
|
||||
ingredients: item.recipe.ingredients,
|
||||
cookTime: item.recipe.cook_time
|
||||
} : undefined,
|
||||
meta: {
|
||||
fetched: item.meta?.fetched,
|
||||
lastCrawled: item.meta?.last_crawled
|
||||
}
|
||||
}));
|
||||
|
||||
return { results, originalres };
|
||||
} catch (error) {
|
||||
const errorMessage = error.response?.data
|
||||
? JSON.stringify(error.response.data, null, 2)
|
||||
: error.message || 'Unknown error';
|
||||
throw new Error(`Brave API Error: ${errorMessage}`);
|
||||
}
|
||||
};
|
84
src/lib/searchEngines/google_pse.ts
Normal file
84
src/lib/searchEngines/google_pse.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import axios from 'axios';
|
||||
import { getGoogleApiKey, getGoogleCseId } from '../../config';
|
||||
|
||||
interface GooglePSESearchResult {
|
||||
kind: string;
|
||||
title: string;
|
||||
htmlTitle: string;
|
||||
link: string;
|
||||
displayLink: string;
|
||||
snippet?: string;
|
||||
htmlSnippet?: string;
|
||||
cacheId?: string;
|
||||
formattedUrl: string;
|
||||
htmlFormattedUrl: string;
|
||||
pagemap?: {
|
||||
videoobject: any;
|
||||
cse_thumbnail?: Array<{
|
||||
src: string;
|
||||
width: string;
|
||||
height: string;
|
||||
}>;
|
||||
metatags?: Array<{
|
||||
[key: string]: string;
|
||||
'author'?: string;
|
||||
}>;
|
||||
cse_image?: Array<{
|
||||
src: string;
|
||||
}>;
|
||||
};
|
||||
fileFormat?: string;
|
||||
image?: {
|
||||
contextLink: string;
|
||||
thumbnailLink: string;
|
||||
};
|
||||
mime?: string;
|
||||
labels?: Array<{
|
||||
name: string;
|
||||
displayName: string;
|
||||
}>;
|
||||
}
|
||||
|
||||
export const searchGooglePSE = async (query: string) => {
|
||||
try {
|
||||
const [googleApiKey, googleCseID] = await Promise.all([
|
||||
getGoogleApiKey(),
|
||||
getGoogleCseId()
|
||||
]);
|
||||
|
||||
const url = new URL(`https://www.googleapis.com/customsearch/v1`);
|
||||
url.searchParams.append('q', query);
|
||||
url.searchParams.append('cx', googleCseID);
|
||||
url.searchParams.append('key', googleApiKey);
|
||||
|
||||
const res = await axios.get(url.toString());
|
||||
|
||||
if (res.data.error) {
|
||||
throw new Error(`Google PSE Error: ${res.data.error.message}`);
|
||||
}
|
||||
|
||||
const originalres = res.data.items;
|
||||
|
||||
const results = originalres.map((item: GooglePSESearchResult) => ({
|
||||
title: item.title,
|
||||
url: item.link,
|
||||
content: item.snippet,
|
||||
img_src: item.pagemap?.cse_image?.[0]?.src
|
||||
|| item.pagemap?.cse_thumbnail?.[0]?.src
|
||||
|| item.image?.thumbnailLink,
|
||||
...(item.pagemap?.videoobject?.[0] && {
|
||||
videoData: {
|
||||
duration: item.pagemap.videoobject[0].duration,
|
||||
embedUrl: item.pagemap.videoobject[0].embedurl
|
||||
}
|
||||
})
|
||||
}));
|
||||
|
||||
return { results, originalres };
|
||||
} catch (error) {
|
||||
const errorMessage = error.response?.data
|
||||
? JSON.stringify(error.response.data, null, 2)
|
||||
: error.message || 'Unknown error';
|
||||
throw new Error(`Google PSE Error: ${errorMessage}`);
|
||||
}
|
||||
};
|
@@ -1,5 +1,5 @@
|
||||
import axios from 'axios';
|
||||
import { getSearxngApiEndpoint } from '../config';
|
||||
import { getSearxngApiEndpoint } from '../../config';
|
||||
|
||||
interface SearxngSearchOptions {
|
||||
categories?: string[];
|
83
src/lib/searchEngines/yacy.ts
Normal file
83
src/lib/searchEngines/yacy.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import axios from 'axios';
|
||||
import { getYacyJsonEndpoint } from '../../config';
|
||||
|
||||
interface YaCySearchResult {
|
||||
channels: {
|
||||
title: string;
|
||||
description: string;
|
||||
link: string;
|
||||
image: {
|
||||
url: string;
|
||||
title: string;
|
||||
link: string;
|
||||
};
|
||||
startIndex: string;
|
||||
itemsPerPage: string;
|
||||
searchTerms: string;
|
||||
items: {
|
||||
title: string;
|
||||
link: string;
|
||||
code: string;
|
||||
description: string;
|
||||
pubDate: string;
|
||||
image?: string;
|
||||
size: string;
|
||||
sizename: string;
|
||||
guid: string;
|
||||
faviconUrl: string;
|
||||
host: string;
|
||||
path: string;
|
||||
file: string;
|
||||
urlhash: string;
|
||||
ranking: string;
|
||||
}[];
|
||||
navigation: {
|
||||
facetname: string;
|
||||
displayname: string;
|
||||
type: string;
|
||||
min: string;
|
||||
max: string;
|
||||
mean: string;
|
||||
elements: {
|
||||
name: string;
|
||||
count: string;
|
||||
modifier: string;
|
||||
url: string;
|
||||
}[];
|
||||
}[];
|
||||
}[];
|
||||
}
|
||||
|
||||
|
||||
export const searchYaCy = async (
|
||||
query: string,
|
||||
numResults: number = 20
|
||||
) => {
|
||||
try {
|
||||
const yacyBaseUrl = getYacyJsonEndpoint();
|
||||
|
||||
const url = new URL(`${yacyBaseUrl}/yacysearch.json`);
|
||||
url.searchParams.append('query', query);
|
||||
url.searchParams.append('count', numResults.toString());
|
||||
|
||||
const res = await axios.get(url.toString());
|
||||
|
||||
const originalres = res.data as YaCySearchResult;
|
||||
|
||||
const results = originalres.channels[0].items.map(item => ({
|
||||
title: item.title,
|
||||
url: item.link,
|
||||
content: item.description,
|
||||
img_src: item.image || null,
|
||||
pubDate: item.pubDate,
|
||||
host: item.host,
|
||||
}));
|
||||
|
||||
return { results, originalres };
|
||||
} catch (error) {
|
||||
const errorMessage = error.response?.data
|
||||
? JSON.stringify(error.response.data, null, 2)
|
||||
: error.message || 'Unknown error';
|
||||
throw new Error(`YaCy Error: ${errorMessage}`);
|
||||
}
|
||||
};
|
@@ -1,42 +1,125 @@
|
||||
import express from 'express';
|
||||
import { searchSearxng } from '../lib/searxng';
|
||||
import { searchSearxng } from '../lib/searchEngines/searxng';
|
||||
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
|
||||
import { searchBraveAPI } from '../lib/searchEngines/brave';
|
||||
import { searchYaCy } from '../lib/searchEngines/yacy';
|
||||
import { searchBingAPI } from '../lib/searchEngines/bing';
|
||||
import { getNewsSearchEngineBackend } from '../config';
|
||||
import logger from '../utils/logger';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
async function performSearch(query: string, site: string) {
|
||||
const searchEngine = getNewsSearchEngineBackend();
|
||||
switch (searchEngine) {
|
||||
case 'google': {
|
||||
const googleResult = await searchGooglePSE(query);
|
||||
|
||||
return googleResult.originalres.map(item => {
|
||||
const imageSources = [
|
||||
item.pagemap?.cse_image?.[0]?.src,
|
||||
item.pagemap?.cse_thumbnail?.[0]?.src,
|
||||
item.pagemap?.metatags?.[0]?.['og:image'],
|
||||
item.pagemap?.metatags?.[0]?.['twitter:image'],
|
||||
item.pagemap?.metatags?.[0]?.['image'],
|
||||
].filter(Boolean); // Remove undefined values
|
||||
|
||||
return {
|
||||
title: item.title,
|
||||
url: item.link,
|
||||
content: item.snippet,
|
||||
thumbnail: imageSources[0], // First available image
|
||||
img_src: imageSources[0], // Same as thumbnail for consistency
|
||||
iframe_src: null,
|
||||
author: item.pagemap?.metatags?.[0]?.['og:site_name'] || site,
|
||||
publishedDate: item.pagemap?.metatags?.[0]?.['article:published_time']
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
case 'searxng': {
|
||||
const searxResult = await searchSearxng(query, {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
});
|
||||
return searxResult.results;
|
||||
}
|
||||
|
||||
case 'brave': {
|
||||
const braveResult = await searchBraveAPI(query);
|
||||
return braveResult.results.map(item => ({
|
||||
title: item.title,
|
||||
url: item.url,
|
||||
content: item.content,
|
||||
thumbnail: item.img_src,
|
||||
img_src: item.img_src,
|
||||
iframe_src: null,
|
||||
author: item.meta?.fetched || site,
|
||||
publishedDate: item.meta?.lastCrawled
|
||||
}));
|
||||
}
|
||||
|
||||
case 'yacy': {
|
||||
const yacyResult = await searchYaCy(query);
|
||||
return yacyResult.results.map((item) => ({
|
||||
title: item.title,
|
||||
url: item.url,
|
||||
content: item.content,
|
||||
thumbnail: item.img_src,
|
||||
img_src: item.img_src,
|
||||
iframe_src: null,
|
||||
author: item?.host || site,
|
||||
publishedDate: item?.pubDate
|
||||
}))
|
||||
}
|
||||
|
||||
case 'bing': {
|
||||
const bingResult = await searchBingAPI(query);
|
||||
return bingResult.results.map(item => ({
|
||||
title: item.title,
|
||||
url: item.url,
|
||||
content: item.content,
|
||||
thumbnail: item.img_src,
|
||||
img_src: item.img_src,
|
||||
iframe_src: null,
|
||||
author: item?.publisher || site,
|
||||
publishedDate: item?.datePublished
|
||||
}))
|
||||
}
|
||||
|
||||
default:
|
||||
throw new Error(`Unknown search engine ${searchEngine}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
router.get('/', async (req, res) => {
|
||||
try {
|
||||
const queries = [
|
||||
{ site: 'businessinsider.com', topic: 'AI' },
|
||||
{ site: 'www.exchangewire.com', topic: 'AI' },
|
||||
{ site: 'yahoo.com', topic: 'AI' },
|
||||
{ site: 'businessinsider.com', topic: 'tech' },
|
||||
{ site: 'www.exchangewire.com', topic: 'tech' },
|
||||
{ site: 'yahoo.com', topic: 'tech' },
|
||||
];
|
||||
|
||||
const data = (
|
||||
await Promise.all([
|
||||
searchSearxng('site:businessinsider.com AI', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:www.exchangewire.com AI', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:yahoo.com AI', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:businessinsider.com tech', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:www.exchangewire.com tech', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:yahoo.com tech', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
])
|
||||
await Promise.all(
|
||||
queries.map(async ({ site, topic }) => {
|
||||
try {
|
||||
const query = `site:${site} ${topic}`;
|
||||
return await performSearch(query, site);
|
||||
} catch (error) {
|
||||
logger.error(`Error searching ${site}: ${error.message}`);
|
||||
return [];
|
||||
}
|
||||
})
|
||||
)
|
||||
)
|
||||
.map((result) => result.results)
|
||||
.flat()
|
||||
.sort(() => Math.random() - 0.5);
|
||||
.sort(() => Math.random() - 0.5)
|
||||
.filter(item => item.title && item.url && item.content);
|
||||
|
||||
return res.json({ blogs: data });
|
||||
} catch (err: any) {
|
||||
|
@@ -17,7 +17,12 @@ import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
|
||||
import LineOutputParser from '../lib/outputParsers/lineOutputParser';
|
||||
import { getDocumentsFromLinks } from '../utils/documents';
|
||||
import { Document } from 'langchain/document';
|
||||
import { searchSearxng } from '../lib/searxng';
|
||||
import { searchSearxng } from '../lib/searchEngines/searxng';
|
||||
import { searchGooglePSE } from '../lib/searchEngines/google_pse';
|
||||
import { searchBingAPI } from '../lib/searchEngines/bing';
|
||||
import { searchBraveAPI } from '../lib/searchEngines/brave';
|
||||
import { searchYaCy } from '../lib/searchEngines/yacy';
|
||||
import { getSearchEngineBackend } from '../config';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
@@ -132,7 +137,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
|
||||
text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
|
||||
If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
|
||||
|
||||
|
||||
- **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague.
|
||||
- **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query.
|
||||
- **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format.
|
||||
@@ -203,10 +208,36 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
|
||||
return { query: question, docs: docs };
|
||||
} else {
|
||||
const res = await searchSearxng(question, {
|
||||
language: 'en',
|
||||
engines: this.config.activeEngines,
|
||||
});
|
||||
|
||||
const searchEngine = getSearchEngineBackend();
|
||||
|
||||
let res;
|
||||
switch (searchEngine) {
|
||||
case 'searxng':
|
||||
res = await searchSearxng(question, {
|
||||
language: 'en',
|
||||
engines: this.config.activeEngines,
|
||||
});
|
||||
break;
|
||||
case 'google':
|
||||
res = await searchGooglePSE(question);
|
||||
break;
|
||||
case 'bing':
|
||||
res = await searchBingAPI(question);
|
||||
break;
|
||||
case 'brave':
|
||||
res = await searchBraveAPI(question);
|
||||
break;
|
||||
case 'yacy':
|
||||
res = await searchYaCy(question);
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unknown search engine ${searchEngine}`);
|
||||
}
|
||||
|
||||
if (!res?.results) {
|
||||
throw new Error(`No results found for search engine: ${searchEngine}`);
|
||||
}
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
|
Reference in New Issue
Block a user