mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2026-04-09 21:34:27 +00:00
Compare commits
31 Commits
f83f813bd7
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e8d883768 | ||
|
|
71790f346e | ||
|
|
40a7cdeb4b | ||
|
|
ebcf5384f3 | ||
|
|
a889fdc31e | ||
|
|
c3ee1988d2 | ||
|
|
5991416142 | ||
|
|
7a6fad95ef | ||
|
|
b868aa0287 | ||
|
|
65b2c3f234 | ||
|
|
cbe538cc36 | ||
|
|
f6a47fd3e1 | ||
|
|
690b11cc1c | ||
|
|
96e860a310 | ||
|
|
65475b418f | ||
|
|
acaa208a41 | ||
|
|
54f3a7d2b8 | ||
|
|
1a124b8b07 | ||
|
|
3098622cb0 | ||
|
|
3646495bdf | ||
|
|
476c4ec8c2 | ||
|
|
0e33641927 | ||
|
|
8c061f20a5 | ||
|
|
72ac815294 | ||
|
|
d16b7e271a | ||
|
|
58ed869b3d | ||
|
|
3fede054da | ||
|
|
21bd88787e | ||
|
|
b02f5aa37f | ||
|
|
a2f2ac532e | ||
|
|
1763ee9d1f |
@@ -34,6 +34,9 @@ COPY drizzle ./drizzle
|
|||||||
|
|
||||||
RUN mkdir /home/vane/uploads
|
RUN mkdir /home/vane/uploads
|
||||||
|
|
||||||
|
RUN yarn add playwright
|
||||||
|
RUN yarn playwright install --with-deps --only-shell chromium
|
||||||
|
|
||||||
RUN useradd --shell /bin/bash --system \
|
RUN useradd --shell /bin/bash --system \
|
||||||
--home-dir "/usr/local/searxng" \
|
--home-dir "/usr/local/searxng" \
|
||||||
--comment 'Privacy-respecting metasearch engine' \
|
--comment 'Privacy-respecting metasearch engine' \
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
import { defineConfig } from 'drizzle-kit';
|
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
|
|
||||||
export default defineConfig({
|
export default {
|
||||||
dialect: 'sqlite',
|
dialect: 'sqlite',
|
||||||
schema: './src/lib/db/schema.ts',
|
schema: './src/lib/db/schema.ts',
|
||||||
out: './drizzle',
|
out: './drizzle',
|
||||||
dbCredentials: {
|
dbCredentials: {
|
||||||
url: path.join(process.cwd(), 'data', 'db.sqlite'),
|
url: path.join(process.cwd(), 'data', 'db.sqlite'),
|
||||||
},
|
},
|
||||||
});
|
};
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import path from 'node:path';
|
||||||
import pkg from './package.json' with { type: 'json' };
|
import pkg from './package.json' with { type: 'json' };
|
||||||
|
|
||||||
/** @type {import('next').NextConfig} */
|
/** @type {import('next').NextConfig} */
|
||||||
@@ -10,7 +11,12 @@ const nextConfig = {
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
serverExternalPackages: ['pdf-parse'],
|
serverExternalPackages: [
|
||||||
|
'pdf-parse',
|
||||||
|
'playwright',
|
||||||
|
'officeparser',
|
||||||
|
'file-type',
|
||||||
|
],
|
||||||
outputFileTracingIncludes: {
|
outputFileTracingIncludes: {
|
||||||
'/api/**': [
|
'/api/**': [
|
||||||
'./node_modules/@napi-rs/canvas/**',
|
'./node_modules/@napi-rs/canvas/**',
|
||||||
@@ -21,6 +27,9 @@ const nextConfig = {
|
|||||||
env: {
|
env: {
|
||||||
NEXT_PUBLIC_VERSION: pkg.version,
|
NEXT_PUBLIC_VERSION: pkg.version,
|
||||||
},
|
},
|
||||||
|
turbopack: {
|
||||||
|
root: process.cwd(),
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
export default nextConfig;
|
export default nextConfig;
|
||||||
|
|||||||
20
package.json
20
package.json
@@ -1,10 +1,10 @@
|
|||||||
{
|
{
|
||||||
"name": "vane",
|
"name": "vane",
|
||||||
"version": "1.12.1",
|
"version": "1.12.2",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"author": "ItzCrazyKns",
|
"author": "ItzCrazyKns",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "next dev --webpack",
|
"dev": "next dev",
|
||||||
"build": "next build --webpack",
|
"build": "next build --webpack",
|
||||||
"start": "next start",
|
"start": "next start",
|
||||||
"lint": "next lint",
|
"lint": "next lint",
|
||||||
@@ -16,16 +16,19 @@
|
|||||||
"@headlessui/tailwindcss": "^0.2.2",
|
"@headlessui/tailwindcss": "^0.2.2",
|
||||||
"@huggingface/transformers": "^3.8.1",
|
"@huggingface/transformers": "^3.8.1",
|
||||||
"@icons-pack/react-simple-icons": "^12.3.0",
|
"@icons-pack/react-simple-icons": "^12.3.0",
|
||||||
|
"@mozilla/readability": "^0.6.0",
|
||||||
"@phosphor-icons/react": "^2.1.10",
|
"@phosphor-icons/react": "^2.1.10",
|
||||||
"@radix-ui/react-tooltip": "^1.2.8",
|
"@radix-ui/react-tooltip": "^1.2.8",
|
||||||
"@tailwindcss/typography": "^0.5.12",
|
"@tailwindcss/typography": "^0.5.12",
|
||||||
"@toolsycc/json-repair": "^0.1.22",
|
"@toolsycc/json-repair": "^0.1.22",
|
||||||
|
"async-mutex": "^0.5.0",
|
||||||
"axios": "^1.8.3",
|
"axios": "^1.8.3",
|
||||||
"better-sqlite3": "^11.9.1",
|
"better-sqlite3": "^11.9.1",
|
||||||
"clsx": "^2.1.0",
|
"clsx": "^2.1.0",
|
||||||
"drizzle-orm": "^0.40.1",
|
"drizzle-orm": "^0.45.2",
|
||||||
"js-tiktoken": "^1.0.21",
|
"js-tiktoken": "^1.0.21",
|
||||||
"jspdf": "^3.0.4",
|
"jsdom": "^29.0.1",
|
||||||
|
"jspdf": "^4.2.1",
|
||||||
"lightweight-charts": "^5.0.9",
|
"lightweight-charts": "^5.0.9",
|
||||||
"lucide-react": "^0.556.0",
|
"lucide-react": "^0.556.0",
|
||||||
"mammoth": "^1.9.1",
|
"mammoth": "^1.9.1",
|
||||||
@@ -34,11 +37,12 @@
|
|||||||
"motion": "^12.23.26",
|
"motion": "^12.23.26",
|
||||||
"next": "^16.0.7",
|
"next": "^16.0.7",
|
||||||
"next-themes": "^0.3.0",
|
"next-themes": "^0.3.0",
|
||||||
"officeparser": "^5.2.2",
|
"officeparser": "^6.0.7",
|
||||||
"ollama": "^0.6.3",
|
"ollama": "^0.6.3",
|
||||||
"openai": "^6.9.0",
|
"openai": "^6.9.0",
|
||||||
"partial-json": "^0.1.7",
|
"partial-json": "^0.1.7",
|
||||||
"pdf-parse": "^2.4.5",
|
"pdf-parse": "^2.4.5",
|
||||||
|
"playwright": "^1.59.1",
|
||||||
"react": "^18",
|
"react": "^18",
|
||||||
"react-dom": "^18",
|
"react-dom": "^18",
|
||||||
"react-syntax-highlighter": "^16.1.0",
|
"react-syntax-highlighter": "^16.1.0",
|
||||||
@@ -47,13 +51,13 @@
|
|||||||
"rfc6902": "^5.1.2",
|
"rfc6902": "^5.1.2",
|
||||||
"sonner": "^1.4.41",
|
"sonner": "^1.4.41",
|
||||||
"tailwind-merge": "^2.2.2",
|
"tailwind-merge": "^2.2.2",
|
||||||
"turndown": "^7.2.2",
|
|
||||||
"yahoo-finance2": "^3.10.2",
|
"yahoo-finance2": "^3.10.2",
|
||||||
"yet-another-react-lightbox": "^3.17.2",
|
"yet-another-react-lightbox": "^3.17.2",
|
||||||
"zod": "^4.1.12"
|
"zod": "^4.1.12"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/better-sqlite3": "^7.6.12",
|
"@types/better-sqlite3": "^7.6.12",
|
||||||
|
"@types/jsdom": "^28.0.1",
|
||||||
"@types/jspdf": "^2.0.0",
|
"@types/jspdf": "^2.0.0",
|
||||||
"@types/node": "^24.8.1",
|
"@types/node": "^24.8.1",
|
||||||
"@types/pdf-parse": "^1.1.4",
|
"@types/pdf-parse": "^1.1.4",
|
||||||
@@ -62,9 +66,9 @@
|
|||||||
"@types/react-syntax-highlighter": "^15.5.13",
|
"@types/react-syntax-highlighter": "^15.5.13",
|
||||||
"@types/turndown": "^5.0.6",
|
"@types/turndown": "^5.0.6",
|
||||||
"autoprefixer": "^10.0.1",
|
"autoprefixer": "^10.0.1",
|
||||||
"drizzle-kit": "^0.30.5",
|
"drizzle-kit": "^0.18.1",
|
||||||
"eslint": "^8",
|
"eslint": "^8",
|
||||||
"eslint-config-next": "14.1.4",
|
"eslint-config-next": "^16.2.2",
|
||||||
"postcss": "^8",
|
"postcss": "^8",
|
||||||
"prettier": "^3.2.5",
|
"prettier": "^3.2.5",
|
||||||
"tailwindcss": "^3.3.0",
|
"tailwindcss": "^3.3.0",
|
||||||
|
|||||||
@@ -37,7 +37,8 @@ const getStepTitle = (
|
|||||||
if (step.type === 'reasoning') {
|
if (step.type === 'reasoning') {
|
||||||
return isStreaming && !step.reasoning ? 'Thinking...' : 'Thinking';
|
return isStreaming && !step.reasoning ? 'Thinking...' : 'Thinking';
|
||||||
} else if (step.type === 'searching') {
|
} else if (step.type === 'searching') {
|
||||||
return `Searching ${step.searching.length} ${step.searching.length === 1 ? 'query' : 'queries'}`;
|
const queries = Array.isArray(step.searching) ? step.searching : [];
|
||||||
|
return `Searching ${queries.length} ${queries.length === 1 ? 'query' : 'queries'}`;
|
||||||
} else if (step.type === 'search_results') {
|
} else if (step.type === 'search_results') {
|
||||||
return `Found ${step.reading.length} ${step.reading.length === 1 ? 'result' : 'results'}`;
|
return `Found ${step.reading.length} ${step.reading.length === 1 ? 'result' : 'results'}`;
|
||||||
} else if (step.type === 'reading') {
|
} else if (step.type === 'reading') {
|
||||||
@@ -160,6 +161,7 @@ const AssistantSteps = ({
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{step.type === 'searching' &&
|
{step.type === 'searching' &&
|
||||||
|
Array.isArray(step.searching) &&
|
||||||
step.searching.length > 0 && (
|
step.searching.length > 0 && (
|
||||||
<div className="flex flex-wrap gap-1.5 mt-1.5">
|
<div className="flex flex-wrap gap-1.5 mt-1.5">
|
||||||
{step.searching.map((query, idx) => (
|
{step.searching.map((query, idx) => (
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import { Fragment, useRef, useState } from 'react';
|
|||||||
import { useChat } from '@/lib/hooks/useChat';
|
import { useChat } from '@/lib/hooks/useChat';
|
||||||
import { AnimatePresence } from 'motion/react';
|
import { AnimatePresence } from 'motion/react';
|
||||||
import { motion } from 'framer-motion';
|
import { motion } from 'framer-motion';
|
||||||
|
import { toast } from 'sonner';
|
||||||
|
|
||||||
const Attach = () => {
|
const Attach = () => {
|
||||||
const { files, setFiles, setFileIds, fileIds } = useChat();
|
const { files, setFiles, setFileIds, fileIds } = useChat();
|
||||||
@@ -26,31 +27,59 @@ const Attach = () => {
|
|||||||
const fileInputRef = useRef<any>();
|
const fileInputRef = useRef<any>();
|
||||||
|
|
||||||
const handleChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
const handleChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||||
setLoading(true);
|
const selectedFiles = e.target.files;
|
||||||
const data = new FormData();
|
|
||||||
|
|
||||||
for (let i = 0; i < e.target.files!.length; i++) {
|
if (!selectedFiles?.length) {
|
||||||
data.append('files', e.target.files![i]);
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const embeddingModelProvider = localStorage.getItem(
|
setLoading(true);
|
||||||
'embeddingModelProviderId',
|
|
||||||
);
|
|
||||||
const embeddingModel = localStorage.getItem('embeddingModelKey');
|
|
||||||
|
|
||||||
data.append('embedding_model_provider_id', embeddingModelProvider!);
|
try {
|
||||||
data.append('embedding_model_key', embeddingModel!);
|
const data = new FormData();
|
||||||
|
|
||||||
const res = await fetch(`/api/uploads`, {
|
for (let i = 0; i < selectedFiles.length; i++) {
|
||||||
method: 'POST',
|
data.append('files', selectedFiles[i]);
|
||||||
body: data,
|
}
|
||||||
});
|
|
||||||
|
|
||||||
const resData = await res.json();
|
const embeddingModelProvider = localStorage.getItem(
|
||||||
|
'embeddingModelProviderId',
|
||||||
|
);
|
||||||
|
const embeddingModel = localStorage.getItem('embeddingModelKey');
|
||||||
|
|
||||||
setFiles([...files, ...resData.files]);
|
if (!embeddingModelProvider || !embeddingModel) {
|
||||||
setFileIds([...fileIds, ...resData.files.map((file: any) => file.fileId)]);
|
throw new Error('Please select an embedding model before uploading.');
|
||||||
setLoading(false);
|
}
|
||||||
|
|
||||||
|
data.append('embedding_model_provider_id', embeddingModelProvider);
|
||||||
|
data.append('embedding_model_key', embeddingModel);
|
||||||
|
|
||||||
|
const res = await fetch(`/api/uploads`, {
|
||||||
|
method: 'POST',
|
||||||
|
body: data,
|
||||||
|
});
|
||||||
|
|
||||||
|
const resData = await res.json().catch(() => ({}));
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
throw new Error(resData.message || 'Failed to upload file(s).');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Array.isArray(resData.files)) {
|
||||||
|
throw new Error('Invalid upload response from server.');
|
||||||
|
}
|
||||||
|
|
||||||
|
setFiles([...files, ...resData.files]);
|
||||||
|
setFileIds([
|
||||||
|
...fileIds,
|
||||||
|
...resData.files.map((file: any) => file.fileId),
|
||||||
|
]);
|
||||||
|
} catch (err: any) {
|
||||||
|
toast(err?.message || 'Failed to upload file(s).');
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
e.target.value = '';
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return loading ? (
|
return loading ? (
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import { Fragment, useRef, useState } from 'react';
|
|||||||
import { useChat } from '@/lib/hooks/useChat';
|
import { useChat } from '@/lib/hooks/useChat';
|
||||||
import { AnimatePresence } from 'motion/react';
|
import { AnimatePresence } from 'motion/react';
|
||||||
import { motion } from 'framer-motion';
|
import { motion } from 'framer-motion';
|
||||||
|
import { toast } from 'sonner';
|
||||||
|
|
||||||
const AttachSmall = () => {
|
const AttachSmall = () => {
|
||||||
const { files, setFiles, setFileIds, fileIds } = useChat();
|
const { files, setFiles, setFileIds, fileIds } = useChat();
|
||||||
@@ -17,31 +18,59 @@ const AttachSmall = () => {
|
|||||||
const fileInputRef = useRef<any>();
|
const fileInputRef = useRef<any>();
|
||||||
|
|
||||||
const handleChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
const handleChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||||
setLoading(true);
|
const selectedFiles = e.target.files;
|
||||||
const data = new FormData();
|
|
||||||
|
|
||||||
for (let i = 0; i < e.target.files!.length; i++) {
|
if (!selectedFiles?.length) {
|
||||||
data.append('files', e.target.files![i]);
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const embeddingModelProvider = localStorage.getItem(
|
setLoading(true);
|
||||||
'embeddingModelProviderId',
|
|
||||||
);
|
|
||||||
const embeddingModel = localStorage.getItem('embeddingModelKey');
|
|
||||||
|
|
||||||
data.append('embedding_model_provider_id', embeddingModelProvider!);
|
try {
|
||||||
data.append('embedding_model_key', embeddingModel!);
|
const data = new FormData();
|
||||||
|
|
||||||
const res = await fetch(`/api/uploads`, {
|
for (let i = 0; i < selectedFiles.length; i++) {
|
||||||
method: 'POST',
|
data.append('files', selectedFiles[i]);
|
||||||
body: data,
|
}
|
||||||
});
|
|
||||||
|
|
||||||
const resData = await res.json();
|
const embeddingModelProvider = localStorage.getItem(
|
||||||
|
'embeddingModelProviderId',
|
||||||
|
);
|
||||||
|
const embeddingModel = localStorage.getItem('embeddingModelKey');
|
||||||
|
|
||||||
setFiles([...files, ...resData.files]);
|
if (!embeddingModelProvider || !embeddingModel) {
|
||||||
setFileIds([...fileIds, ...resData.files.map((file: any) => file.fileId)]);
|
throw new Error('Please select an embedding model before uploading.');
|
||||||
setLoading(false);
|
}
|
||||||
|
|
||||||
|
data.append('embedding_model_provider_id', embeddingModelProvider);
|
||||||
|
data.append('embedding_model_key', embeddingModel);
|
||||||
|
|
||||||
|
const res = await fetch(`/api/uploads`, {
|
||||||
|
method: 'POST',
|
||||||
|
body: data,
|
||||||
|
});
|
||||||
|
|
||||||
|
const resData = await res.json().catch(() => ({}));
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
throw new Error(resData.message || 'Failed to upload file(s).');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Array.isArray(resData.files)) {
|
||||||
|
throw new Error('Invalid upload response from server.');
|
||||||
|
}
|
||||||
|
|
||||||
|
setFiles([...files, ...resData.files]);
|
||||||
|
setFileIds([
|
||||||
|
...fileIds,
|
||||||
|
...resData.files.map((file: any) => file.fileId),
|
||||||
|
]);
|
||||||
|
} catch (err: any) {
|
||||||
|
toast(err?.message || 'Failed to upload file(s).');
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
e.target.value = '';
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return loading ? (
|
return loading ? (
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ import SyntaxHighlighter from 'react-syntax-highlighter';
|
|||||||
import darkTheme from './CodeBlockDarkTheme';
|
import darkTheme from './CodeBlockDarkTheme';
|
||||||
import lightTheme from './CodeBlockLightTheme';
|
import lightTheme from './CodeBlockLightTheme';
|
||||||
|
|
||||||
|
const SyntaxHighlighterComponent =
|
||||||
|
SyntaxHighlighter as unknown as React.ComponentType<any>;
|
||||||
|
|
||||||
const CodeBlock = ({
|
const CodeBlock = ({
|
||||||
language,
|
language,
|
||||||
children,
|
children,
|
||||||
@@ -50,13 +53,13 @@ const CodeBlock = ({
|
|||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
</button>
|
</button>
|
||||||
<SyntaxHighlighter
|
<SyntaxHighlighterComponent
|
||||||
language={language}
|
language={language}
|
||||||
style={syntaxTheme}
|
style={syntaxTheme}
|
||||||
showInlineLineNumbers
|
showInlineLineNumbers
|
||||||
>
|
>
|
||||||
{children as string}
|
{children as string}
|
||||||
</SyntaxHighlighter>
|
</SyntaxHighlighterComponent>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
import { Cloud, Sun, CloudRain, CloudSnow, Wind } from 'lucide-react';
|
'use client';
|
||||||
|
|
||||||
|
import { Wind } from 'lucide-react';
|
||||||
import { useEffect, useState } from 'react';
|
import { useEffect, useState } from 'react';
|
||||||
|
import { getApproxLocation } from '@/lib/actions';
|
||||||
|
|
||||||
const WeatherWidget = () => {
|
const WeatherWidget = () => {
|
||||||
const [data, setData] = useState({
|
const [data, setData] = useState({
|
||||||
@@ -15,17 +18,6 @@ const WeatherWidget = () => {
|
|||||||
|
|
||||||
const [loading, setLoading] = useState(true);
|
const [loading, setLoading] = useState(true);
|
||||||
|
|
||||||
const getApproxLocation = async () => {
|
|
||||||
const res = await fetch('https://ipwhois.app/json/');
|
|
||||||
const data = await res.json();
|
|
||||||
|
|
||||||
return {
|
|
||||||
latitude: data.latitude,
|
|
||||||
longitude: data.longitude,
|
|
||||||
city: data.city,
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
const getLocation = async (
|
const getLocation = async (
|
||||||
callback: (location: {
|
callback: (location: {
|
||||||
latitude: number;
|
latitude: number;
|
||||||
|
|||||||
@@ -257,21 +257,21 @@ const Stock = (props: StockWidgetProps) => {
|
|||||||
const isPostMarket = props.marketState === 'POST';
|
const isPostMarket = props.marketState === 'POST';
|
||||||
|
|
||||||
const displayPrice = isPostMarket
|
const displayPrice = isPostMarket
|
||||||
? props.postMarketPrice ?? props.regularMarketPrice
|
? (props.postMarketPrice ?? props.regularMarketPrice)
|
||||||
: isPreMarket
|
: isPreMarket
|
||||||
? props.preMarketPrice ?? props.regularMarketPrice
|
? (props.preMarketPrice ?? props.regularMarketPrice)
|
||||||
: props.regularMarketPrice;
|
: props.regularMarketPrice;
|
||||||
|
|
||||||
const displayChange = isPostMarket
|
const displayChange = isPostMarket
|
||||||
? props.postMarketChange ?? props.regularMarketChange
|
? (props.postMarketChange ?? props.regularMarketChange)
|
||||||
: isPreMarket
|
: isPreMarket
|
||||||
? props.preMarketChange ?? props.regularMarketChange
|
? (props.preMarketChange ?? props.regularMarketChange)
|
||||||
: props.regularMarketChange;
|
: props.regularMarketChange;
|
||||||
|
|
||||||
const displayChangePercent = isPostMarket
|
const displayChangePercent = isPostMarket
|
||||||
? props.postMarketChangePercent ?? props.regularMarketChangePercent
|
? (props.postMarketChangePercent ?? props.regularMarketChangePercent)
|
||||||
: isPreMarket
|
: isPreMarket
|
||||||
? props.preMarketChangePercent ?? props.regularMarketChangePercent
|
? (props.preMarketChangePercent ?? props.regularMarketChangePercent)
|
||||||
: props.regularMarketChangePercent;
|
: props.regularMarketChangePercent;
|
||||||
|
|
||||||
const changeColor = isPositive
|
const changeColor = isPositive
|
||||||
|
|||||||
@@ -20,3 +20,17 @@ export const getSuggestions = async (chatHistory: [string, string][]) => {
|
|||||||
|
|
||||||
return data.suggestions;
|
return data.suggestions;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const getApproxLocation = async () => {
|
||||||
|
const res = await fetch('https://free.freeipapi.com/api/json', {
|
||||||
|
method: 'GET',
|
||||||
|
});
|
||||||
|
|
||||||
|
const data = await res.json();
|
||||||
|
|
||||||
|
return {
|
||||||
|
latitude: data.latitude,
|
||||||
|
longitude: data.longitude,
|
||||||
|
city: data.cityName,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ class APISearchAgent {
|
|||||||
chatHistory: input.chatHistory,
|
chatHistory: input.chatHistory,
|
||||||
followUp: input.followUp,
|
followUp: input.followUp,
|
||||||
llm: input.config.llm,
|
llm: input.config.llm,
|
||||||
|
}).catch((err) => {
|
||||||
|
console.error(`Error executing widgets: ${err}`);
|
||||||
|
return [];
|
||||||
});
|
});
|
||||||
|
|
||||||
let searchPromise: Promise<ResearcherOutput> | null = null;
|
let searchPromise: Promise<ResearcherOutput> | null = null;
|
||||||
|
|||||||
@@ -5,9 +5,10 @@ import Researcher from './researcher';
|
|||||||
import { getWriterPrompt } from '@/lib/prompts/search/writer';
|
import { getWriterPrompt } from '@/lib/prompts/search/writer';
|
||||||
import { WidgetExecutor } from './widgets';
|
import { WidgetExecutor } from './widgets';
|
||||||
import db from '@/lib/db';
|
import db from '@/lib/db';
|
||||||
import { chats, messages } from '@/lib/db/schema';
|
import { messages } from '@/lib/db/schema';
|
||||||
import { and, eq, gt } from 'drizzle-orm';
|
import { and, eq, gt } from 'drizzle-orm';
|
||||||
import { TextBlock } from '@/lib/types';
|
import { TextBlock } from '@/lib/types';
|
||||||
|
import { getTokenCount } from '@/lib/utils/splitText';
|
||||||
|
|
||||||
class SearchAgent {
|
class SearchAgent {
|
||||||
async searchAsync(session: SessionManager, input: SearchAgentInput) {
|
async searchAsync(session: SessionManager, input: SearchAgentInput) {
|
||||||
@@ -98,13 +99,17 @@ class SearchAgent {
|
|||||||
type: 'researchComplete',
|
type: 'researchComplete',
|
||||||
});
|
});
|
||||||
|
|
||||||
const finalContext =
|
let finalContext =
|
||||||
searchResults?.searchFindings
|
'<Query to be answered without searching; Search not made>';
|
||||||
|
|
||||||
|
if (searchResults) {
|
||||||
|
finalContext = searchResults?.searchFindings
|
||||||
.map(
|
.map(
|
||||||
(f, index) =>
|
(f, index) =>
|
||||||
`<result index=${index + 1} title=${f.metadata.title}>${f.content}</result>`,
|
`<result index=${index + 1} title=${f.metadata.title}>${f.content}</result>`,
|
||||||
)
|
)
|
||||||
.join('\n') || '';
|
.join('\n');
|
||||||
|
}
|
||||||
|
|
||||||
const widgetContext = widgetOutputs
|
const widgetContext = widgetOutputs
|
||||||
.map((o) => {
|
.map((o) => {
|
||||||
@@ -119,6 +124,7 @@ class SearchAgent {
|
|||||||
input.config.systemInstructions,
|
input.config.systemInstructions,
|
||||||
input.config.mode,
|
input.config.mode,
|
||||||
);
|
);
|
||||||
|
|
||||||
const answerStream = input.config.llm.streamText({
|
const answerStream = input.config.llm.streamText({
|
||||||
messages: [
|
messages: [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,129 +0,0 @@
|
|||||||
import z from 'zod';
|
|
||||||
import { ResearchAction } from '../../types';
|
|
||||||
import { Chunk, SearchResultsResearchBlock } from '@/lib/types';
|
|
||||||
import { searchSearxng } from '@/lib/searxng';
|
|
||||||
|
|
||||||
const schema = z.object({
|
|
||||||
queries: z.array(z.string()).describe('List of academic search queries'),
|
|
||||||
});
|
|
||||||
|
|
||||||
const academicSearchDescription = `
|
|
||||||
Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.
|
|
||||||
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.
|
|
||||||
|
|
||||||
For example, if the user is interested in recent advancements in renewable energy, your queries could be:
|
|
||||||
1. "Recent advancements in renewable energy 2024"
|
|
||||||
2. "Cutting-edge research on solar power technologies"
|
|
||||||
3. "Innovations in wind energy systems"
|
|
||||||
|
|
||||||
If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed academic information.
|
|
||||||
`;
|
|
||||||
|
|
||||||
const academicSearchAction: ResearchAction<typeof schema> = {
|
|
||||||
name: 'academic_search',
|
|
||||||
schema: schema,
|
|
||||||
getDescription: () => academicSearchDescription,
|
|
||||||
getToolDescription: () =>
|
|
||||||
"Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.",
|
|
||||||
enabled: (config) =>
|
|
||||||
config.sources.includes('academic') &&
|
|
||||||
config.classification.classification.skipSearch === false &&
|
|
||||||
config.classification.classification.academicSearch === true,
|
|
||||||
execute: async (input, additionalConfig) => {
|
|
||||||
input.queries = input.queries.slice(0, 3);
|
|
||||||
|
|
||||||
const researchBlock = additionalConfig.session.getBlock(
|
|
||||||
additionalConfig.researchBlockId,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (researchBlock && researchBlock.type === 'research') {
|
|
||||||
researchBlock.data.subSteps.push({
|
|
||||||
type: 'searching',
|
|
||||||
id: crypto.randomUUID(),
|
|
||||||
searching: input.queries,
|
|
||||||
});
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
const searchResultsBlockId = crypto.randomUUID();
|
|
||||||
let searchResultsEmitted = false;
|
|
||||||
|
|
||||||
let results: Chunk[] = [];
|
|
||||||
|
|
||||||
const search = async (q: string) => {
|
|
||||||
const res = await searchSearxng(q, {
|
|
||||||
engines: ['arxiv', 'google scholar', 'pubmed'],
|
|
||||||
});
|
|
||||||
|
|
||||||
const resultChunks: Chunk[] = res.results.map((r) => ({
|
|
||||||
content: r.content || r.title,
|
|
||||||
metadata: {
|
|
||||||
title: r.title,
|
|
||||||
url: r.url,
|
|
||||||
},
|
|
||||||
}));
|
|
||||||
|
|
||||||
results.push(...resultChunks);
|
|
||||||
|
|
||||||
if (
|
|
||||||
!searchResultsEmitted &&
|
|
||||||
researchBlock &&
|
|
||||||
researchBlock.type === 'research'
|
|
||||||
) {
|
|
||||||
searchResultsEmitted = true;
|
|
||||||
|
|
||||||
researchBlock.data.subSteps.push({
|
|
||||||
id: searchResultsBlockId,
|
|
||||||
type: 'search_results',
|
|
||||||
reading: resultChunks,
|
|
||||||
});
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
} else if (
|
|
||||||
searchResultsEmitted &&
|
|
||||||
researchBlock &&
|
|
||||||
researchBlock.type === 'research'
|
|
||||||
) {
|
|
||||||
const subStepIndex = researchBlock.data.subSteps.findIndex(
|
|
||||||
(step) => step.id === searchResultsBlockId,
|
|
||||||
);
|
|
||||||
|
|
||||||
const subStep = researchBlock.data.subSteps[
|
|
||||||
subStepIndex
|
|
||||||
] as SearchResultsResearchBlock;
|
|
||||||
|
|
||||||
subStep.reading.push(...resultChunks);
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
await Promise.all(input.queries.map(search));
|
|
||||||
|
|
||||||
return {
|
|
||||||
type: 'search_results',
|
|
||||||
results,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
export default academicSearchAction;
|
|
||||||
@@ -1,11 +1,11 @@
|
|||||||
import academicSearchAction from './academicSearch';
|
import academicSearchAction from './search/academicSearch';
|
||||||
import doneAction from './done';
|
import doneAction from './done';
|
||||||
import planAction from './plan';
|
import planAction from './plan';
|
||||||
import ActionRegistry from './registry';
|
import ActionRegistry from './registry';
|
||||||
import scrapeURLAction from './scrapeURL';
|
import scrapeURLAction from './scrapeURL';
|
||||||
import socialSearchAction from './socialSearch';
|
import socialSearchAction from './search/socialSearch';
|
||||||
import uploadsSearchAction from './uploadsSearch';
|
import uploadsSearchAction from './uploadsSearch';
|
||||||
import webSearchAction from './webSearch';
|
import webSearchAction from './search/webSearch';
|
||||||
|
|
||||||
ActionRegistry.register(webSearchAction);
|
ActionRegistry.register(webSearchAction);
|
||||||
ActionRegistry.register(doneAction);
|
ActionRegistry.register(doneAction);
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ class ActionRegistry {
|
|||||||
additionalConfig: AdditionalConfig & {
|
additionalConfig: AdditionalConfig & {
|
||||||
researchBlockId: string;
|
researchBlockId: string;
|
||||||
fileIds: string[];
|
fileIds: string[];
|
||||||
|
mode: SearchAgentConfig['mode'];
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
const action = this.actions.get(name);
|
const action = this.actions.get(name);
|
||||||
@@ -83,6 +84,7 @@ class ActionRegistry {
|
|||||||
additionalConfig: AdditionalConfig & {
|
additionalConfig: AdditionalConfig & {
|
||||||
researchBlockId: string;
|
researchBlockId: string;
|
||||||
fileIds: string[];
|
fileIds: string[];
|
||||||
|
mode: SearchAgentConfig['mode'];
|
||||||
},
|
},
|
||||||
): Promise<ActionOutput[]> {
|
): Promise<ActionOutput[]> {
|
||||||
const results: ActionOutput[] = [];
|
const results: ActionOutput[] = [];
|
||||||
|
|||||||
@@ -1,10 +1,50 @@
|
|||||||
import z from 'zod';
|
import z from 'zod';
|
||||||
import { ResearchAction } from '../../types';
|
import { ResearchAction } from '../../types';
|
||||||
import { Chunk, ReadingResearchBlock } from '@/lib/types';
|
import { Chunk, ReadingResearchBlock } from '@/lib/types';
|
||||||
import TurnDown from 'turndown';
|
import Scraper from '@/lib/scraper';
|
||||||
import path from 'path';
|
import { splitText } from '@/lib/utils/splitText';
|
||||||
|
|
||||||
const turndownService = new TurnDown();
|
const extractorPrompt = `
|
||||||
|
Assistant is an AI information extractor. Assistant will be shared with scraped information from a website along with the queries used to retrieve that information. Assistant's task is to extract relevant facts from the scraped data to answer the queries.
|
||||||
|
|
||||||
|
## Things to taken into consideration when extracting information:
|
||||||
|
1. Relevance to the query: The extracted information must dynamically adjust based on the query's intent. If the query asks "What is [X]", you must extract the definition/identity. If the query asks for "[X] specs" or "features", you must provide deep, granular technical details.
|
||||||
|
- Example: For "What is [Product]", extract the core definition. For "[Product] capabilities", extract every technical function mentioned.
|
||||||
|
2. Concentrate on extracting factual information that can help in answering the question rather than opinions or commentary. Ignore marketing fluff like "best-in-class" or "seamless."
|
||||||
|
3. Noise to signal ratio: If the scraped data is noisy (headers, footers, UI text), ignore it and extract only the high-value information.
|
||||||
|
- Example: Discard "Click for more" or "Subscribe now" messages.
|
||||||
|
4. Avoid using filler sentences or words; extract concise, telegram-style information.
|
||||||
|
- Example: Change "The device features a weight of only 1.2kg" to "Weight: 1.2kg."
|
||||||
|
5. Duplicate information: If a fact appears multiple times (e.g., in a paragraph and a technical table), merge the details into a single, high-density bullet point to avoid redundancy.
|
||||||
|
6. Numerical Data Integrity: NEVER summarize or generalize numbers, benchmarks, or table data. Extract raw values exactly as they appear.
|
||||||
|
- Example: Do not say "Improved coding scores." Say "LiveCodeBench v6: 80.0%."
|
||||||
|
|
||||||
|
## Example
|
||||||
|
For example, if the query is "What are the health benefits of green tea?" and the scraped data contains various pieces of information about green tea, Assistant should focus on extracting factual information related to the health benefits of green tea such as "Green tea contains antioxidants which can help in reducing inflammation" and ignore irrelevant information such as "Green tea is a popular beverage worldwide".
|
||||||
|
|
||||||
|
It can also remove filler words to reduce the sentence to "Contains antioxidants; reduces inflammation."
|
||||||
|
|
||||||
|
For tables/numerical data extraction, Assistant should extract the raw numerical data or the content of the table without trying to summarize it to avoid losing important details. For example, if a table lists specific battery life hours for different modes, Assistant should list every mode and its corresponding hour count rather than giving a general average.
|
||||||
|
|
||||||
|
Make sure the extracted facts are in bullet points format to make it easier to read and understand.
|
||||||
|
|
||||||
|
## Output format
|
||||||
|
Assistant should reply with a JSON object containing a key "extracted_facts" which is a string of the bulleted facts. Return only raw JSON without markdown formatting (no \`\`\`json blocks).
|
||||||
|
|
||||||
|
<example_output>
|
||||||
|
{
|
||||||
|
"extracted_facts": "- Fact 1\n- Fact 2\n- Fact 3"
|
||||||
|
}
|
||||||
|
</example_output>
|
||||||
|
`;
|
||||||
|
|
||||||
|
const extractorSchema = z.object({
|
||||||
|
extracted_facts: z
|
||||||
|
.string()
|
||||||
|
.describe(
|
||||||
|
'The extracted facts that are relevant to the query and can help in answering the question should be listed here in a concise manner.',
|
||||||
|
),
|
||||||
|
});
|
||||||
|
|
||||||
const schema = z.object({
|
const schema = z.object({
|
||||||
urls: z.array(z.string()).describe('A list of URLs to scrape content from.'),
|
urls: z.array(z.string()).describe('A list of URLs to scrape content from.'),
|
||||||
@@ -39,11 +79,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
|||||||
await Promise.all(
|
await Promise.all(
|
||||||
params.urls.map(async (url) => {
|
params.urls.map(async (url) => {
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const scraped = await Scraper.scrape(url);
|
||||||
const text = await res.text();
|
|
||||||
|
|
||||||
const title =
|
|
||||||
text.match(/<title>(.*?)<\/title>/i)?.[1] || `Content from ${url}`;
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
!readingEmitted &&
|
!readingEmitted &&
|
||||||
@@ -59,7 +95,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
|||||||
content: '',
|
content: '',
|
||||||
metadata: {
|
metadata: {
|
||||||
url,
|
url,
|
||||||
title: title,
|
title: scraped.title,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@@ -92,7 +128,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
|||||||
content: '',
|
content: '',
|
||||||
metadata: {
|
metadata: {
|
||||||
url,
|
url,
|
||||||
title: title,
|
title: scraped.title,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -108,13 +144,49 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const markdown = turndownService.turndown(text);
|
const chunks = splitText(scraped.content, 4000, 500);
|
||||||
|
|
||||||
|
let accumulatedContent = '';
|
||||||
|
|
||||||
|
if (chunks.length > 1) {
|
||||||
|
try {
|
||||||
|
await Promise.all(
|
||||||
|
chunks.map(async (chunk) => {
|
||||||
|
const extracted = await additionalConfig.llm.generateObject<
|
||||||
|
typeof extractorSchema
|
||||||
|
>({
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: 'system',
|
||||||
|
content: extractorPrompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: `<queries>Summarize</queries>\n<scraped_data>${chunk}</scraped_data>`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
schema: extractorSchema,
|
||||||
|
});
|
||||||
|
|
||||||
|
accumulatedContent += extracted.extracted_facts + '\n';
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
} catch (err) {
|
||||||
|
console.log(
|
||||||
|
'Error during extraction, falling back to raw content',
|
||||||
|
err,
|
||||||
|
);
|
||||||
|
accumulatedContent = chunks[0];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
accumulatedContent = scraped.content;
|
||||||
|
}
|
||||||
|
|
||||||
results.push({
|
results.push({
|
||||||
content: markdown,
|
content: accumulatedContent,
|
||||||
metadata: {
|
metadata: {
|
||||||
url,
|
url,
|
||||||
title: title,
|
title: scraped.title,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -122,7 +194,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
|||||||
content: `Failed to fetch content from ${url}: ${error}`,
|
content: `Failed to fetch content from ${url}: ${error}`,
|
||||||
metadata: {
|
metadata: {
|
||||||
url,
|
url,
|
||||||
title: `Error fetching ${url}`,
|
title: `Error scraping ${url}`,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,62 @@
|
|||||||
|
import z from 'zod';
|
||||||
|
import { ResearchAction } from '../../../types';
|
||||||
|
import { ResearchBlock } from '@/lib/types';
|
||||||
|
import { executeSearch } from './baseSearch';
|
||||||
|
|
||||||
|
/** Arguments the researcher model supplies when invoking the academic search tool. */
const schema = z.object({
  queries: z.array(z.string()).describe('List of academic search queries'),
});

/** Long-form usage instructions returned by `getDescription`. */
const academicSearchDescription = `
Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.

For example, if the user is interested in recent advancements in renewable energy, your queries could be:
1. "Recent advancements in renewable energy 2024"
2. "Cutting-edge research on solar power technologies"
3. "Innovations in wind energy systems"

If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed academic information.
`;

/**
 * Research action that runs up to three queries through `executeSearch`
 * restricted to the arxiv, google scholar and pubmed engines.
 */
const academicSearchAction: ResearchAction<typeof schema> = {
  name: 'academic_search',
  schema,
  getDescription() {
    return academicSearchDescription;
  },
  getToolDescription() {
    return "Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.";
  },
  /** Enabled only when the 'academic' source is selected and the classifier asked for an academic search. */
  enabled(config) {
    if (!config.sources.includes('academic')) return false;

    const flags = config.classification.classification;
    return flags.skipSearch === false && flags.academicSearch === true;
  },
  /**
   * Caps the queries at three, resolves the research block from the session
   * and delegates to the shared search pipeline.
   *
   * @throws Error when the session has no block for `researchBlockId`.
   */
  async execute(input, additionalConfig) {
    // A non-array value is wrapped into a one-element list before the cap is applied.
    const queryList = Array.isArray(input.queries)
      ? input.queries
      : [input.queries];
    input.queries = queryList.slice(0, 3);

    const { session, researchBlockId, llm, embedding, mode } = additionalConfig;

    const researchBlock = session.getBlock(researchBlockId) as
      | ResearchBlock
      | undefined;

    if (!researchBlock) {
      throw new Error('Failed to retrieve research block');
    }

    const results = await executeSearch({
      llm,
      embedding,
      mode,
      queries: input.queries,
      researchBlock,
      session,
      searchConfig: {
        engines: ['arxiv', 'google scholar', 'pubmed'],
      },
    });

    return {
      type: 'search_results',
      results,
    };
  },
};
|
||||||
|
|
||||||
|
export default academicSearchAction;
|
||||||
423
src/lib/agents/search/researcher/actions/search/baseSearch.ts
Normal file
423
src/lib/agents/search/researcher/actions/search/baseSearch.ts
Normal file
@@ -0,0 +1,423 @@
|
|||||||
|
import BaseEmbedding from '@/lib/models/base/embedding';
|
||||||
|
import BaseLLM from '@/lib/models/base/llm';
|
||||||
|
import { searchSearxng, SearxngSearchOptions } from '@/lib/searxng';
|
||||||
|
import SessionManager from '@/lib/session';
|
||||||
|
import { Chunk, ResearchBlock, SearchResultsResearchBlock } from '@/lib/types';
|
||||||
|
import { SearchAgentConfig } from '../../../types';
|
||||||
|
import computeSimilarity from '@/lib/utils/computeSimilarity';
|
||||||
|
import z from 'zod';
|
||||||
|
import Scraper from '@/lib/scraper';
|
||||||
|
import { splitText } from '@/lib/utils/splitText';
|
||||||
|
|
||||||
|
/** Minimum `computeSimilarity` score (query vs. result) for a result to be kept in speed/balanced mode. */
const RELEVANCE_THRESHOLD = 0.5;
/** Two results whose embeddings score above this against each other are treated as duplicates. */
const DUPLICATE_THRESHOLD = 0.75;
/** Upper bound on the number of results returned in speed/balanced mode. */
const MAX_RANKED_RESULTS = 20;
/** Upper bound on the number of pages scraped per call in quality mode. */
const MAX_PICKED_RESULTS = 3;
/** Chunk size and overlap passed to `splitText` before fact extraction. */
const EXTRACTION_CHUNK_SIZE = 4000;
const EXTRACTION_CHUNK_OVERLAP = 500;

const PICKER_PROMPT = `
Assistant is an AI search result picker. Assistant's task is to pick 2-3 of the most relevant search results based off the query which can be then scraped for information to answer the query.
Assistant will be shared with the search results retrieved from a search engine along with the queries used to retrieve those results. Assistant will then pick maxiumum 3 of the most relevant search results based on the queries and the content of the search results. Assistant should only pick search results that are relevant to the query and can help in answering the question.

## Things to taken into consideration when picking the search results:
1. Relevance to the query: The search results should be relevant to the query provided. Irrelevant results should be ignored.
2. Content quality: The content of the search results should be of high quality and provide valuable information that can help in answering the question.
3. Favour known and reputable sources: If there are search results from known and reputable sources that are relevant to the query, those should be prioritized.
4. Diversity: If there are multiple search results that are relevant and of high quality, try to pick results that provide diverse perspectives or information to get a well-rounded understanding of the topic.
5. Avoid picking search results that are too similar to each other in terms of content to maximize the amount of information gathered.
6. Maximum 3 results: Assistant should pick a maximum of 3 search results. If there are more than 3 relevant and high-quality search results, pick the top 3 based on the above criteria. If the queries are very specific and there are only 1 or 2 relevant search results, it's okay to pick only those 1 or 2 results.
7. Try to pick only one high quality result unless there are diverse perspective in multiple results then you can pick a maximum of 3.
8. Analyze the title, the snippet and the URL to determine the relevant to query, quality of the content that might be present inside and the reputation of the source before picking the search result.

## Output format
Assistant should output an array of indices corresponding to the search results that were picked based on the above criteria. The indices should be based on the order of the search results provided to Assistant. For example, if Assistant picks the 1st, 3rd, and 5th search results, Assistant should output [0, 2, 4].

<example_output>
{
"picked_indices": [0,2,4]
}
</example_output>
`;

const PICKER_SCHEMA = z.object({
  picked_indices: z
    .array(z.number())
    .describe('The array of the picked indices to be scraped for answering'),
});

const EXTRACTOR_PROMPT = `
Assistant is an AI information extractor. Assistant will be shared with scraped information from a website along with the queries used to retrieve that information. Assistant's task is to extract relevant facts from the scraped data to answer the queries.

## Things to taken into consideration when extracting information:
1. Relevance to the query: The extracted information must dynamically adjust based on the query's intent. If the query asks "What is [X]", you must extract the definition/identity. If the query asks for "[X] specs" or "features", you must provide deep, granular technical details.
- Example: For "What is [Product]", extract the core definition. For "[Product] capabilities", extract every technical function mentioned.
2. Concentrate on extracting factual information that can help in answering the question rather than opinions or commentary. Ignore marketing fluff like "best-in-class" or "seamless."
3. Noise to signal ratio: If the scraped data is noisy (headers, footers, UI text), ignore it and extract only the high-value information.
- Example: Discard "Click for more" or "Subscribe now" messages.
4. Avoid using filler sentences or words; extract concise, telegram-style information.
- Example: Change "The device features a weight of only 1.2kg" to "Weight: 1.2kg."
5. Duplicate information: If a fact appears multiple times (e.g., in a paragraph and a technical table), merge the details into a single, high-density bullet point to avoid redundancy.
6. Numerical Data Integrity: NEVER summarize or generalize numbers, benchmarks, or table data. Extract raw values exactly as they appear.
- Example: Do not say "Improved coding scores." Say "LiveCodeBench v6: 80.0%."

## Example
For example, if the query is "What are the health benefits of green tea?" and the scraped data contains various pieces of information about green tea, Assistant should focus on extracting factual information related to the health benefits of green tea such as "Green tea contains antioxidants which can help in reducing inflammation" and ignore irrelevant information such as "Green tea is a popular beverage worldwide".

It can also remove filler words to reduce the sentence to "Contains antioxidants; reduces inflammation."

For tables/numerical data extraction, Assistant should extract the raw numerical data or the content of the table without trying to summarize it to avoid losing important details. For example, if a table lists specific battery life hours for different modes, Assistant should list every mode and its corresponding hour count rather than giving a general average.

Make sure the extracted facts are in bullet points format to make it easier to read and understand.

## Output format
Assistant should reply with a JSON object containing a key "extracted_facts" which is a string of the bulleted facts. Return only raw JSON without markdown formatting (no \`\`\`json blocks).

<example_output>
{
"extracted_facts": "- Fact 1\n- Fact 2\n- Fact 3"
}
</example_output>
`;

const EXTRACTOR_SCHEMA = z.object({
  extracted_facts: z
    .string()
    .describe(
      'The extracted facts that are relevant to the query and can help in answering the question should be listed here in a concise manner.',
    ),
});

type ExecuteSearchInput = {
  queries: string[];
  mode: SearchAgentConfig['mode'];
  searchConfig?: SearxngSearchOptions;
  researchBlock: ResearchBlock;
  session: InstanceType<typeof SessionManager>;
  llm: BaseLLM<any>;
  embedding: BaseEmbedding<any>;
};

type SearxngResult = Awaited<
  ReturnType<typeof searchSearxng>
>['results'][number];

/** Sends the research block's current sub-step list to the session as a single replace patch. */
const publishSubSteps = (
  session: InstanceType<typeof SessionManager>,
  researchBlock: ResearchBlock,
): void => {
  session.updateBlock(researchBlock.id, [
    {
      op: 'replace',
      path: '/data/subSteps',
      value: researchBlock.data.subSteps,
    },
  ]);
};

/** Converts a raw search hit into a chunk that carries no ranking information. */
const toUnrankedChunk = (r: SearxngResult): Chunk => ({
  content: r.content || r.title,
  metadata: {
    title: r.title,
    url: r.url,
    similarity: 1,
    embedding: [],
  },
});

/**
 * Returns a function that records result chunks on the research block.
 * The first call creates one 'search_results' sub-step; later calls append
 * to that same sub-step. Every call publishes the updated sub-step list.
 */
const createSearchResultsEmitter = (
  session: InstanceType<typeof SessionManager>,
  researchBlock: ResearchBlock,
): ((chunks: Chunk[]) => void) => {
  const subStepId = crypto.randomUUID();
  let emitted = false;

  return (chunks) => {
    if (!emitted) {
      emitted = true;

      researchBlock.data.subSteps.push({
        id: subStepId,
        type: 'search_results',
        reading: chunks,
      });
    } else {
      const subStep = researchBlock.data.subSteps.find(
        (step) => step.id === subStepId,
      ) as SearchResultsResearchBlock;

      subStep.reading.push(...chunks);
    }

    publishSubSteps(session, researchBlock);
  };
};

/**
 * Speed/balanced pipeline: search every query, score each hit against its
 * query with the embedding model, drop weak and near-duplicate hits, and
 * return at most `MAX_RANKED_RESULTS` chunks ordered by similarity.
 */
const searchWithEmbeddingRanking = async (
  input: ExecuteSearchInput,
): Promise<Chunk[]> => {
  const emit = createSearchResultsEmitter(input.session, input.researchBlock);
  const results: Chunk[] = [];

  const search = async (q: string): Promise<void> => {
    const res = await searchSearxng(q, { ...input.searchConfig });

    let resultChunks: Chunk[];

    try {
      const queryEmbedding = (await input.embedding.embedText([q]))[0];

      const scored = await Promise.all(
        res.results.map(async (r) => {
          const content = r.content || r.title;
          const chunkEmbedding = (
            await input.embedding.embedText([content])
          )[0];

          return {
            content,
            metadata: {
              title: r.title,
              url: r.url,
              similarity: computeSimilarity(queryEmbedding, chunkEmbedding),
              embedding: chunkEmbedding,
            },
          };
        }),
      );

      resultChunks = scored.filter(
        (c) => c.metadata.similarity > RELEVANCE_THRESHOLD,
      );
    } catch (err) {
      // Embedding is best-effort: on failure keep every hit, unranked.
      console.log(
        'Error embedding search results, falling back to unranked results',
        err,
      );
      resultChunks = res.results.map(toUnrankedChunk);
    }

    results.push(...resultChunks);
    emit(resultChunks);
  };

  await Promise.all(input.queries.map(search));

  results.sort((a, b) => b.metadata.similarity - a.metadata.similarity);

  const unique: Chunk[] = [];

  for (const candidate of results) {
    // Chunks without an embedding (fallback path) cannot be compared and are never treated as duplicates.
    const isDuplicate = unique.some(
      (kept) =>
        candidate.metadata.embedding.length > 0 &&
        kept.metadata.embedding.length > 0 &&
        computeSimilarity(
          candidate.metadata.embedding,
          kept.metadata.embedding,
        ) > DUPLICATE_THRESHOLD,
    );

    if (!isDuplicate) unique.push(candidate);
  }

  // Ranking data is stripped only after de-duplication has finished using it.
  // The chunk objects are the same ones referenced by the emitted sub-step,
  // so the fields disappear there as well.
  for (const chunk of unique) {
    delete chunk.metadata.embedding;
    delete chunk.metadata.similarity;
  }

  return unique.slice(0, MAX_RANKED_RESULTS);
};

/**
 * Scrapes one picked result and extracts query-relevant facts from it chunk
 * by chunk. Facts are joined in document order. Returns `undefined` when the
 * page could not be scraped or processing failed.
 */
const extractPageFacts = async (
  input: ExecuteSearchInput,
  result: Chunk,
): Promise<Chunk | undefined> => {
  try {
    const scrapedData = await Scraper.scrape(result.metadata.url).catch(
      (err) => {
        console.log('Error scraping data from', result.metadata.url, err);
        return undefined;
      },
    );

    if (!scrapedData) return undefined;

    const chunks = splitText(
      scrapedData.content,
      EXTRACTION_CHUNK_SIZE,
      EXTRACTION_CHUNK_OVERLAP,
    );

    // Promise.all keeps input order, so facts follow the page's own order
    // regardless of which LLM call finishes first.
    const factsPerChunk = await Promise.all(
      chunks.map(async (chunk) => {
        try {
          const extractorOutput = await input.llm.generateObject<
            typeof EXTRACTOR_SCHEMA
          >({
            schema: EXTRACTOR_SCHEMA,
            messages: [
              {
                role: 'system',
                content: EXTRACTOR_PROMPT,
              },
              {
                role: 'user',
                content: `<queries>${input.queries.join(', ')}</queries>\n<scraped_data>${chunk}</scraped_data>`,
              },
            ],
          });

          return extractorOutput.extracted_facts + '\n';
        } catch (err) {
          console.log('Error extracting information from chunk', err);
          return '';
        }
      }),
    );

    return {
      ...result,
      content: factsPerChunk.join(''),
    };
  } catch (err) {
    console.log(
      'Error scraping or extracting information from',
      result.metadata.url,
      err,
    );
    return undefined;
  }
};

/**
 * Quality pipeline: search every query, let the LLM pick up to
 * `MAX_PICKED_RESULTS` hits, scrape the ones not already read in this
 * research block, and return the facts extracted from each page.
 */
const searchWithScrapedExtraction = async (
  input: ExecuteSearchInput,
): Promise<Chunk[]> => {
  const { researchBlock } = input;
  const emit = createSearchResultsEmitter(input.session, researchBlock);
  const searchResults: Chunk[] = [];

  const search = async (q: string): Promise<void> => {
    const res = await searchSearxng(q, { ...input.searchConfig });
    const resultChunks = res.results.map(toUnrankedChunk);

    searchResults.push(...resultChunks);
    emit(resultChunks);
  };

  await Promise.all(input.queries.map(search));

  // Nothing to pick from: skip the picker LLM call entirely.
  if (searchResults.length === 0) return [];

  const pickerResponse = await input.llm.generateObject<typeof PICKER_SCHEMA>({
    schema: PICKER_SCHEMA,
    messages: [
      {
        role: 'system',
        content: PICKER_PROMPT,
      },
      {
        role: 'user',
        content: `<queries>${input.queries.join(', ')}</queries>\n<search_results>${searchResults.map((result, index) => `<result indice=${index}>${JSON.stringify(result)}</result>`).join('\n')}</search_results>`,
      },
    ],
  });

  // Out-of-range or non-integer indices resolve to undefined and are dropped.
  const pickedResults = pickerResponse.picked_indices
    .slice(0, MAX_PICKED_RESULTS)
    .map((i) => searchResults[i])
    .filter((r) => r !== undefined);

  // URLs already read in earlier 'reading' sub-steps of this block.
  const seenUrls = new Set<string>();

  for (const step of researchBlock.data.subSteps) {
    if (step.type === 'reading') {
      for (const chunk of step.reading) seenUrls.add(chunk.metadata.url);
    }
  }

  // Also drops repeats inside the current pick (same index twice, or the
  // same page returned by two queries) so no page is scraped twice.
  const filteredResults = pickedResults.filter((r) => {
    if (seenUrls.has(r.metadata.url)) return false;

    seenUrls.add(r.metadata.url);
    return true;
  });

  if (filteredResults.length > 0) {
    researchBlock.data.subSteps.push({
      id: crypto.randomUUID(),
      type: 'reading',
      reading: filteredResults,
    });

    publishSubSteps(input.session, researchBlock);
  }

  const extracted = await Promise.all(
    filteredResults.map((result) => extractPageFacts(input, result)),
  );

  return extracted.filter((chunk): chunk is Chunk => chunk !== undefined);
};

/**
 * Runs the given queries through SearXNG and returns the chunks the
 * researcher should read, reporting progress on the research block as it goes.
 *
 * A 'searching' sub-step listing the queries is always recorded first. Then:
 * - mode 'speed' or 'balanced': results are ranked and de-duplicated with the
 *   embedding model; snippets are returned as-is.
 * - mode 'quality': the LLM picks pages, which are scraped and reduced to
 *   extracted facts.
 * - any other mode: returns an empty list.
 *
 * @param input.queries - search queries to run (all run in parallel)
 * @param input.mode - selects the pipeline described above
 * @param input.searchConfig - optional options forwarded to `searchSearxng`
 * @param input.researchBlock - block whose `data.subSteps` is appended to
 * @param input.session - session used to publish sub-step updates
 * @param input.llm - model used for picking and fact extraction (quality mode)
 * @param input.embedding - model used for ranking (speed/balanced mode)
 * @returns the selected chunks; rejects if a search request or the picker call fails
 */
export const executeSearch = async (
  input: ExecuteSearchInput,
): Promise<Chunk[]> => {
  const researchBlock = input.researchBlock;

  researchBlock.data.subSteps.push({
    id: crypto.randomUUID(),
    type: 'searching',
    searching: input.queries,
  });

  publishSubSteps(input.session, researchBlock);

  if (input.mode === 'speed' || input.mode === 'balanced') {
    return searchWithEmbeddingRanking(input);
  }

  if (input.mode === 'quality') {
    return searchWithScrapedExtraction(input);
  }

  return [];
};
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
import z from 'zod';
|
||||||
|
import { ResearchAction } from '../../../types';
|
||||||
|
import { ResearchBlock } from '@/lib/types';
|
||||||
|
import { executeSearch } from './baseSearch';
|
||||||
|
|
||||||
|
/** Arguments the researcher model supplies when invoking the social search tool. */
const schema = z.object({
  queries: z.array(z.string()).describe('List of social search queries'),
});

/** Long-form usage instructions returned by `getDescription`. */
const socialSearchDescription = `
Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.

For example, if the user is interested in public opinion on electric vehicles, your queries could be:
1. "Electric vehicles public opinion 2024"
2. "Social media discussions on EV adoption"
3. "Trends in electric vehicle usage"

If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed social media information.
`;

/**
 * Research action that runs up to three queries through `executeSearch`
 * restricted to the reddit engine.
 */
const socialSearchAction: ResearchAction<typeof schema> = {
  name: 'social_search',
  schema,
  getDescription() {
    return socialSearchDescription;
  },
  getToolDescription() {
    return "Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.";
  },
  /** Enabled only when the 'discussions' source is selected and the classifier asked for a discussion search. */
  enabled(config) {
    if (!config.sources.includes('discussions')) return false;

    const flags = config.classification.classification;
    return flags.skipSearch === false && flags.discussionSearch === true;
  },
  /**
   * Caps the queries at three, resolves the research block from the session
   * and delegates to the shared search pipeline.
   *
   * @throws Error when the session has no block for `researchBlockId`.
   */
  async execute(input, additionalConfig) {
    // A non-array value is wrapped into a one-element list before the cap is applied.
    const queryList = Array.isArray(input.queries)
      ? input.queries
      : [input.queries];
    input.queries = queryList.slice(0, 3);

    const { session, researchBlockId, llm, embedding, mode } = additionalConfig;

    const researchBlock = session.getBlock(researchBlockId) as
      | ResearchBlock
      | undefined;

    if (!researchBlock) {
      throw new Error('Failed to retrieve research block');
    }

    const results = await executeSearch({
      llm,
      embedding,
      mode,
      queries: input.queries,
      researchBlock,
      session,
      searchConfig: {
        engines: ['reddit'],
      },
    });

    return {
      type: 'search_results',
      results,
    };
  },
};
|
||||||
|
|
||||||
|
export default socialSearchAction;
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import z from 'zod';
|
import z from 'zod';
|
||||||
import { ResearchAction } from '../../types';
|
import { ResearchAction } from '../../../types';
|
||||||
import { searchSearxng } from '@/lib/searxng';
|
import { ResearchBlock } from '@/lib/types';
|
||||||
import { Chunk, SearchResultsResearchBlock } from '@/lib/types';
|
import { executeSearch } from './baseSearch';
|
||||||
|
|
||||||
const actionSchema = z.object({
|
const actionSchema = z.object({
|
||||||
type: z.literal('web_search'),
|
type: z.literal('web_search'),
|
||||||
@@ -85,96 +85,28 @@ const webSearchAction: ResearchAction<typeof actionSchema> = {
|
|||||||
config.sources.includes('web') &&
|
config.sources.includes('web') &&
|
||||||
config.classification.classification.skipSearch === false,
|
config.classification.classification.skipSearch === false,
|
||||||
execute: async (input, additionalConfig) => {
|
execute: async (input, additionalConfig) => {
|
||||||
input.queries = input.queries.slice(0, 3);
|
input.queries = (
|
||||||
|
Array.isArray(input.queries) ? input.queries : [input.queries]
|
||||||
|
).slice(0, 3);
|
||||||
|
|
||||||
const researchBlock = additionalConfig.session.getBlock(
|
const researchBlock = additionalConfig.session.getBlock(
|
||||||
additionalConfig.researchBlockId,
|
additionalConfig.researchBlockId,
|
||||||
);
|
) as ResearchBlock | undefined;
|
||||||
|
|
||||||
if (researchBlock && researchBlock.type === 'research') {
|
if (!researchBlock) throw new Error('Failed to retrieve research block');
|
||||||
researchBlock.data.subSteps.push({
|
|
||||||
id: crypto.randomUUID(),
|
|
||||||
type: 'searching',
|
|
||||||
searching: input.queries,
|
|
||||||
});
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
const results = await executeSearch({
|
||||||
{
|
llm: additionalConfig.llm,
|
||||||
op: 'replace',
|
embedding: additionalConfig.embedding,
|
||||||
path: '/data/subSteps',
|
mode: additionalConfig.mode,
|
||||||
value: researchBlock.data.subSteps,
|
queries: input.queries,
|
||||||
},
|
researchBlock: researchBlock,
|
||||||
]);
|
session: additionalConfig.session,
|
||||||
}
|
});
|
||||||
|
|
||||||
const searchResultsBlockId = crypto.randomUUID();
|
|
||||||
let searchResultsEmitted = false;
|
|
||||||
|
|
||||||
let results: Chunk[] = [];
|
|
||||||
|
|
||||||
const search = async (q: string) => {
|
|
||||||
const res = await searchSearxng(q);
|
|
||||||
|
|
||||||
const resultChunks: Chunk[] = res.results.map((r) => ({
|
|
||||||
content: r.content || r.title,
|
|
||||||
metadata: {
|
|
||||||
title: r.title,
|
|
||||||
url: r.url,
|
|
||||||
},
|
|
||||||
}));
|
|
||||||
|
|
||||||
results.push(...resultChunks);
|
|
||||||
|
|
||||||
if (
|
|
||||||
!searchResultsEmitted &&
|
|
||||||
researchBlock &&
|
|
||||||
researchBlock.type === 'research'
|
|
||||||
) {
|
|
||||||
searchResultsEmitted = true;
|
|
||||||
|
|
||||||
researchBlock.data.subSteps.push({
|
|
||||||
id: searchResultsBlockId,
|
|
||||||
type: 'search_results',
|
|
||||||
reading: resultChunks,
|
|
||||||
});
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
} else if (
|
|
||||||
searchResultsEmitted &&
|
|
||||||
researchBlock &&
|
|
||||||
researchBlock.type === 'research'
|
|
||||||
) {
|
|
||||||
const subStepIndex = researchBlock.data.subSteps.findIndex(
|
|
||||||
(step) => step.id === searchResultsBlockId,
|
|
||||||
);
|
|
||||||
|
|
||||||
const subStep = researchBlock.data.subSteps[
|
|
||||||
subStepIndex
|
|
||||||
] as SearchResultsResearchBlock;
|
|
||||||
|
|
||||||
subStep.reading.push(...resultChunks);
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
await Promise.all(input.queries.map(search));
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
type: 'search_results',
|
type: 'search_results',
|
||||||
results,
|
results: results,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@@ -1,129 +0,0 @@
|
|||||||
import z from 'zod';
|
|
||||||
import { ResearchAction } from '../../types';
|
|
||||||
import { Chunk, SearchResultsResearchBlock } from '@/lib/types';
|
|
||||||
import { searchSearxng } from '@/lib/searxng';
|
|
||||||
|
|
||||||
const schema = z.object({
|
|
||||||
queries: z.array(z.string()).describe('List of social search queries'),
|
|
||||||
});
|
|
||||||
|
|
||||||
const socialSearchDescription = `
|
|
||||||
Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.
|
|
||||||
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.
|
|
||||||
|
|
||||||
For example, if the user is interested in public opinion on electric vehicles, your queries could be:
|
|
||||||
1. "Electric vehicles public opinion 2024"
|
|
||||||
2. "Social media discussions on EV adoption"
|
|
||||||
3. "Trends in electric vehicle usage"
|
|
||||||
|
|
||||||
If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed social media information.
|
|
||||||
`;
|
|
||||||
|
|
||||||
const socialSearchAction: ResearchAction<typeof schema> = {
|
|
||||||
name: 'social_search',
|
|
||||||
schema: schema,
|
|
||||||
getDescription: () => socialSearchDescription,
|
|
||||||
getToolDescription: () =>
|
|
||||||
"Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.",
|
|
||||||
enabled: (config) =>
|
|
||||||
config.sources.includes('discussions') &&
|
|
||||||
config.classification.classification.skipSearch === false &&
|
|
||||||
config.classification.classification.discussionSearch === true,
|
|
||||||
execute: async (input, additionalConfig) => {
|
|
||||||
input.queries = input.queries.slice(0, 3);
|
|
||||||
|
|
||||||
const researchBlock = additionalConfig.session.getBlock(
|
|
||||||
additionalConfig.researchBlockId,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (researchBlock && researchBlock.type === 'research') {
|
|
||||||
researchBlock.data.subSteps.push({
|
|
||||||
type: 'searching',
|
|
||||||
id: crypto.randomUUID(),
|
|
||||||
searching: input.queries,
|
|
||||||
});
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
const searchResultsBlockId = crypto.randomUUID();
|
|
||||||
let searchResultsEmitted = false;
|
|
||||||
|
|
||||||
let results: Chunk[] = [];
|
|
||||||
|
|
||||||
const search = async (q: string) => {
|
|
||||||
const res = await searchSearxng(q, {
|
|
||||||
engines: ['reddit'],
|
|
||||||
});
|
|
||||||
|
|
||||||
const resultChunks: Chunk[] = res.results.map((r) => ({
|
|
||||||
content: r.content || r.title,
|
|
||||||
metadata: {
|
|
||||||
title: r.title,
|
|
||||||
url: r.url,
|
|
||||||
},
|
|
||||||
}));
|
|
||||||
|
|
||||||
results.push(...resultChunks);
|
|
||||||
|
|
||||||
if (
|
|
||||||
!searchResultsEmitted &&
|
|
||||||
researchBlock &&
|
|
||||||
researchBlock.type === 'research'
|
|
||||||
) {
|
|
||||||
searchResultsEmitted = true;
|
|
||||||
|
|
||||||
researchBlock.data.subSteps.push({
|
|
||||||
id: searchResultsBlockId,
|
|
||||||
type: 'search_results',
|
|
||||||
reading: resultChunks,
|
|
||||||
});
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
} else if (
|
|
||||||
searchResultsEmitted &&
|
|
||||||
researchBlock &&
|
|
||||||
researchBlock.type === 'research'
|
|
||||||
) {
|
|
||||||
const subStepIndex = researchBlock.data.subSteps.findIndex(
|
|
||||||
(step) => step.id === searchResultsBlockId,
|
|
||||||
);
|
|
||||||
|
|
||||||
const subStep = researchBlock.data.subSteps[
|
|
||||||
subStepIndex
|
|
||||||
] as SearchResultsResearchBlock;
|
|
||||||
|
|
||||||
subStep.reading.push(...resultChunks);
|
|
||||||
|
|
||||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
|
||||||
{
|
|
||||||
op: 'replace',
|
|
||||||
path: '/data/subSteps',
|
|
||||||
value: researchBlock.data.subSteps,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
await Promise.all(input.queries.map(search));
|
|
||||||
|
|
||||||
return {
|
|
||||||
type: 'search_results',
|
|
||||||
results,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
export default socialSearchAction;
|
|
||||||
@@ -167,6 +167,7 @@ class Researcher {
|
|||||||
session: session,
|
session: session,
|
||||||
researchBlockId: researchBlockId,
|
researchBlockId: researchBlockId,
|
||||||
fileIds: input.config.fileIds,
|
fileIds: input.config.fileIds,
|
||||||
|
mode: input.config.mode,
|
||||||
});
|
});
|
||||||
|
|
||||||
actionOutput.push(...actionResults);
|
actionOutput.push(...actionResults);
|
||||||
|
|||||||
@@ -117,6 +117,7 @@ export interface ResearchAction<
|
|||||||
additionalConfig: AdditionalConfig & {
|
additionalConfig: AdditionalConfig & {
|
||||||
researchBlockId: string;
|
researchBlockId: string;
|
||||||
fileIds: string[];
|
fileIds: string[];
|
||||||
|
mode: SearchAgentConfig['mode'];
|
||||||
},
|
},
|
||||||
) => Promise<ActionOutput>;
|
) => Promise<ActionOutput>;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import path from 'node:path';
|
import path from 'node:path';
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import { Config, ConfigModelProvider, UIConfigSections } from './types';
|
import { Config, ConfigModelProvider, UIConfigSections } from './types';
|
||||||
import { hashObj } from '../serverUtils';
|
import { hashObj } from '../utils/hash';
|
||||||
import { getModelProvidersUIConfigSection } from '../models/providers';
|
import { getModelProvidersUIConfigSection } from '../models/providers';
|
||||||
|
|
||||||
class ConfigManager {
|
class ConfigManager {
|
||||||
|
|||||||
@@ -25,7 +25,9 @@ const reasoningModels = [
|
|||||||
'qwen3',
|
'qwen3',
|
||||||
'deepseek-v3.1',
|
'deepseek-v3.1',
|
||||||
'magistral',
|
'magistral',
|
||||||
'nemotron-3-nano',
|
'nemotron-3',
|
||||||
|
'nemotron-cascade-2',
|
||||||
|
'glm-4.7-flash',
|
||||||
];
|
];
|
||||||
|
|
||||||
class OllamaLLM extends BaseLLM<OllamaConfig> {
|
class OllamaLLM extends BaseLLM<OllamaConfig> {
|
||||||
|
|||||||
116
src/lib/scraper.ts
Normal file
116
src/lib/scraper.ts
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
import { JSDOM } from 'jsdom';
|
||||||
|
import { Readability } from '@mozilla/readability';
|
||||||
|
import { Mutex } from 'async-mutex';
|
||||||
|
|
||||||
|
/**
 * Page scraper built on one lazily launched, shared headless Chromium
 * instance (via playwright).
 *
 * The browser is launched on first use, shared by all concurrent `scrape`
 * calls, and closed again once no scrape has been running for
 * `IDLE_KILL_TIMEOUT` milliseconds. Launching and closing are serialized
 * through `browserMutex`; `userCount` tracks how many scrapes are in flight.
 */
class Scraper {
  /** Shared playwright browser handle; `undefined` while no browser is running. */
  private static browser: any | undefined;
  /** Milliseconds without any active scrape before the browser is closed. */
  private static IDLE_KILL_TIMEOUT = 30000;
  /** Milliseconds allowed for the initial navigation of a page. */
  private static NAVIGATION_TIMEOUT = 20000;
  /** Pending idle-kill timer, if one is scheduled. */
  private static idleTimeout: NodeJS.Timeout | undefined;
  /** Serializes browser launch and browser shutdown. */
  private static browserMutex = new Mutex();
  /** Number of `scrape` calls currently in flight. */
  private static userCount = 0;

  /**
   * Launches the shared browser if it is not running and cancels any pending
   * idle-kill timer. Runs under `browserMutex`, so concurrent callers cannot
   * launch two browsers.
   */
  private static async initBrowser() {
    await this.browserMutex.runExclusive(async () => {
      if (!this.browser) {
        // Loaded lazily so playwright is only required once a scrape happens.
        const { chromium } = await import('playwright');
        this.browser = await chromium.launch({
          headless: true,
          channel: 'chromium-headless-shell',
          args: [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-gpu',
            '--disable-blink-features=AutomationControlled',
          ],
        });
      }

      if (this.idleTimeout) clearTimeout(this.idleTimeout);
    });
  }

  /**
   * (Re)starts the idle timer. When it fires, the browser is closed under
   * `browserMutex`, but only if no scrape is in flight at that moment.
   */
  private static scheduleIdleKill() {
    if (this.idleTimeout) clearTimeout(this.idleTimeout);

    this.idleTimeout = setTimeout(() => {
      // The timer callback has no caller to report to, so a failed close is
      // logged here instead of surfacing as an unhandled rejection.
      void this.browserMutex
        .runExclusive(async () => {
          if (!this.browser || this.userCount !== 0) return;

          // Drop the shared handle first: even if close() fails, the next
          // scrape launches a fresh browser instead of reusing a broken one.
          const idleBrowser = this.browser;
          this.browser = undefined;
          await idleBrowser.close();
        })
        .catch((err: unknown) => {
          console.log('Error closing idle browser:', err);
        });
    }, this.IDLE_KILL_TIMEOUT);
  }

  /**
   * Loads `url` in a fresh browser context and extracts its readable text.
   *
   * @param url - Address of the page to scrape.
   * @returns The page title and a text document consisting of a
   *   `# title - url` heading followed by the Readability text content.
   *   If navigation or extraction fails, a placeholder result
   *   (`title: 'Failed to scrape'`) is returned instead of throwing.
   * @throws If the browser cannot be launched or the context/page cannot be
   *   created.
   */
  static async scrape(
    url: string,
  ): Promise<{ content: string; title: string }> {
    // Register as a user before the first await: an idle-kill callback that is
    // already queued on the mutex must not see userCount === 0 and close the
    // browser while this scrape is still setting up.
    this.userCount++;

    let context: any;

    try {
      await this.initBrowser();

      if (!this.browser) throw new Error('Browser not initialized');

      context = await this.browser.newContext({
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
      });

      await context.addInitScript(() => {
        Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
      });

      const page = await context.newPage();

      try {
        await page.goto(url, {
          waitUntil: 'domcontentloaded',
          timeout: this.NAVIGATION_TIMEOUT,
        });

        // Best effort: give the page up to 5s to reach `load`, then a short
        // settle delay; a timeout here is not treated as a failure.
        await page
          .waitForLoadState('load', { timeout: 5000 })
          .catch(() => undefined);
        await page.waitForTimeout(500);

        const html = await page.content();

        const dom = new JSDOM(html, {
          url,
        });

        const content = new Readability(dom.window.document).parse();

        const title = await page.title();

        return {
          // `||` rather than `??`: an empty title string should also fall
          // back to the placeholder heading.
          content: `
# ${title || 'No title'} - ${url}
${content?.textContent?.trim() ?? 'No content available'}
`,
          title,
        };
      } catch (err) {
        console.log(`Error scraping ${url}:`, err);

        return {
          title: 'Failed to scrape',
          content: `# ${url}\n\nError scraping content.`,
        };
      }
    } finally {
      this.userCount--;

      // Also runs when addInitScript/newPage threw, so the context never leaks.
      await context?.close().catch(() => undefined);

      if (this.userCount === 0) {
        this.scheduleIdleKill();
      }
    }
  }
}
|
||||||
|
|
||||||
|
export default Scraper;
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import { getSearxngURL } from './config/serverRegistry';
|
import { getSearxngURL } from './config/serverRegistry';
|
||||||
|
|
||||||
interface SearxngSearchOptions {
|
export interface SearxngSearchOptions {
|
||||||
categories?: string[];
|
categories?: string[];
|
||||||
engines?: string[];
|
engines?: string[];
|
||||||
language?: string;
|
language?: string;
|
||||||
@@ -38,11 +38,30 @@ export const searchSearxng = async (
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = await fetch(url);
|
const controller = new AbortController();
|
||||||
const data = await res.json();
|
const timeoutId = setTimeout(() => controller.abort(), 10000);
|
||||||
|
|
||||||
const results: SearxngSearchResult[] = data.results;
|
try {
|
||||||
const suggestions: string[] = data.suggestions;
|
const res = await fetch(url, {
|
||||||
|
signal: controller.signal,
|
||||||
|
});
|
||||||
|
|
||||||
return { results, suggestions };
|
if (!res.ok) {
|
||||||
|
throw new Error(`SearXNG error: ${res.statusText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await res.json();
|
||||||
|
|
||||||
|
const results: SearxngSearchResult[] = data.results;
|
||||||
|
const suggestions: string[] = data.suggestions;
|
||||||
|
|
||||||
|
return { results, suggestions };
|
||||||
|
} catch (err: any) {
|
||||||
|
if (err.name === 'AbortError') {
|
||||||
|
throw new Error('SearXNG search timed out');
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
} finally {
|
||||||
|
clearTimeout(timeoutId);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
1
src/lib/serverActions.ts
Normal file
1
src/lib/serverActions.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
'use server';
|
||||||
@@ -146,7 +146,7 @@ class UploadManager {
|
|||||||
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
||||||
const docBuffer = fs.readFileSync(filePath);
|
const docBuffer = fs.readFileSync(filePath);
|
||||||
|
|
||||||
const docText = await officeParser.parseOfficeAsync(docBuffer)
|
const docText = (await officeParser.parseOffice(docBuffer)).toText()
|
||||||
|
|
||||||
const docSplittedText = splitText(docText, 512, 128)
|
const docSplittedText = splitText(docText, 512, 128)
|
||||||
const docEmbeddings = await this.embeddingModel.embedText(docSplittedText)
|
const docEmbeddings = await this.embeddingModel.embedText(docSplittedText)
|
||||||
|
|||||||
@@ -2,8 +2,7 @@ import BaseEmbedding from "../models/base/embedding";
|
|||||||
import UploadManager from "./manager";
|
import UploadManager from "./manager";
|
||||||
import computeSimilarity from "../utils/computeSimilarity";
|
import computeSimilarity from "../utils/computeSimilarity";
|
||||||
import { Chunk } from "../types";
|
import { Chunk } from "../types";
|
||||||
import { hashObj } from "../serverUtils";
|
import { hashObj } from '../utils/hash';
|
||||||
import fs from 'fs';
|
|
||||||
|
|
||||||
type UploadStoreParams = {
|
type UploadStoreParams = {
|
||||||
embeddingModel: BaseEmbedding<any>;
|
embeddingModel: BaseEmbedding<any>;
|
||||||
|
|||||||
16
src/lib/utils/jaccardSim.ts
Normal file
16
src/lib/utils/jaccardSim.ts
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
/**
 * Computes the Jaccard similarity of the word sets of two strings.
 *
 * Both strings are lower-cased and split into words on runs of characters
 * that are not letters, combining marks, digits or `_` (Unicode-aware, so
 * non-ASCII words are kept intact). Empty tokens produced by leading or
 * trailing separators are discarded so they cannot count as a shared "word".
 *
 * @param a - First text.
 * @param b - Second text.
 * @returns `|A ∩ B| / |A ∪ B|` in the range `[0, 1]`, or `0` when either
 *   text contains no words.
 */
const computeJaccardSimilarity = (a: string, b: string): number => {
  const toWordSet = (text: string): Set<string> =>
    new Set(
      text
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((word) => word.length > 0),
    );

  const setA = toWordSet(a);
  const setB = toWordSet(b);

  if (setA.size === 0 || setB.size === 0) return 0;

  // Count shared words directly instead of relying on Set.prototype.union /
  // intersection, which are only available on very recent runtimes.
  let shared = 0;
  for (const word of setA) {
    if (setB.has(word)) shared++;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  return shared / (setA.size + setB.size - shared);
};
|
||||||
|
|
||||||
|
export default computeJaccardSimilarity;
|
||||||
@@ -4,7 +4,7 @@ const splitRegex = /(?<=\. |\n|! |\? |; |:\s|\d+\.\s|- |\* )/g;
|
|||||||
|
|
||||||
const enc = getEncoding('cl100k_base');
|
const enc = getEncoding('cl100k_base');
|
||||||
|
|
||||||
const getTokenCount = (text: string): number => {
|
export const getTokenCount = (text: string): number => {
|
||||||
try {
|
try {
|
||||||
return enc.encode(text).length;
|
return enc.encode(text).length;
|
||||||
} catch {
|
} catch {
|
||||||
|
|||||||
Reference in New Issue
Block a user