mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-04-30 08:12:26 +00:00
Compare commits
10 Commits
v1.9.0-rc
...
v1.9.0-rc2
Author | SHA1 | Date | |
---|---|---|---|
|
2873093fee | ||
|
806c47e705 | ||
|
ff34d1043f | ||
|
c521b032a7 | ||
|
6b8f7dc32c | ||
|
8bb3e4f016 | ||
|
51939ff842 | ||
|
e4faa82362 | ||
|
9c1936ec2c | ||
|
c4932c659a |
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "perplexica-backend",
|
||||
"version": "1.9.0-rc1",
|
||||
"version": "1.9.0-rc2",
|
||||
"license": "MIT",
|
||||
"author": "ItzCrazyKns",
|
||||
"scripts": {
|
||||
|
2345
searxng/settings.yml
2345
searxng/settings.yml
File diff suppressed because it is too large
Load Diff
@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
import logger from '../utils/logger';
|
||||
import { IterableReadableStream } from '@langchain/core/utils/stream';
|
||||
|
||||
const basicAcademicSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
@ -66,7 +67,7 @@ const basicAcademicSearchResponsePrompt = `
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const handleStream = async (
|
||||
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
||||
stream: IterableReadableStream<StreamEvent>,
|
||||
emitter: eventEmitter,
|
||||
) => {
|
||||
for await (const event of stream) {
|
||||
|
@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
import logger from '../utils/logger';
|
||||
import { IterableReadableStream } from '@langchain/core/utils/stream';
|
||||
|
||||
const basicRedditSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
@ -66,7 +67,7 @@ const basicRedditSearchResponsePrompt = `
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const handleStream = async (
|
||||
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
||||
stream: IterableReadableStream<StreamEvent>,
|
||||
emitter: eventEmitter,
|
||||
) => {
|
||||
for await (const event of stream) {
|
||||
|
@ -22,12 +22,13 @@ import logger from '../utils/logger';
|
||||
import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
|
||||
import { getDocumentsFromLinks } from '../lib/linkDocument';
|
||||
import LineOutputParser from '../lib/outputParsers/lineOutputParser';
|
||||
import { IterableReadableStream } from '@langchain/core/utils/stream';
|
||||
|
||||
const basicSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
||||
If the question contains some links and asks to answer from those links or even if they don't you need to return the links inside 'links' XML block and the question inside 'question' XML block. If there are no links then you need to return the question without any XML block.
|
||||
If the user asks to summarrize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block.
|
||||
If the user asks to summarize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block.
|
||||
|
||||
Example:
|
||||
1. Follow up question: What is the capital of France?
|
||||
@ -95,7 +96,7 @@ const basicWebSearchResponsePrompt = `
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const handleStream = async (
|
||||
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
||||
stream: IterableReadableStream<StreamEvent>,
|
||||
emitter: eventEmitter,
|
||||
) => {
|
||||
for await (const event of stream) {
|
||||
@ -157,35 +158,43 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
|
||||
question = 'Summarize';
|
||||
}
|
||||
|
||||
let docs = []
|
||||
let docs = [];
|
||||
|
||||
const linkDocs = await getDocumentsFromLinks({ links });
|
||||
|
||||
const docGroups: Document[] = [];
|
||||
|
||||
linkDocs.map((doc) => {
|
||||
const URLDocExists = docGroups.find((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10);
|
||||
const URLDocExists = docGroups.find(
|
||||
(d) =>
|
||||
d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
|
||||
);
|
||||
|
||||
if (!URLDocExists) {
|
||||
docGroups.push({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
totalDocs: 1
|
||||
}
|
||||
totalDocs: 1,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const docIndex = docGroups.findIndex((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10);
|
||||
const docIndex = docGroups.findIndex(
|
||||
(d) =>
|
||||
d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
|
||||
);
|
||||
|
||||
if (docIndex !== -1) {
|
||||
docGroups[docIndex].pageContent = docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
|
||||
docGroups[docIndex].pageContent =
|
||||
docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
|
||||
docGroups[docIndex].metadata.totalDocs += 1;
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
await Promise.all(docGroups.map(async (doc) => {
|
||||
const res = await llm.invoke(`
|
||||
await Promise.all(
|
||||
docGroups.map(async (doc) => {
|
||||
const res = await llm.invoke(`
|
||||
You are a text summarizer. You need to summarize the text provided inside the \`text\` XML block.
|
||||
You need to summarize the text into 1 or 2 sentences capturing the main idea of the text.
|
||||
You need to make sure that you don't miss any point while summarizing the text.
|
||||
@ -204,16 +213,17 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
|
||||
Make sure to answer the query in the summary.
|
||||
`);
|
||||
|
||||
const document = new Document({
|
||||
pageContent: res.content as string,
|
||||
metadata: {
|
||||
title: doc.metadata.title,
|
||||
url: doc.metadata.url,
|
||||
},
|
||||
})
|
||||
|
||||
docs.push(document)
|
||||
}))
|
||||
const document = new Document({
|
||||
pageContent: res.content as string,
|
||||
metadata: {
|
||||
title: doc.metadata.title,
|
||||
url: doc.metadata.url,
|
||||
},
|
||||
});
|
||||
|
||||
docs.push(document);
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: question, docs: docs };
|
||||
} else {
|
||||
|
@ -18,6 +18,7 @@ import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import logger from '../utils/logger';
|
||||
import { IterableReadableStream } from '@langchain/core/utils/stream';
|
||||
|
||||
const basicWolframAlphaSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
@ -65,7 +66,7 @@ const basicWolframAlphaSearchResponsePrompt = `
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const handleStream = async (
|
||||
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
||||
stream: IterableReadableStream<StreamEvent>,
|
||||
emitter: eventEmitter,
|
||||
) => {
|
||||
for await (const event of stream) {
|
||||
|
@ -10,6 +10,7 @@ import eventEmitter from 'events';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import logger from '../utils/logger';
|
||||
import { IterableReadableStream } from '@langchain/core/utils/stream';
|
||||
|
||||
const writingAssistantPrompt = `
|
||||
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
|
||||
@ -19,7 +20,7 @@ Since you are a writing assistant, you would not perform web searches. If you th
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const handleStream = async (
|
||||
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
||||
stream: IterableReadableStream<StreamEvent>,
|
||||
emitter: eventEmitter,
|
||||
) => {
|
||||
for await (const event of stream) {
|
||||
|
@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import computeSimilarity from '../utils/computeSimilarity';
|
||||
import logger from '../utils/logger';
|
||||
import { IterableReadableStream } from '@langchain/core/utils/stream';
|
||||
|
||||
const basicYoutubeSearchRetrieverPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
||||
@ -66,7 +67,7 @@ const basicYoutubeSearchResponsePrompt = `
|
||||
const strParser = new StringOutputParser();
|
||||
|
||||
const handleStream = async (
|
||||
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
||||
stream: IterableReadableStream<StreamEvent>,
|
||||
emitter: eventEmitter,
|
||||
) => {
|
||||
for await (const event of stream) {
|
||||
|
@ -30,9 +30,9 @@ server.listen(port, () => {
|
||||
startWebSocketServer(server);
|
||||
|
||||
process.on('uncaughtException', (err, origin) => {
|
||||
logger.error(`Uncaught Exception at ${origin}: ${err}`)
|
||||
})
|
||||
logger.error(`Uncaught Exception at ${origin}: ${err}`);
|
||||
});
|
||||
|
||||
process.on('unhandledRejection', (reason, promise) => {
|
||||
logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`)
|
||||
})
|
||||
logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`);
|
||||
});
|
||||
|
@ -1,8 +1,8 @@
|
||||
import axios from 'axios';
|
||||
import { htmlToText } from 'html-to-text'
|
||||
import { htmlToText } from 'html-to-text';
|
||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import pdfParse from 'pdf-parse'
|
||||
import pdfParse from 'pdf-parse';
|
||||
|
||||
export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
|
||||
const splitter = new RecursiveCharacterTextSplitter();
|
||||
@ -23,14 +23,14 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
|
||||
const isPdf = res.headers['content-type'] === 'application/pdf';
|
||||
|
||||
if (isPdf) {
|
||||
const pdfText = await pdfParse(res.data)
|
||||
const pdfText = await pdfParse(res.data);
|
||||
const parsedText = pdfText.text
|
||||
.replace(/(\r\n|\n|\r)/gm, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
|
||||
const splittedText = await splitter.splitText(parsedText);
|
||||
const title = 'PDF Document'
|
||||
const title = 'PDF Document';
|
||||
|
||||
const linkDocs = splittedText.map((text) => {
|
||||
return new Document({
|
||||
@ -52,16 +52,18 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
|
||||
selector: 'a',
|
||||
options: {
|
||||
ignoreHref: true,
|
||||
}
|
||||
},
|
||||
},
|
||||
]
|
||||
],
|
||||
})
|
||||
.replace(/(\r\n|\n|\r)/gm, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
|
||||
const splittedText = await splitter.splitText(parsedText);
|
||||
const title = res.data.toString('utf8').match(/<title>(.*?)<\/title>/)?.[1];
|
||||
const title = res.data
|
||||
.toString('utf8')
|
||||
.match(/<title>(.*?)<\/title>/)?.[1];
|
||||
|
||||
const linkDocs = splittedText.map((text) => {
|
||||
return new Document({
|
||||
|
@ -38,53 +38,56 @@ const useSocket = (
|
||||
'embeddingModelProvider',
|
||||
);
|
||||
|
||||
const providers = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_API_URL}/models`,
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
).then(async (res) => await res.json());
|
||||
|
||||
if (
|
||||
!chatModel ||
|
||||
!chatModelProvider ||
|
||||
!embeddingModel ||
|
||||
!embeddingModelProvider
|
||||
) {
|
||||
const providers = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_API_URL}/models`,
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
).then(async (res) => await res.json());
|
||||
if (!chatModel || !chatModelProvider) {
|
||||
const chatModelProviders = providers.chatModelProviders;
|
||||
|
||||
const chatModelProviders = providers.chatModelProviders;
|
||||
chatModelProvider = Object.keys(chatModelProviders)[0];
|
||||
|
||||
chatModelProvider = Object.keys(chatModelProviders)[0];
|
||||
|
||||
if (chatModelProvider === 'custom_openai') {
|
||||
toast.error(
|
||||
'Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL',
|
||||
);
|
||||
setError(true);
|
||||
return;
|
||||
} else {
|
||||
chatModel = Object.keys(chatModelProviders[chatModelProvider])[0];
|
||||
|
||||
if (
|
||||
!chatModelProviders ||
|
||||
Object.keys(chatModelProviders).length === 0
|
||||
)
|
||||
return toast.error('No chat models available');
|
||||
if (chatModelProvider === 'custom_openai') {
|
||||
toast.error(
|
||||
'Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL',
|
||||
);
|
||||
setError(true);
|
||||
return;
|
||||
} else {
|
||||
chatModel = Object.keys(chatModelProviders[chatModelProvider])[0];
|
||||
if (
|
||||
!chatModelProviders ||
|
||||
Object.keys(chatModelProviders).length === 0
|
||||
)
|
||||
return toast.error('No chat models available');
|
||||
}
|
||||
}
|
||||
|
||||
const embeddingModelProviders = providers.embeddingModelProviders;
|
||||
if (!embeddingModel || !embeddingModelProvider) {
|
||||
const embeddingModelProviders = providers.embeddingModelProviders;
|
||||
|
||||
if (
|
||||
!embeddingModelProviders ||
|
||||
Object.keys(embeddingModelProviders).length === 0
|
||||
)
|
||||
return toast.error('No embedding models available');
|
||||
if (
|
||||
!embeddingModelProviders ||
|
||||
Object.keys(embeddingModelProviders).length === 0
|
||||
)
|
||||
return toast.error('No embedding models available');
|
||||
|
||||
embeddingModelProvider = Object.keys(embeddingModelProviders)[0];
|
||||
embeddingModel = Object.keys(
|
||||
embeddingModelProviders[embeddingModelProvider],
|
||||
)[0];
|
||||
embeddingModelProvider = Object.keys(embeddingModelProviders)[0];
|
||||
embeddingModel = Object.keys(
|
||||
embeddingModelProviders[embeddingModelProvider],
|
||||
)[0];
|
||||
}
|
||||
|
||||
localStorage.setItem('chatModel', chatModel!);
|
||||
localStorage.setItem('chatModelProvider', chatModelProvider);
|
||||
@ -94,15 +97,6 @@ const useSocket = (
|
||||
embeddingModelProvider,
|
||||
);
|
||||
} else {
|
||||
const providers = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_API_URL}/models`,
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'app lication/json',
|
||||
},
|
||||
},
|
||||
).then(async (res) => await res.json());
|
||||
|
||||
const chatModelProviders = providers.chatModelProviders;
|
||||
const embeddingModelProviders = providers.embeddingModelProviders;
|
||||
|
||||
@ -171,8 +165,6 @@ const useSocket = (
|
||||
|
||||
const timeoutId = setTimeout(() => {
|
||||
if (ws.readyState !== 1) {
|
||||
ws.close();
|
||||
setError(true);
|
||||
toast.error(
|
||||
'Failed to connect to the server. Please try again later.',
|
||||
);
|
||||
@ -182,7 +174,6 @@ const useSocket = (
|
||||
ws.onopen = () => {
|
||||
console.log('[DEBUG] open');
|
||||
clearTimeout(timeoutId);
|
||||
setError(false);
|
||||
setIsWSReady(true);
|
||||
};
|
||||
|
||||
@ -210,13 +201,6 @@ const useSocket = (
|
||||
|
||||
connectWs();
|
||||
}
|
||||
|
||||
return () => {
|
||||
if (ws?.readyState === 1) {
|
||||
ws?.close();
|
||||
console.log('[DEBUG] closed');
|
||||
}
|
||||
};
|
||||
}, [ws, url, setIsWSReady, setError]);
|
||||
|
||||
return ws;
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "perplexica-frontend",
|
||||
"version": "1.9.0-rc1",
|
||||
"version": "1.9.0-rc2",
|
||||
"license": "MIT",
|
||||
"author": "ItzCrazyKns",
|
||||
"scripts": {
|
||||
|
Reference in New Issue
Block a user