Compare commits

10 Commits

Author       SHA1        Message                                         Date
ItzCrazyKns  2873093fee  feat(package): bump version                     2024-08-28 10:00:05 +05:30
ItzCrazyKns  806c47e705  feat(chatwindow): fix infinite loading          2024-08-28 09:53:06 +05:30
ItzCrazyKns  ff34d1043f  feat(app): lint & format                        2024-08-25 15:08:47 +05:30
ItzCrazyKns  c521b032a7  feat(agents): fix unresloved types              2024-08-25 15:08:30 +05:30
ItzCrazyKns  6b8f7dc32c  Merge branch 'pr/309'                           2024-08-25 12:03:54 +05:30
ItzCrazyKns  8bb3e4f016  feat(agents): update types                      2024-08-25 12:03:32 +05:30
ItzCrazyKns  51939ff842  feat(webSearchAgent): fix typo, closes #313     2024-08-24 21:48:27 +05:30
Xie Yanbo    e4faa82362  Fix #307, update outdated searxng/settings.yml  2024-08-09 20:53:53 +08:00
ItzCrazyKns  9c1936ec2c  feat(chat-window): lint & beautify              2024-08-04 18:14:46 +05:30
ItzCrazyKns  c4932c659a  feat(app): lint                                 2024-07-31 20:17:57 +05:30
12 changed files with 97 additions and 2435 deletions

package.json

@@ -1,6 +1,6 @@
 {
   "name": "perplexica-backend",
-  "version": "1.9.0-rc1",
+  "version": "1.9.0-rc2",
   "license": "MIT",
   "author": "ItzCrazyKns",
   "scripts": {

searxng/settings.yml (file diff suppressed because it is too large)

src/agents/academicSearchAgent.ts

@@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory';
 import eventEmitter from 'events';
 import computeSimilarity from '../utils/computeSimilarity';
 import logger from '../utils/logger';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
 
 const basicAcademicSearchRetrieverPrompt = `
 You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@@ -66,7 +67,7 @@ const basicAcademicSearchResponsePrompt = `
 const strParser = new StringOutputParser();
 
 const handleStream = async (
-  stream: AsyncGenerator<StreamEvent, any, unknown>,
+  stream: IterableReadableStream<StreamEvent>,
   emitter: eventEmitter,
 ) => {
   for await (const event of stream) {

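The same one-line signature fix repeats in every agent file below: recent @langchain/core releases type streamEvents() as returning an IterableReadableStream<StreamEvent> rather than a plain AsyncGenerator, so the old annotation no longer resolved. A minimal sketch of the corrected handler shape; the emit payload here is illustrative, not the agents' real event handling:

import eventEmitter from 'events';
import { IterableReadableStream } from '@langchain/core/utils/stream';
import type { StreamEvent } from '@langchain/core/tracers/log_stream';

// IterableReadableStream is async-iterable, so the loop body is unchanged;
// only the parameter annotation needed updating.
const handleStream = async (
  stream: IterableReadableStream<StreamEvent>,
  emitter: eventEmitter,
) => {
  for await (const event of stream) {
    emitter.emit('data', JSON.stringify(event.data));
  }
};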
src/agents/redditSearchAgent.ts

@@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory';
 import eventEmitter from 'events';
 import computeSimilarity from '../utils/computeSimilarity';
 import logger from '../utils/logger';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
 
 const basicRedditSearchRetrieverPrompt = `
 You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@@ -66,7 +67,7 @@ const basicRedditSearchResponsePrompt = `
 const strParser = new StringOutputParser();
 
 const handleStream = async (
-  stream: AsyncGenerator<StreamEvent, any, unknown>,
+  stream: IterableReadableStream<StreamEvent>,
   emitter: eventEmitter,
 ) => {
   for await (const event of stream) {

src/agents/webSearchAgent.ts

@@ -22,12 +22,13 @@ import logger from '../utils/logger';
 import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
 import { getDocumentsFromLinks } from '../lib/linkDocument';
 import LineOutputParser from '../lib/outputParsers/lineOutputParser';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
 
 const basicSearchRetrieverPrompt = `
 You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
 If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
 If the question contains some links and asks to answer from those links or even if they don't you need to return the links inside 'links' XML block and the question inside 'question' XML block. If there are no links then you need to return the question without any XML block.
-If the user asks to summarrize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block.
+If the user asks to summarize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block.
 
 Example:
 1. Follow up question: What is the capital of France?
@@ -95,7 +96,7 @@ const basicWebSearchResponsePrompt = `
 const strParser = new StringOutputParser();
 
 const handleStream = async (
-  stream: AsyncGenerator<StreamEvent, any, unknown>,
+  stream: IterableReadableStream<StreamEvent>,
   emitter: eventEmitter,
 ) => {
   for await (const event of stream) {
@@ -157,34 +158,42 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
         question = 'Summarize';
       }
 
-      let docs = []
+      let docs = [];
 
       const linkDocs = await getDocumentsFromLinks({ links });
 
       const docGroups: Document[] = [];
 
       linkDocs.map((doc) => {
-        const URLDocExists = docGroups.find((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10);
+        const URLDocExists = docGroups.find(
+          (d) =>
+            d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
+        );
 
         if (!URLDocExists) {
           docGroups.push({
             ...doc,
             metadata: {
               ...doc.metadata,
-              totalDocs: 1
-            }
+              totalDocs: 1,
+            },
           });
         }
 
-        const docIndex = docGroups.findIndex((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10);
+        const docIndex = docGroups.findIndex(
+          (d) =>
+            d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
+        );
 
         if (docIndex !== -1) {
-          docGroups[docIndex].pageContent = docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
+          docGroups[docIndex].pageContent =
+            docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
           docGroups[docIndex].metadata.totalDocs += 1;
         }
-      })
+      });
 
-      await Promise.all(docGroups.map(async (doc) => {
+      await Promise.all(
+        docGroups.map(async (doc) => {
           const res = await llm.invoke(`
 You are a text summarizer. You need to summarize the text provided inside the \`text\` XML block.
 You need to summarize the text into 1 or 2 sentences capturing the main idea of the text.
@@ -210,10 +219,11 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
             title: doc.metadata.title,
             url: doc.metadata.url,
           },
-        })
+        });
 
-        docs.push(document)
-      }))
+          docs.push(document);
+        }),
+      );
 
       return { query: question, docs: docs };
     } else {

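Aside from the Prettier reflow, the block above is the retriever's per-URL grouping of fetched link chunks. A compact restatement of its intent with simplified types (groupByUrl is a hypothetical helper, not a function in the repository; the real code works on LangChain Document objects and uses a find/findIndex pair):

// Merge chunks fetched from the same URL into one document,
// capping each group at 10 chunks.
type Doc = {
  pageContent: string;
  metadata: { url: string; totalDocs?: number };
};

const groupByUrl = (linkDocs: Doc[]): Doc[] => {
  const docGroups: Doc[] = [];

  for (const doc of linkDocs) {
    // Reuse an existing group for this URL while it still has room.
    const group = docGroups.find(
      (d) =>
        d.metadata.url === doc.metadata.url &&
        (d.metadata.totalDocs ?? 0) < 10,
    );

    if (!group) {
      docGroups.push({ ...doc, metadata: { ...doc.metadata, totalDocs: 1 } });
    } else {
      group.pageContent += '\n\n' + doc.pageContent;
      group.metadata.totalDocs = (group.metadata.totalDocs ?? 0) + 1;
    }
  }

  return docGroups;
};

Each resulting group is then summarized by the LLM in parallel, which is what the reindented Promise.all over docGroups above does.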
src/agents/wolframAlphaSearchAgent.ts

@@ -18,6 +18,7 @@ import type { Embeddings } from '@langchain/core/embeddings';
 import formatChatHistoryAsString from '../utils/formatHistory';
 import eventEmitter from 'events';
 import logger from '../utils/logger';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
 
 const basicWolframAlphaSearchRetrieverPrompt = `
 You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@@ -65,7 +66,7 @@ const basicWolframAlphaSearchResponsePrompt = `
 const strParser = new StringOutputParser();
 
 const handleStream = async (
-  stream: AsyncGenerator<StreamEvent, any, unknown>,
+  stream: IterableReadableStream<StreamEvent>,
   emitter: eventEmitter,
 ) => {
   for await (const event of stream) {

src/agents/writingAssistant.ts

@@ -10,6 +10,7 @@ import eventEmitter from 'events';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import type { Embeddings } from '@langchain/core/embeddings';
 import logger from '../utils/logger';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
 
 const writingAssistantPrompt = `
 You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
@@ -19,7 +20,7 @@ Since you are a writing assistant, you would not perform web searches. If you th
 const strParser = new StringOutputParser();
 
 const handleStream = async (
-  stream: AsyncGenerator<StreamEvent, any, unknown>,
+  stream: IterableReadableStream<StreamEvent>,
   emitter: eventEmitter,
 ) => {
   for await (const event of stream) {

src/agents/youtubeSearchAgent.ts

@@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory';
 import eventEmitter from 'events';
 import computeSimilarity from '../utils/computeSimilarity';
 import logger from '../utils/logger';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
 
 const basicYoutubeSearchRetrieverPrompt = `
 You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@@ -66,7 +67,7 @@ const basicYoutubeSearchResponsePrompt = `
 const strParser = new StringOutputParser();
 
 const handleStream = async (
-  stream: AsyncGenerator<StreamEvent, any, unknown>,
+  stream: IterableReadableStream<StreamEvent>,
   emitter: eventEmitter,
 ) => {
   for await (const event of stream) {

src/app.ts

@@ -30,9 +30,9 @@ server.listen(port, () => {
 startWebSocketServer(server);
 
 process.on('uncaughtException', (err, origin) => {
-  logger.error(`Uncaught Exception at ${origin}: ${err}`)
-})
+  logger.error(`Uncaught Exception at ${origin}: ${err}`);
+});
 
 process.on('unhandledRejection', (reason, promise) => {
-  logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`)
-})
+  logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`);
+});

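The two hooks above are Node's process-wide safety nets: registering an uncaughtException listener replaces the default behavior of printing the error and exiting, and unhandledRejection catches promise rejections that never receive a .catch(). A standalone sketch, with console.error standing in for the project's logger:

process.on('uncaughtException', (err, origin) => {
  // Synchronous errors that escape every try/catch land here.
  console.error(`Uncaught Exception at ${origin}: ${err}`);
});

process.on('unhandledRejection', (reason, promise) => {
  // Rejected promises with no .catch() handler land here.
  console.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`);
});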
src/lib/linkDocument.ts

@@ -1,8 +1,8 @@
 import axios from 'axios';
-import { htmlToText } from 'html-to-text'
+import { htmlToText } from 'html-to-text';
 import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import { Document } from '@langchain/core/documents';
-import pdfParse from 'pdf-parse'
+import pdfParse from 'pdf-parse';
 
 export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
   const splitter = new RecursiveCharacterTextSplitter();
@@ -23,14 +23,14 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
       const isPdf = res.headers['content-type'] === 'application/pdf';
 
       if (isPdf) {
-        const pdfText = await pdfParse(res.data)
+        const pdfText = await pdfParse(res.data);
         const parsedText = pdfText.text
           .replace(/(\r\n|\n|\r)/gm, ' ')
           .replace(/\s+/g, ' ')
           .trim();
 
         const splittedText = await splitter.splitText(parsedText);
-        const title = 'PDF Document'
+        const title = 'PDF Document';
 
         const linkDocs = splittedText.map((text) => {
           return new Document({
@@ -52,16 +52,18 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
             selector: 'a',
             options: {
               ignoreHref: true,
-            }
-          }
-        ]
+            },
+          },
+        ],
       })
         .replace(/(\r\n|\n|\r)/gm, ' ')
         .replace(/\s+/g, ' ')
        .trim();
 
       const splittedText = await splitter.splitText(parsedText);
-      const title = res.data.toString('utf8').match(/<title>(.*?)<\/title>/)?.[1];
+      const title = res.data
+        .toString('utf8')
+        .match(/<title>(.*?)<\/title>/)?.[1];
 
       const linkDocs = splittedText.map((text) => {
         return new Document({

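For orientation, the flow this file implements: each link is downloaded with axios, and the response branches on the Content-Type header between pdf-parse and html-to-text before the text is chunked. A trimmed sketch of that branch (fetchText is a hypothetical helper; error handling and text splitting are omitted):

import axios from 'axios';
import { htmlToText } from 'html-to-text';
import pdfParse from 'pdf-parse';

const fetchText = async (link: string): Promise<string> => {
  // arraybuffer works for both binary PDFs and HTML byte streams.
  const res = await axios.get(link, { responseType: 'arraybuffer' });

  if (res.headers['content-type'] === 'application/pdf') {
    const pdf = await pdfParse(res.data);
    return pdf.text.replace(/\s+/g, ' ').trim();
  }

  return htmlToText(res.data.toString('utf8'), {
    selectors: [{ selector: 'a', options: { ignoreHref: true } }],
  })
    .replace(/\s+/g, ' ')
    .trim();
};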
ui/components/ChatWindow.tsx

@@ -38,12 +38,6 @@ const useSocket = (
         'embeddingModelProvider',
       );
 
-      if (
-        !chatModel ||
-        !chatModelProvider ||
-        !embeddingModel ||
-        !embeddingModelProvider
-      ) {
       const providers = await fetch(
         `${process.env.NEXT_PUBLIC_API_URL}/models`,
         {
@@ -53,6 +47,13 @@ const useSocket = (
         },
       ).then(async (res) => await res.json());
 
+      if (
+        !chatModel ||
+        !chatModelProvider ||
+        !embeddingModel ||
+        !embeddingModelProvider
+      ) {
       if (!chatModel || !chatModelProvider) {
         const chatModelProviders = providers.chatModelProviders;
         chatModelProvider = Object.keys(chatModelProviders)[0];
@@ -65,14 +66,15 @@ const useSocket = (
           return;
         } else {
           chatModel = Object.keys(chatModelProviders[chatModelProvider])[0];
+          if (
+            !chatModelProviders ||
+            Object.keys(chatModelProviders).length === 0
+          )
+            return toast.error('No chat models available');
         }
       }
 
       if (!embeddingModel || !embeddingModelProvider) {
         const embeddingModelProviders = providers.embeddingModelProviders;
         if (
@@ -85,6 +87,7 @@ const useSocket = (
         embeddingModel = Object.keys(
           embeddingModelProviders[embeddingModelProvider],
         )[0];
+      }
 
       localStorage.setItem('chatModel', chatModel!);
       localStorage.setItem('chatModelProvider', chatModelProvider);
@@ -94,15 +97,6 @@ const useSocket = (
         embeddingModelProvider,
       );
     } else {
-      const providers = await fetch(
-        `${process.env.NEXT_PUBLIC_API_URL}/models`,
-        {
-          headers: {
-            'Content-Type': 'app lication/json',
-          },
-        },
-      ).then(async (res) => await res.json());
-
       const chatModelProviders = providers.chatModelProviders;
       const embeddingModelProviders = providers.embeddingModelProviders;
@@ -171,8 +165,6 @@ const useSocket = (
     const timeoutId = setTimeout(() => {
       if (ws.readyState !== 1) {
         ws.close();
-        setError(true);
         toast.error(
           'Failed to connect to the server. Please try again later.',
         );
@@ -182,7 +174,6 @@
     ws.onopen = () => {
       console.log('[DEBUG] open');
       clearTimeout(timeoutId);
-      setError(false);
       setIsWSReady(true);
     };
@@ -210,13 +201,6 @@
       connectWs();
     }
 
-    return () => {
-      if (ws?.readyState === 1) {
-        ws?.close();
-        console.log('[DEBUG] closed');
-      }
-    };
   }, [ws, url, setIsWSReady, setError]);
 
   return ws;

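Taken together, the ChatWindow changes are the infinite-loading fix from the commit list: the provider list is now fetched once, before any model-selection checks (previously the fetch only ran when a localStorage entry was missing, with a second copy, typo included, in the else branch), an empty provider list is surfaced instead of silently stalling, and the effect no longer tears down a healthy socket on every re-render. A simplified sketch of the resulting startup order (loadModelDefaults is a hypothetical wrapper; the real logic lives inline in useSocket):

const loadModelDefaults = async () => {
  // Fetch the provider list first, unconditionally.
  const providers = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/models`, {
    headers: { 'Content-Type': 'application/json' },
  }).then((res) => res.json());

  let chatModelProvider = localStorage.getItem('chatModelProvider');
  let chatModel = localStorage.getItem('chatModel');

  if (!chatModel || !chatModelProvider) {
    const chatModelProviders = providers.chatModelProviders;

    // The new guard: report an empty provider list instead of hanging.
    if (!chatModelProviders || Object.keys(chatModelProviders).length === 0) {
      throw new Error('No chat models available');
    }

    // Fall back to the first provider/model the backend reports.
    chatModelProvider = Object.keys(chatModelProviders)[0];
    chatModel = Object.keys(chatModelProviders[chatModelProvider])[0];
  }

  localStorage.setItem('chatModel', chatModel!);
  localStorage.setItem('chatModelProvider', chatModelProvider!);
};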
ui/package.json

@@ -1,6 +1,6 @@
 {
   "name": "perplexica-frontend",
-  "version": "1.9.0-rc1",
+  "version": "1.9.0-rc2",
   "license": "MIT",
   "author": "ItzCrazyKns",
   "scripts": {