mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-06-18 15:58:31 +00:00
Compare commits
3 Commits
feat/deeps
...
562a6e890d
Author | SHA1 | Date | |
---|---|---|---|
562a6e890d | |||
47c7bb688f | |||
dd2f4effca |
@ -24,4 +24,7 @@ MODEL_NAME = ""
|
||||
API_URL = "" # Ollama API URL - http://host.docker.internal:11434
|
||||
|
||||
[API_ENDPOINTS]
|
||||
SEARXNG = "http://localhost:32768" # SearxNG API URL
|
||||
SEARXNG = "http://localhost:32768" # SearxNG API URL
|
||||
|
||||
[API_KEYS]
|
||||
JINA = ""
|
||||
|
@ -35,6 +35,9 @@ interface Config {
|
||||
API_ENDPOINTS: {
|
||||
SEARXNG: string;
|
||||
};
|
||||
API_KEYS: {
|
||||
JINA: string;
|
||||
}
|
||||
}
|
||||
|
||||
type RecursivePartial<T> = {
|
||||
@ -75,6 +78,9 @@ export const getCustomOpenaiApiUrl = () =>
|
||||
/**
 * Reads the custom OpenAI model name (`MODELS.CUSTOM_OPENAI.MODEL_NAME`) from
 * the configuration returned by `loadConfig()`.
 */
export const getCustomOpenaiModelName = () => {
  const { MODELS } = loadConfig();
  return MODELS.CUSTOM_OPENAI.MODEL_NAME;
};
|
||||
|
||||
/**
 * Returns the Jina API key (`API_KEYS.JINA`) from the configuration returned
 * by `loadConfig()`.
 *
 * Yields an empty string when the `API_KEYS` section or its `JINA` entry is
 * absent, so callers can simply test the result for truthiness instead of
 * having to guard against a TypeError.
 */
export const getJinaApiKey = () =>
  // NOTE(review): presumably config files written before the [API_KEYS]
  // section existed can reach this point without it — confirm against loadConfig.
  loadConfig().API_KEYS?.JINA ?? '';
|
||||
|
||||
const mergeConfigs = (current: any, update: any): any => {
|
||||
if (update === null || update === undefined) {
|
||||
return current;
|
||||
|
@ -1,22 +1,30 @@
|
||||
import axios from 'axios';
|
||||
import { htmlToText } from 'html-to-text';
|
||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||
import { RecursiveCharacterTextSplitter, MarkdownTextSplitter } from 'langchain/text_splitter';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import pdfParse from 'pdf-parse';
|
||||
import logger from './logger';
|
||||
import { getJinaApiKey } from '../config';
|
||||
|
||||
/**
 * Turns a list of links into documents.
 *
 * Links that do not already start with `http://` or `https://` are prefixed
 * with `https://`. When `getJinaApiKey()` returns a truthy value the links are
 * handed to `getDocumentsFromJinaReader`; otherwise they are handed to
 * `getDocumentsFromLocal`.
 *
 * @param links - Links to retrieve, with or without a scheme.
 * @returns The documents produced by whichever reader handled the links.
 */
export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
  // Use a new binding rather than reassigning the destructured parameter.
  const normalizedLinks = links.map((link) =>
    link.startsWith('http://') || link.startsWith('https://')
      ? link
      : `https://${link}`,
  );

  if (getJinaApiKey()) {
    return await getDocumentsFromJinaReader({ links: normalizedLinks });
  }

  return await getDocumentsFromLocal({ links: normalizedLinks });
};
|
||||
|
||||
const getDocumentsFromLocal = async ({links}: {links: string[]}) => {
|
||||
const splitter = new RecursiveCharacterTextSplitter();
|
||||
let docs: Document[] = [];
|
||||
|
||||
await Promise.all(
|
||||
links.map(async (link) => {
|
||||
link =
|
||||
link.startsWith('http://') || link.startsWith('https://')
|
||||
? link
|
||||
: `https://${link}`;
|
||||
|
||||
try {
|
||||
const res = await axios.get(link, {
|
||||
responseType: 'arraybuffer',
|
||||
@ -94,6 +102,62 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
return docs;
|
||||
};
|
||||
|
||||
/**
 * Retrieves each link through the Jina reader endpoint
 * (`https://r.jina.ai/<link>`) and splits the returned content into documents
 * with a `MarkdownTextSplitter`.
 *
 * A link that cannot be retrieved — the request throws, or the response body
 * reports a `code` other than 200 — does not reject the call. It contributes a
 * single placeholder document describing the failure instead.
 *
 * @param links - URLs to read; each is appended verbatim to the reader URL.
 * @returns The documents for all links, grouped in the same order as `links`.
 */
const getDocumentsFromJinaReader = async ({links}: { links: string[] }) => {
  const splitter = new MarkdownTextSplitter();
  // The key does not change between links, so look it up once.
  const apiKey = getJinaApiKey();

  // Placeholder emitted in place of real content when a link cannot be read.
  const failureDocument = (link: string, pageContent: string) =>
    new Document({
      pageContent,
      metadata: {
        title: 'Failed to retrieve content',
        url: link,
      },
    });

  // Each link resolves to its own array so the combined result follows the
  // input order rather than the order in which requests happen to finish.
  const docsPerLink = await Promise.all(
    links.map(async (link): Promise<Document[]> => {
      try {
        const res = await axios.get(`https://r.jina.ai/${link}`, {
          headers: {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${apiKey}`,
          },
        });

        if (res.data.code !== 200) {
          return [
            failureDocument(
              link,
              `Failed to retrieve content from the link in the Jina reader API, code: ${res.data.code}`,
            ),
          ];
        }

        const data = res.data.data;
        const splittedText = await splitter.splitText(data.content);

        return splittedText.map(
          (text) =>
            new Document({
              pageContent: text,
              metadata: {
                title: data.title,
                url: link,
              },
            }),
        );
      } catch (err) {
        // The caught value is not guaranteed to be an Error; narrow before
        // reading `.message` so non-Error throws still produce a useful text.
        const reason = err instanceof Error ? err.message : String(err);
        return [
          failureDocument(
            link,
            `Failed to retrieve content from the link: ${reason}`,
          ),
        ];
      }
    }),
  );

  const docs: Document[] = [];
  for (const linkDocs of docsPerLink) {
    docs.push(...linkDocs);
  }
  return docs;
};
|
||||
|
Reference in New Issue
Block a user