mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-06-18 15:58:31 +00:00
feat(ui): add necessary utils
This commit is contained in:
5
ui/lib/types/compute-dot.d.ts
vendored
Normal file
5
ui/lib/types/compute-dot.d.ts
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
/**
 * Ambient typings for the `compute-dot` package (presumably needed because the
 * package ships no type definitions of its own — confirm before removing).
 *
 * The function is declared inside the module block rather than at the top
 * level of this file: a top-level `declare function` in a script-style `.d.ts`
 * creates a *global* `computeDot` symbol, which would let un-imported calls
 * type-check even though no such global exists at runtime.
 */
declare module 'compute-dot' {
  /**
   * @param vectorA - First numeric array.
   * @param vectorB - Second numeric array.
   * @returns The numeric result of combining the two arrays.
   *   NOTE(review): the upstream README should be checked for the
   *   empty-array case; the return type is kept as `number` so existing
   *   callers continue to compile.
   */
  export default function computeDot(
    vectorA: number[],
    vectorB: number[],
  ): number;
}
|
17
ui/lib/utils/computeSimilarity.ts
Normal file
17
ui/lib/utils/computeSimilarity.ts
Normal file
@ -0,0 +1,17 @@
|
||||
import dot from 'compute-dot';
|
||||
import cosineSimilarity from 'compute-cosine-similarity';
|
||||
import { getSimilarityMeasure } from '../config';
|
||||
|
||||
/**
 * Scores two vectors using the similarity measure returned by
 * `getSimilarityMeasure()`.
 *
 * @param x - First vector.
 * @param y - Second vector.
 * @returns The `compute-cosine-similarity` result when the measure is
 *   `'cosine'`, or the `compute-dot` result when it is `'dot'`.
 * @throws Error when the configured measure is neither `'cosine'` nor `'dot'`.
 */
const computeSimilarity = (x: number[], y: number[]): number => {
  switch (getSimilarityMeasure()) {
    case 'cosine':
      // The library result is asserted to `number` exactly as before.
      return cosineSimilarity(x, y) as number;
    case 'dot':
      return dot(x, y);
    default:
      throw new Error('Invalid similarity measure');
  }
};
|
||||
|
||||
export default computeSimilarity;
|
97
ui/lib/utils/documents.ts
Normal file
97
ui/lib/utils/documents.ts
Normal file
@ -0,0 +1,97 @@
|
||||
import axios from 'axios';
|
||||
import { htmlToText } from 'html-to-text';
|
||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import pdfParse from 'pdf-parse';
|
||||
import logger from './logger';
|
||||
|
||||
/** Replaces every run of whitespace (including line breaks) with one space and trims the ends. */
const collapseWhitespace = (text: string): string =>
  text.replace(/\s+/g, ' ').trim();

/**
 * Reports whether a `content-type` header value denotes a PDF, ignoring
 * letter case and any trailing parameters such as `; charset=binary`.
 */
const isPdfContentType = (contentType: unknown): boolean =>
  String(contentType ?? '')
    .trim()
    .toLowerCase()
    .startsWith('application/pdf');

/** Splits `text` with `splitter` and wraps each chunk in a Document tagged with `title` and `url`. */
const splitIntoDocuments = async (
  splitter: RecursiveCharacterTextSplitter,
  text: string,
  title: string,
  url: string,
): Promise<Document[]> => {
  const chunks = await splitter.splitText(text);

  // A fresh metadata object per chunk so documents never share mutable state.
  return chunks.map(
    (chunk) => new Document({ pageContent: chunk, metadata: { title, url } }),
  );
};

/**
 * Downloads every link, extracts its text (PDF or HTML), splits the text into
 * chunks and returns one Document per chunk.
 *
 * Links without an `http://` or `https://` scheme are prefixed with
 * `https://`. A link whose download or parsing fails does not reject the
 * whole call: it contributes a single placeholder Document describing the
 * failure instead.
 *
 * @param links - URLs (with or without scheme) to fetch.
 * @returns All Documents, grouped in the same order as `links`. Requests
 *   still run concurrently; results are collected per link so the output
 *   order no longer depends on which request finishes first.
 */
export const getDocumentsFromLinks = async ({
  links,
}: {
  links: string[];
}): Promise<Document[]> => {
  const splitter = new RecursiveCharacterTextSplitter();

  const docsPerLink = await Promise.all(
    links.map(async (rawLink): Promise<Document[]> => {
      // Scheme check is case-insensitive so `HTTP://…` is not double-prefixed.
      const link = /^https?:\/\//i.test(rawLink)
        ? rawLink
        : `https://${rawLink}`;

      try {
        // Fetch raw bytes so the same response can feed either the PDF parser
        // or the HTML-to-text converter.
        const res = await axios.get(link, {
          responseType: 'arraybuffer',
        });

        if (isPdfContentType(res.headers['content-type'])) {
          const pdf = await pdfParse(res.data);

          // `return await` keeps splitter failures inside this try/catch.
          return await splitIntoDocuments(
            splitter,
            collapseWhitespace(pdf.text),
            'PDF Document',
            link,
          );
        }

        // Decode once and reuse for both text extraction and title lookup.
        const html: string = res.data.toString('utf8');

        const pageText = collapseWhitespace(
          htmlToText(html, {
            selectors: [
              {
                selector: 'a',
                options: {
                  ignoreHref: true,
                },
              },
            ],
          }),
        );

        // Tolerates attributes on the tag, upper-case tags and titles that
        // span several lines.
        const rawTitle = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1];
        const title = rawTitle ? collapseWhitespace(rawTitle) : '';

        // An absent or blank title falls back to the URL.
        return await splitIntoDocuments(splitter, pageText, title || link, link);
      } catch (err) {
        console.error("An error occurred while getting documents from links: ", err);

        return [
          new Document({
            pageContent: `Failed to retrieve content from the link: ${err}`,
            metadata: {
              title: 'Failed to retrieve content',
              url: link,
            },
          }),
        ];
      }
    }),
  );

  return docsPerLink.flat();
};
|
9
ui/lib/utils/formatHistory.ts
Normal file
9
ui/lib/utils/formatHistory.ts
Normal file
@ -0,0 +1,9 @@
|
||||
import { BaseMessage } from '@langchain/core/messages';
|
||||
|
||||
/**
 * Renders a message's `content` as plain text.
 *
 * String content is returned unchanged. Array content is flattened: string
 * items are kept, items with a string `text` field contribute that text, and
 * other items with a string `type` field are shown as a `[type]` placeholder
 * so large non-text payloads are not dumped into the transcript. Anything
 * else falls back to its JSON / string representation.
 */
const messageContentToString = (content: unknown): string => {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return String(content);

  return content
    .map((part: unknown) => {
      if (typeof part === 'string') return part;

      if (typeof part === 'object' && part !== null) {
        const { text, type } = part as { text?: unknown; type?: unknown };
        if (typeof text === 'string') return text;
        if (typeof type === 'string') return `[${type}]`;
      }

      return JSON.stringify(part);
    })
    .join(' ');
};

/**
 * Formats a chat history as one `"<type>: <content>"` line per message,
 * joined with newlines.
 *
 * Interpolating `message.content` directly would print `[object Object]`
 * when the content is an array of structured parts, so it is converted to
 * text first.
 *
 * @param history - Messages in the order they should appear.
 * @returns The formatted transcript; an empty string for an empty history.
 */
const formatChatHistoryAsString = (history: BaseMessage[]): string => {
  return history
    .map(
      (message) =>
        `${message._getType()}: ${messageContentToString(message.content)}`,
    )
    .join('\n');
};
|
||||
|
||||
export default formatChatHistoryAsString;
|
22
ui/lib/utils/logger.ts
Normal file
22
ui/lib/utils/logger.ts
Normal file
@ -0,0 +1,22 @@
|
||||
import winston from 'winston';
|
||||
|
||||
// Console output: colorized, in winston's "simple" format.
const consoleTransport = new winston.transports.Console({
  format: winston.format.combine(
    winston.format.colorize(),
    winston.format.simple(),
  ),
});

// File output: timestamped JSON entries written to `app.log`.
const fileTransport = new winston.transports.File({
  filename: 'app.log',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.json(),
  ),
});

/**
 * Shared logger configured at level `info` that writes to both the console
 * transport and the file transport defined above.
 */
const logger = winston.createLogger({
  level: 'info',
  transports: [consoleTransport, fileTransport],
});
|
||||
|
||||
export default logger;
|
Reference in New Issue
Block a user