feat(ui): add necessary utils

This commit is contained in:
ItzCrazyKns
2025-03-18 10:24:16 +05:30
parent 5c313e9bed
commit e29a08dc46
5 changed files with 150 additions and 0 deletions

5
ui/lib/types/compute-dot.d.ts vendored Normal file
View File

@ -0,0 +1,5 @@
/**
 * Ambient signature that the `compute-dot` module declaration below exposes as
 * its default export: it accepts two numeric arrays and returns a number.
 *
 * NOTE(review): presumably this is the dot product of the two vectors, and the
 * package presumably ships no typings of its own — confirm against the
 * package documentation.
 */
declare function computeDot(vectorA: number[], vectorB: number[]): number;
// Type shim so that `import dot from 'compute-dot'` resolves to the signature above.
declare module "compute-dot" {
export default computeDot;
}

View File

@ -0,0 +1,17 @@
import dot from 'compute-dot';
import cosineSimilarity from 'compute-cosine-similarity';
import { getSimilarityMeasure } from '../config';
/**
 * Scores how similar two vectors are, using whichever measure the
 * configuration currently selects via `getSimilarityMeasure()`.
 *
 * @param x First vector.
 * @param y Second vector.
 * @returns The cosine similarity when the configured measure is `'cosine'`,
 *          or the dot product when it is `'dot'`.
 * @throws Error with the message `Invalid similarity measure` when the
 *         configured measure is neither `'cosine'` nor `'dot'`.
 */
const computeSimilarity = (x: number[], y: number[]): number => {
  // The measure is read on every call, so the configured value at call time decides.
  switch (getSimilarityMeasure()) {
    case 'cosine':
      // The assertion narrows the library's declared return type to `number`.
      // NOTE(review): presumably the library can return a non-number for some
      // inputs (e.g. empty vectors) — confirm before relying on this.
      return cosineSimilarity(x, y) as number;
    case 'dot':
      return dot(x, y);
    default:
      throw new Error('Invalid similarity measure');
  }
};
export default computeSimilarity;

97
ui/lib/utils/documents.ts Normal file
View File

@ -0,0 +1,97 @@
import axios from 'axios';
import { htmlToText } from 'html-to-text';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { Document } from '@langchain/core/documents';
import pdfParse from 'pdf-parse';
import logger from './logger';
/**
 * Matches a Content-Type value whose media type is `application/pdf`,
 * regardless of letter case and of any trailing parameters
 * (e.g. `application/pdf; charset=binary`).
 */
const PDF_MEDIA_TYPE = /^\s*application\/pdf\s*(?:;|$)/i;

/**
 * Captures the body of the first `<title>` element. Tolerates attributes on
 * the tag, upper-case tag names and titles that span several lines.
 */
const HTML_TITLE = /<title[^>]*>([\s\S]*?)<\/title>/i;

/** Collapses every run of whitespace (line breaks included) to one space and trims both ends. */
const collapseWhitespace = (text: string): string =>
  text.replace(/\s+/g, ' ').trim();

/**
 * Downloads every link and converts its content into chunked documents.
 *
 * Each link is fetched as raw bytes. A response whose Content-Type is
 * `application/pdf` is parsed with `pdfParse` and titled `PDF Document`;
 * anything else is decoded as UTF-8 HTML, converted to plain text (anchor
 * hrefs are dropped) and titled with the page's `<title>`, falling back to
 * the link itself when no non-blank title is found. The resulting text is
 * whitespace-normalised and split with a `RecursiveCharacterTextSplitter`;
 * each chunk becomes one `Document` carrying `title` and `url` metadata.
 *
 * A failure while handling one link does not fail the whole call: it is
 * logged and that link contributes a single placeholder document whose
 * content describes the error.
 *
 * @param links URLs to fetch. Entries that do not start with `http://` or
 *              `https://` (case-insensitive) are prefixed with `https://`.
 * @returns The documents of all links, grouped in the same order as `links`.
 */
export const getDocumentsFromLinks = async ({
  links,
}: {
  links: string[];
}): Promise<Document[]> => {
  const splitter = new RecursiveCharacterTextSplitter();

  // Each link resolves to its own array so the final order follows `links`
  // rather than whichever request happens to finish first.
  const docsPerLink = await Promise.all(
    links.map(async (rawLink): Promise<Document[]> => {
      // URI schemes are case-insensitive, so `HTTP://…` must not be re-prefixed.
      const link = /^https?:\/\//i.test(rawLink)
        ? rawLink
        : `https://${rawLink}`;

      try {
        const res = await axios.get(link, {
          responseType: 'arraybuffer',
        });

        // The header may be missing or carry parameters; normalise it to a string first.
        const contentType = String(res.headers['content-type'] ?? '');

        let title: string;
        let text: string;

        if (PDF_MEDIA_TYPE.test(contentType)) {
          const pdf = await pdfParse(res.data);
          title = 'PDF Document';
          text = collapseWhitespace(pdf.text);
        } else {
          // Decode once and reuse for both the text conversion and the title lookup.
          const html: string = res.data.toString('utf8');
          title = collapseWhitespace(HTML_TITLE.exec(html)?.[1] ?? '') || link;
          text = collapseWhitespace(
            htmlToText(html, {
              selectors: [
                {
                  selector: 'a',
                  options: {
                    ignoreHref: true,
                  },
                },
              ],
            }),
          );
        }

        const chunks = await splitter.splitText(text);
        return chunks.map(
          (chunk) =>
            new Document({
              pageContent: chunk,
              metadata: {
                title,
                url: link,
              },
            }),
        );
      } catch (err) {
        console.error("An error occurred while getting documents from links: ", err);
        // Surface the failure as a document so callers still get an entry for this link.
        return [
          new Document({
            pageContent: `Failed to retrieve content from the link: ${err}`,
            metadata: {
              title: 'Failed to retrieve content',
              url: link,
            },
          }),
        ];
      }
    }),
  );

  return docsPerLink.flat();
};

View File

@ -0,0 +1,9 @@
import { BaseMessage } from '@langchain/core/messages';
/**
 * Reduces a message's `content` to plain text.
 *
 * String content is returned unchanged. Array (multi-part) content is reduced
 * to the concatenation of its string entries and of the `text` of its
 * `{ type: 'text' }` parts; other parts (images, etc.) are omitted instead of
 * being rendered as `[object Object]`. Any other shape yields an empty string.
 */
const messageContentToText = (content: unknown): string => {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  return content
    .map((part: unknown): string => {
      if (typeof part === 'string') return part;
      if (typeof part === 'object' && part !== null) {
        const { type, text } = part as { type?: unknown; text?: unknown };
        if (type === 'text' && typeof text === 'string') return text;
      }
      return '';
    })
    .join('');
};

/**
 * Renders a chat history as plain text, one `<type>: <content>` line per
 * message, in the order given.
 *
 * @param history Messages to render.
 * @returns The lines joined with `\n`; an empty string for an empty history.
 */
const formatChatHistoryAsString = (history: BaseMessage[]): string => {
  return history
    .map(
      (message) =>
        `${message._getType()}: ${messageContentToText(message.content)}`,
    )
    .join('\n');
};
export default formatChatHistoryAsString;

22
ui/lib/utils/logger.ts Normal file
View File

@ -0,0 +1,22 @@
import winston from 'winston';
// Console output: colorized, in winston's `simple` line format.
const consoleTransport = new winston.transports.Console({
  format: winston.format.combine(
    winston.format.colorize(),
    winston.format.simple(),
  ),
});

// File output: JSON records with a timestamp, written to `app.log`.
const fileTransport = new winston.transports.File({
  filename: 'app.log',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.json(),
  ),
});

/**
 * Application logger configured at level `info`, sending every record to both
 * the console transport and the file transport defined above.
 */
const logger = winston.createLogger({
  level: 'info',
  transports: [consoleTransport, fileTransport],
});
export default logger;