diff --git a/ui/lib/types/compute-dot.d.ts b/ui/lib/types/compute-dot.d.ts
new file mode 100644
index 0000000..6bcd481
--- /dev/null
+++ b/ui/lib/types/compute-dot.d.ts
@@ -0,0 +1,6 @@
+// Ambient type declaration for the `compute-dot` module.
+declare function computeDot(vectorA: number[], vectorB: number[]): number;
+
+declare module "compute-dot" {
+  export default computeDot;
+}
diff --git a/ui/lib/utils/computeSimilarity.ts b/ui/lib/utils/computeSimilarity.ts
new file mode 100644
index 0000000..a635577
--- /dev/null
+++ b/ui/lib/utils/computeSimilarity.ts
@@ -0,0 +1,26 @@
+import dot from 'compute-dot';
+import cosineSimilarity from 'compute-cosine-similarity';
+import { getSimilarityMeasure } from '../config';
+
+/**
+ * Scores two vectors with the similarity measure returned by
+ * `getSimilarityMeasure()`.
+ *
+ * @param x - First vector.
+ * @param y - Second vector.
+ * @returns The cosine similarity or the dot product of `x` and `y`.
+ * @throws Error when the configured measure is neither 'cosine' nor 'dot'.
+ */
+const computeSimilarity = (x: number[], y: number[]): number => {
+  const similarityMeasure = getSimilarityMeasure();
+
+  if (similarityMeasure === 'cosine') {
+    return cosineSimilarity(x, y) as number;
+  } else if (similarityMeasure === 'dot') {
+    return dot(x, y);
+  }
+
+  throw new Error('Invalid similarity measure');
+};
+
+export default computeSimilarity;
diff --git a/ui/lib/utils/documents.ts b/ui/lib/utils/documents.ts
new file mode 100644
index 0000000..07b8bef
--- /dev/null
+++ b/ui/lib/utils/documents.ts
@@ -0,0 +1,107 @@
+import axios from 'axios';
+import { htmlToText } from 'html-to-text';
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
+import { Document } from '@langchain/core/documents';
+import pdfParse from 'pdf-parse';
+import logger from './logger';
+
+/** Replaces line breaks and whitespace runs with single spaces, then trims. */
+const normalizeWhitespace = (text: string): string =>
+  text
+    .replace(/(\r\n|\n|\r)/gm, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
+
+/**
+ * Downloads each link and splits its text into LangChain documents.
+ *
+ * Links without an `http://` or `https://` prefix get `https://` prepended.
+ * PDF responses are parsed with `pdf-parse`; every other response is treated
+ * as HTML and converted with `html-to-text`. A link that fails is represented
+ * by one placeholder document describing the error, so one bad link does not
+ * reject the whole call.
+ *
+ * @param links - URLs (or bare hosts/paths) to fetch.
+ * @returns Documents in the same order as `links`, each carrying `title` and
+ *   `url` metadata.
+ */
+export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
+  const splitter = new RecursiveCharacterTextSplitter();
+
+  const docsPerLink = await Promise.all(
+    links.map(async (rawLink): Promise<Document[]> => {
+      const link =
+        rawLink.startsWith('http://') || rawLink.startsWith('https://')
+          ? rawLink
+          : `https://${rawLink}`;
+
+      try {
+        const res = await axios.get(link, {
+          responseType: 'arraybuffer',
+        });
+
+        // The header can carry parameters ("application/pdf; charset=binary")
+        // or different casing, so match the media type rather than the whole
+        // string.
+        const contentType = String(res.headers['content-type'] ?? '');
+        const isPdf = /^\s*application\/pdf\s*(;|$)/i.test(contentType);
+
+        let parsedText: string;
+        let title: string;
+
+        if (isPdf) {
+          const pdfText = await pdfParse(res.data);
+          parsedText = normalizeWhitespace(pdfText.text);
+          title = 'PDF Document';
+        } else {
+          const html: string = res.data.toString('utf8');
+          parsedText = normalizeWhitespace(
+            htmlToText(html, {
+              selectors: [
+                {
+                  selector: 'a',
+                  options: {
+                    ignoreHref: true,
+                  },
+                },
+              ],
+            }),
+          );
+          // Anchor on the opening tag so only the title text is captured,
+          // not all the markup that precedes `</title>`.
+          const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
+          title = titleMatch?.[1]?.trim() || link;
+        }
+
+        const splittedText = await splitter.splitText(parsedText);
+
+        return splittedText.map(
+          (text) =>
+            new Document({
+              pageContent: text,
+              metadata: {
+                title: title,
+                url: link,
+              },
+            }),
+        );
+      } catch (err) {
+        // Use the shared logger this file imports instead of the raw console.
+        logger.error(
+          `An error occurred while getting documents from links: ${err}`,
+        );
+        return [
+          new Document({
+            pageContent: `Failed to retrieve content from the link: ${err}`,
+            metadata: {
+              title: 'Failed to retrieve content',
+              url: link,
+            },
+          }),
+        ];
+      }
+    }),
+  );
+
+  return docsPerLink.flat();
+};
diff --git a/ui/lib/utils/formatHistory.ts b/ui/lib/utils/formatHistory.ts
new file mode 100644
index 0000000..6d0d309
--- /dev/null
+++ b/ui/lib/utils/formatHistory.ts
@@ -0,0 +1,13 @@
+import { BaseMessage } from '@langchain/core/messages';
+
+/**
+ * Renders a chat history as plain text: one `<type>: <content>` entry per
+ * message, joined with newlines.
+ */
+const formatChatHistoryAsString = (history: BaseMessage[]) => {
+  return history
+    .map((message) => `${message._getType()}: ${message.content}`)
+    .join('\n');
+};
+
+export default formatChatHistoryAsString;
diff --git a/ui/lib/utils/logger.ts
b/ui/lib/utils/logger.ts
new file mode 100644
index 0000000..1c81eb9
--- /dev/null
+++ b/ui/lib/utils/logger.ts
@@ -0,0 +1,22 @@
+import winston from 'winston';
+
+const { colorize, combine, json, simple, timestamp } = winston.format;
+
+// Colorized `simple()` output for the console.
+const consoleTransport = new winston.transports.Console({
+  format: combine(colorize(), simple()),
+});
+
+// Timestamped JSON entries written to `app.log`.
+const fileTransport = new winston.transports.File({
+  filename: 'app.log',
+  format: combine(timestamp(), json()),
+});
+
+/** Shared winston logger: `info` level, console and file transports. */
+const logger = winston.createLogger({
+  level: 'info',
+  transports: [consoleTransport, fileTransport],
+});
+
+export default logger;