Remove unused output parsers and document utility

2026-02-23 13:36:45 +00:00 · 2025-11-23 19:21:16 +05:30
parent d6c364fdcb
commit 6d35d60b49
3 changed files with 0 additions and 197 deletions
--- a/src/lib/outputParsers/lineOutputParser.ts
+++ b/src/lib/outputParsers/lineOutputParser.ts
@@ -1,48 +0,0 @@
-import { BaseOutputParser } from '@langchain/core/output_parsers';
-
-interface LineOutputParserArgs {
-  key?: string;
-}
-
-class LineOutputParser extends BaseOutputParser<string | undefined> {
-  private key = 'questions';
-
-  constructor(args?: LineOutputParserArgs) {
-    super();
-    this.key = args?.key ?? this.key;
-  }
-
-  static lc_name() {
-    return 'LineOutputParser';
-  }
-
-  lc_namespace = ['langchain', 'output_parsers', 'line_output_parser'];
-
-  async parse(text: string): Promise<string | undefined> {
-    text = text.trim() || '';
-
-    const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
-    const startKeyIndex = text.indexOf(`<${this.key}>`);
-    const endKeyIndex = text.indexOf(`</${this.key}>`);
-
-    if (startKeyIndex === -1 || endKeyIndex === -1) {
-      return undefined;
-    }
-
-    const questionsStartIndex =
-      startKeyIndex === -1 ? 0 : startKeyIndex + `<${this.key}>`.length;
-    const questionsEndIndex = endKeyIndex === -1 ? text.length : endKeyIndex;
-    const line = text
-      .slice(questionsStartIndex, questionsEndIndex)
-      .trim()
-      .replace(regex, '');
-
-    return line;
-  }
-
-  getFormatInstructions(): string {
-    throw new Error('Not implemented.');
-  }
-}
-
-export default LineOutputParser;
--- a/src/lib/outputParsers/listLineOutputParser.ts
+++ b/src/lib/outputParsers/listLineOutputParser.ts
@@ -1,50 +0,0 @@
-import { BaseOutputParser } from '@langchain/core/output_parsers';
-
-interface LineListOutputParserArgs {
-  key?: string;
-}
-
-class LineListOutputParser extends BaseOutputParser<string[]> {
-  private key = 'questions';
-
-  constructor(args?: LineListOutputParserArgs) {
-    super();
-    this.key = args?.key ?? this.key;
-  }
-
-  static lc_name() {
-    return 'LineListOutputParser';
-  }
-
-  lc_namespace = ['langchain', 'output_parsers', 'line_list_output_parser'];
-
-  async parse(text: string): Promise<string[]> {
-    text = text.trim() || '';
-
-    const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
-    const startKeyIndex = text.indexOf(`<${this.key}>`);
-    const endKeyIndex = text.indexOf(`</${this.key}>`);
-
-    if (startKeyIndex === -1 || endKeyIndex === -1) {
-      return [];
-    }
-
-    const questionsStartIndex =
-      startKeyIndex === -1 ? 0 : startKeyIndex + `<${this.key}>`.length;
-    const questionsEndIndex = endKeyIndex === -1 ? text.length : endKeyIndex;
-    const lines = text
-      .slice(questionsStartIndex, questionsEndIndex)
-      .trim()
-      .split('\n')
-      .filter((line) => line.trim() !== '')
-      .map((line) => line.replace(regex, ''));
-
-    return lines;
-  }
-
-  getFormatInstructions(): string {
-    throw new Error('Not implemented.');
-  }
-}
-
-export default LineListOutputParser;
--- a/src/lib/utils/documents.ts
+++ b/src/lib/utils/documents.ts
@@ -1,99 +0,0 @@
-import axios from 'axios';
-import { htmlToText } from 'html-to-text';
-import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
-import { Document } from '@langchain/core/documents';
-import pdfParse from 'pdf-parse';
-
-export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
-  const splitter = new RecursiveCharacterTextSplitter();
-
-  let docs: Document[] = [];
-
-  await Promise.all(
-    links.map(async (link) => {
-      link =
-        link.startsWith('http://') || link.startsWith('https://')
-          ? link
-          : `https://${link}`;
-
-      try {
-        const res = await axios.get(link, {
-          responseType: 'arraybuffer',
-        });
-
-        const isPdf = res.headers['content-type'] === 'application/pdf';
-
-        if (isPdf) {
-          const pdfText = await pdfParse(res.data);
-          const parsedText = pdfText.text
-            .replace(/(\r\n|\n|\r)/gm, ' ')
-            .replace(/\s+/g, ' ')
-            .trim();
-
-          const splittedText = await splitter.splitText(parsedText);
-          const title = 'PDF Document';
-
-          const linkDocs = splittedText.map((text) => {
-            return new Document({
-              pageContent: text,
-              metadata: {
-                title: title,
-                url: link,
-              },
-            });
-          });
-
-          docs.push(...linkDocs);
-          return;
-        }
-
-        const parsedText = htmlToText(res.data.toString('utf8'), {
-          selectors: [
-            {
-              selector: 'a',
-              options: {
-                ignoreHref: true,
-              },
-            },
-          ],
-        })
-          .replace(/(\r\n|\n|\r)/gm, ' ')
-          .replace(/\s+/g, ' ')
-          .trim();
-
-        const splittedText = await splitter.splitText(parsedText);
-        const title = res.data
-          .toString('utf8')
-          .match(/<title.*>(.*?)<\/title>/)?.[1];
-
-        const linkDocs = splittedText.map((text) => {
-          return new Document({
-            pageContent: text,
-            metadata: {
-              title: title || link,
-              url: link,
-            },
-          });
-        });
-
-        docs.push(...linkDocs);
-      } catch (err) {
-        console.error(
-          'An error occurred while getting documents from links: ',
-          err,
-        );
-        docs.push(
-          new Document({
-            pageContent: `Failed to retrieve content from the link: ${err}`,
-            metadata: {
-              title: 'Failed to retrieve content',
-              url: link,
-            },
-          }),
-        );
-      }
-    }),
-  );
-
-  return docs;
-};