Remove unused output parsers and document utility

2026-03-14 00:26:45 +00:00 · 2025-11-23 19:21:16 +05:30
parent d6c364fdcb
commit 6d35d60b49
3 changed files with 0 additions and 197 deletions
--- a/src/lib/outputParsers/lineOutputParser.ts
+++ b/src/lib/outputParsers/lineOutputParser.ts
@@ -1,48 +0,0 @@
 import { BaseOutputParser } from '@langchain/core/output_parsers';
 /** Constructor options for {@link LineOutputParser}. */
 interface LineOutputParserArgs {
  /** Tag name (without angle brackets) whose content is extracted; defaults to `questions`. */
  key?: string;
 }
 /**
  * Output parser that returns the text enclosed in `<key>...</key>` as a single
  * string, with any leading list marker removed.
  *
  * `parse` resolves to `undefined` when the opening tag is missing or when no
  * closing tag follows it.
  */
 class LineOutputParser extends BaseOutputParser<string | undefined> {
  /** Tag name searched for in the model output. */
  private key = 'questions';
  /**
   * @param args - Optional settings; `args.key` overrides the default tag name.
   */
  constructor(args?: LineOutputParserArgs) {
    super();
    this.key = args?.key ?? this.key;
  }
  /** Name reported for this parser class. */
  static lc_name() {
    return 'LineOutputParser';
  }
  // Namespace segments for this parser (presumably consumed by LangChain
  // serialization; confirm against the base class).
  lc_namespace = ['langchain', 'output_parsers', 'line_output_parser'];
  /**
   * Extracts the content between the opening and closing tag.
   *
   * @param text - Raw text to search.
   * @returns The trimmed tag content without leading list markers (`-`, `*`,
   *   `1. `, `1) `, bullet), or `undefined` when no complete tag pair is found.
   */
  async parse(text: string): Promise<string | undefined> {
    const leadingListMarkers = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
    const openTag = `<${this.key}>`;
    const closeTag = `</${this.key}>`;
    const source = text.trim();
    const openIndex = source.indexOf(openTag);
    if (openIndex === -1) {
      return undefined;
    }
    const contentStart = openIndex + openTag.length;
    // Look for the closing tag only after the opening tag so that a stray
    // closing tag earlier in the text cannot produce an inverted (empty) slice.
    const contentEnd = source.indexOf(closeTag, contentStart);
    if (contentEnd === -1) {
      return undefined;
    }
    return source
      .slice(contentStart, contentEnd)
      .trim()
      .replace(leadingListMarkers, '');
  }
  /**
   * Format instructions are not provided by this parser.
   *
   * @throws Error always, with the message `Not implemented.`
   */
  getFormatInstructions(): string {
    throw new Error('Not implemented.');
  }
 }
 export default LineOutputParser;
--- a/src/lib/outputParsers/listLineOutputParser.ts
+++ b/src/lib/outputParsers/listLineOutputParser.ts
@@ -1,50 +0,0 @@
 import { BaseOutputParser } from '@langchain/core/output_parsers';
 /** Constructor options for {@link LineListOutputParser}. */
 interface LineListOutputParserArgs {
  /** Tag name (without angle brackets) whose content is extracted; defaults to `questions`. */
  key?: string;
 }
 /**
  * Output parser that returns the non-empty lines enclosed in `<key>...</key>`
  * as an array, with any leading list marker removed from each line.
  *
  * `parse` resolves to an empty array when the opening tag is missing or when
  * no closing tag follows it.
  */
 class LineListOutputParser extends BaseOutputParser<string[]> {
  /** Tag name searched for in the model output. */
  private key = 'questions';
  /**
   * @param args - Optional settings; `args.key` overrides the default tag name.
   */
  constructor(args?: LineListOutputParserArgs) {
    super();
    this.key = args?.key ?? this.key;
  }
  /** Name reported for this parser class. */
  static lc_name() {
    return 'LineListOutputParser';
  }
  // Namespace segments for this parser (presumably consumed by LangChain
  // serialization; confirm against the base class).
  lc_namespace = ['langchain', 'output_parsers', 'line_list_output_parser'];
  /**
   * Extracts the lines between the opening and closing tag.
   *
   * @param text - Raw text to search.
   * @returns One entry per non-blank line of the tag content, each without its
   *   leading list markers (`-`, `*`, `1. `, `1) `, bullet); `[]` when no
   *   complete tag pair is found.
   */
  async parse(text: string): Promise<string[]> {
    const leadingListMarkers = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
    const openTag = `<${this.key}>`;
    const closeTag = `</${this.key}>`;
    const source = text.trim();
    const openIndex = source.indexOf(openTag);
    if (openIndex === -1) {
      return [];
    }
    const contentStart = openIndex + openTag.length;
    // Look for the closing tag only after the opening tag so that a stray
    // closing tag earlier in the text cannot produce an inverted (empty) slice.
    const contentEnd = source.indexOf(closeTag, contentStart);
    if (contentEnd === -1) {
      return [];
    }
    return source
      .slice(contentStart, contentEnd)
      .trim()
      // Accept both LF and CRLF line endings so entries carry no trailing `\r`.
      .split(/\r?\n/)
      .filter((line) => line.trim() !== '')
      .map((line) => line.replace(leadingListMarkers, ''));
  }
  /**
   * Format instructions are not provided by this parser.
   *
   * @throws Error always, with the message `Not implemented.`
   */
  getFormatInstructions(): string {
    throw new Error('Not implemented.');
  }
 }
 export default LineListOutputParser;
--- a/src/lib/utils/documents.ts
+++ b/src/lib/utils/documents.ts
@@ -1,99 +0,0 @@
 import axios from 'axios';
 import { htmlToText } from 'html-to-text';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import { Document } from '@langchain/core/documents';
 import pdfParse from 'pdf-parse';
 /**
  * Downloads every link, converts the response (PDF or HTML) to plain text,
  * splits that text into chunks and wraps each chunk in a `Document` whose
  * metadata carries the page title and URL.
  *
  * Links are fetched concurrently; the returned documents are grouped in the
  * same order as `links`. A link that cannot be fetched or parsed does not
  * reject the call: it contributes one placeholder document describing the
  * failure.
  *
  * NOTE(review): the URLs are requested as given. If `links` can come from
  * untrusted input, confirm that callers restrict which hosts may be reached.
  *
  * @param links - URLs to fetch; entries without an `http://` or `https://`
  *   prefix get `https://` prepended.
  * @returns The documents for all links, in `links` order.
  */
 export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
  const splitter = new RecursiveCharacterTextSplitter();
  // Collapses every whitespace run (including line breaks) into one space.
  const collapseWhitespace = (value: string): string =>
    value.replace(/\s+/g, ' ').trim();
  const docsPerLink = await Promise.all(
    links.map(async (rawLink): Promise<Document[]> => {
      // Case-insensitive so that e.g. `HTTP://host` is not prefixed again.
      const link = /^https?:\/\//i.test(rawLink)
        ? rawLink
        : `https://${rawLink}`;
      const toDocuments = (chunks: string[], title: string): Document[] =>
        chunks.map(
          (chunk) =>
            new Document({
              pageContent: chunk,
              metadata: {
                title: title,
                url: link,
              },
            }),
        );
      try {
        const res = await axios.get(link, {
          responseType: 'arraybuffer',
        });
        // Compare only the media type: the header may carry parameters
        // (`application/pdf; charset=binary`) and is case-insensitive.
        const mediaType = String(res.headers['content-type'] ?? '')
          .split(';')[0]
          .trim()
          .toLowerCase();
        if (mediaType === 'application/pdf') {
          const pdfText = await pdfParse(res.data);
          const pdfChunks = await splitter.splitText(
            collapseWhitespace(pdfText.text),
          );
          return toDocuments(pdfChunks, 'PDF Document');
        }
        // Decode the body once and reuse it for text and title extraction.
        const html: string = res.data.toString('utf8');
        const pageText = collapseWhitespace(
          htmlToText(html, {
            selectors: [
              {
                selector: 'a',
                options: {
                  ignoreHref: true,
                },
              },
            ],
          }),
        );
        const pageChunks = await splitter.splitText(pageText);
        // Non-greedy and case-insensitive; `[\s\S]` lets the title span lines
        // and `[^>]*` keeps tag attributes from swallowing the title text.
        const rawTitle = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i)?.[1];
        const title = rawTitle ? collapseWhitespace(rawTitle) : '';
        return toDocuments(pageChunks, title || link);
      } catch (err) {
        console.error(
          'An error occurred while getting documents from links: ',
          err,
        );
        return [
          new Document({
            pageContent: `Failed to retrieve content from the link: ${err}`,
            metadata: {
              title: 'Failed to retrieve content',
              url: link,
            },
          }),
        ];
      }
    }),
  );
  // Flatten per-link results so the output order follows `links`, not the
  // order in which the requests happened to complete.
  return docsPerLink.flat();
 };