diff --git a/src/app/api/uploads/route.ts b/src/app/api/uploads/route.ts index dc9c202..9cac0f7 100644 --- a/src/app/api/uploads/route.ts +++ b/src/app/api/uploads/route.ts @@ -1,40 +1,16 @@ import { NextResponse } from 'next/server'; -import fs from 'fs'; -import path from 'path'; -import crypto from 'crypto'; -import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'; -import { DocxLoader } from '@langchain/community/document_loaders/fs/docx'; -import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; -import { Document } from '@langchain/core/documents'; import ModelRegistry from '@/lib/models/registry'; -import { Chunk } from '@/lib/types'; - -interface FileRes { - fileName: string; - fileExtension: string; - fileId: string; -} - -const uploadDir = path.join(process.cwd(), 'uploads'); - -if (!fs.existsSync(uploadDir)) { - fs.mkdirSync(uploadDir, { recursive: true }); -} - -const splitter = new RecursiveCharacterTextSplitter({ - chunkSize: 500, - chunkOverlap: 100, -}); +import UploadManager from '@/lib/uploads/manager'; export async function POST(req: Request) { try { const formData = await req.formData(); const files = formData.getAll('files') as File[]; - const embedding_model = formData.get('embedding_model_key') as string; - const embedding_model_provider = formData.get('embedding_model_provider_id') as string; + const embeddingModel = formData.get('embedding_model_key') as string; + const embeddingModelProvider = formData.get('embedding_model_provider_id') as string; - if (!embedding_model || !embedding_model_provider) { + if (!embeddingModel || !embeddingModelProvider) { return NextResponse.json( { message: 'Missing embedding model or provider' }, { status: 400 }, @@ -43,81 +19,13 @@ export async function POST(req: Request) { const registry = new ModelRegistry(); - const model = await registry.loadEmbeddingModel(embedding_model_provider, embedding_model); + const model = await registry.loadEmbeddingModel(embeddingModelProvider, embeddingModel); + + const uploadManager = new UploadManager({ + embeddingModel: model, + }) - const processedFiles: FileRes[] = []; - - await Promise.all( - files.map(async (file: any) => { - const fileExtension = file.name.split('.').pop(); - if (!['pdf', 'docx', 'txt'].includes(fileExtension!)) { - return NextResponse.json( - { message: 'File type not supported' }, - { status: 400 }, - ); - } - - const uniqueFileName = `${crypto.randomBytes(16).toString('hex')}.${fileExtension}`; - const filePath = path.join(uploadDir, uniqueFileName); - - const buffer = Buffer.from(await file.arrayBuffer()); - fs.writeFileSync(filePath, new Uint8Array(buffer)); - - let docs: any[] = []; - if (fileExtension === 'pdf') { - const loader = new PDFLoader(filePath); - docs = await loader.load(); - } else if (fileExtension === 'docx') { - const loader = new DocxLoader(filePath); - docs = await loader.load(); - } else if (fileExtension === 'txt') { - const text = fs.readFileSync(filePath, 'utf-8'); - docs = [ - new Document({ pageContent: text, metadata: { title: file.name } }), - ]; - } - - const splitted = await splitter.splitDocuments(docs); - - const extractedDataPath = filePath.replace(/\.\w+$/, '-extracted.json'); - fs.writeFileSync( - extractedDataPath, - JSON.stringify({ - title: file.name, - contents: splitted.map((doc) => doc.pageContent), - }), - ); - - const chunks: Chunk[] = splitted.map((doc) => { - return { - content: doc.pageContent, - metadata: doc.metadata, - } - }); - - const embeddings = await model.embedChunks( - chunks - ); - - const embeddingsDataPath = filePath.replace( - /\.\w+$/, - '-embeddings.json', - ); - fs.writeFileSync( - embeddingsDataPath, - JSON.stringify({ - title: file.name, - embeddings, - }), - ); - - processedFiles.push({ - fileName: file.name, - fileExtension: fileExtension, - fileId: uniqueFileName.replace(/\.\w+$/, ''), - }); - }), - ); + const processedFiles = await uploadManager.processFiles(files); return NextResponse.json({ files: processedFiles,