feat(uploads): update to use new manager

This commit is contained in:
ItzCrazyKns
2025-12-13 22:20:26 +05:30
parent 3949748bbd
commit 40b25a487b

View File

@@ -1,40 +1,16 @@
import { NextResponse } from 'next/server';
import fs from 'fs';
import path from 'path';
import crypto from 'crypto';
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
import { DocxLoader } from '@langchain/community/document_loaders/fs/docx';
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import { Document } from '@langchain/core/documents';
import ModelRegistry from '@/lib/models/registry';
import { Chunk } from '@/lib/types';
/**
 * Summary of one successfully processed upload, as collected by the upload
 * handler in this file and returned to the client under the `files` key.
 */
interface FileRes {
  /** Identifier of the stored copy: the generated on-disk name without its extension. */
  fileId: string;
  /** Name of the file as supplied by the uploader. */
  fileName: string;
  /** Text after the last `.` in the uploaded name (the handler accepts `pdf`, `docx`, `txt`). */
  fileExtension: string;
}
/**
 * Directory, located under the process working directory, where uploaded
 * files and the JSON artifacts derived from them are written.
 */
const uploadDir = path.join(process.cwd(), 'uploads');

// Create the upload directory once at module load so request handlers can
// write into it without checking for it themselves.
if (fs.existsSync(uploadDir) === false) {
  fs.mkdirSync(uploadDir, { recursive: true });
}

/**
 * Shared splitter used to break loaded documents into chunks before they are
 * embedded; configured with a chunk size of 500 and a chunk overlap of 100.
 */
const splitter = new RecursiveCharacterTextSplitter({
  chunkOverlap: 100,
  chunkSize: 500,
});
import UploadManager from '@/lib/uploads/manager';
export async function POST(req: Request) {
try {
const formData = await req.formData();
const files = formData.getAll('files') as File[];
const embedding_model = formData.get('embedding_model_key') as string;
const embedding_model_provider = formData.get('embedding_model_provider_id') as string;
const embeddingModel = formData.get('embedding_model_key') as string;
const embeddingModelProvider = formData.get('embedding_model_provider_id') as string;
if (!embedding_model || !embedding_model_provider) {
if (!embeddingModel || !embeddingModelProvider) {
return NextResponse.json(
{ message: 'Missing embedding model or provider' },
{ status: 400 },
@@ -43,81 +19,13 @@ export async function POST(req: Request) {
const registry = new ModelRegistry();
const model = await registry.loadEmbeddingModel(embedding_model_provider, embedding_model);
const model = await registry.loadEmbeddingModel(embeddingModelProvider, embeddingModel);
const uploadManager = new UploadManager({
embeddingModel: model,
})
const processedFiles: FileRes[] = [];
await Promise.all(
files.map(async (file: any) => {
const fileExtension = file.name.split('.').pop();
if (!['pdf', 'docx', 'txt'].includes(fileExtension!)) {
return NextResponse.json(
{ message: 'File type not supported' },
{ status: 400 },
);
}
const uniqueFileName = `${crypto.randomBytes(16).toString('hex')}.${fileExtension}`;
const filePath = path.join(uploadDir, uniqueFileName);
const buffer = Buffer.from(await file.arrayBuffer());
fs.writeFileSync(filePath, new Uint8Array(buffer));
let docs: any[] = [];
if (fileExtension === 'pdf') {
const loader = new PDFLoader(filePath);
docs = await loader.load();
} else if (fileExtension === 'docx') {
const loader = new DocxLoader(filePath);
docs = await loader.load();
} else if (fileExtension === 'txt') {
const text = fs.readFileSync(filePath, 'utf-8');
docs = [
new Document({ pageContent: text, metadata: { title: file.name } }),
];
}
const splitted = await splitter.splitDocuments(docs);
const extractedDataPath = filePath.replace(/\.\w+$/, '-extracted.json');
fs.writeFileSync(
extractedDataPath,
JSON.stringify({
title: file.name,
contents: splitted.map((doc) => doc.pageContent),
}),
);
const chunks: Chunk[] = splitted.map((doc) => {
return {
content: doc.pageContent,
metadata: doc.metadata,
}
});
const embeddings = await model.embedChunks(
chunks
);
const embeddingsDataPath = filePath.replace(
/\.\w+$/,
'-embeddings.json',
);
fs.writeFileSync(
embeddingsDataPath,
JSON.stringify({
title: file.name,
embeddings,
}),
);
processedFiles.push({
fileName: file.name,
fileExtension: fileExtension,
fileId: uniqueFileName.replace(/\.\w+$/, ''),
});
}),
);
const processedFiles = await uploadManager.processFiles(files);
return NextResponse.json({
files: processedFiles,