mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-12-14 15:48:15 +00:00
feat(uploads): update to use new manager
This commit is contained in:
@@ -1,40 +1,16 @@
|
|||||||
import { NextResponse } from 'next/server';
|
import { NextResponse } from 'next/server';
|
||||||
import fs from 'fs';
|
|
||||||
import path from 'path';
|
|
||||||
import crypto from 'crypto';
|
|
||||||
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
|
|
||||||
import { DocxLoader } from '@langchain/community/document_loaders/fs/docx';
|
|
||||||
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
||||||
import { Document } from '@langchain/core/documents';
|
|
||||||
import ModelRegistry from '@/lib/models/registry';
|
import ModelRegistry from '@/lib/models/registry';
|
||||||
import { Chunk } from '@/lib/types';
|
import UploadManager from '@/lib/uploads/manager';
|
||||||
|
|
||||||
interface FileRes {
|
|
||||||
fileName: string;
|
|
||||||
fileExtension: string;
|
|
||||||
fileId: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
const uploadDir = path.join(process.cwd(), 'uploads');
|
|
||||||
|
|
||||||
if (!fs.existsSync(uploadDir)) {
|
|
||||||
fs.mkdirSync(uploadDir, { recursive: true });
|
|
||||||
}
|
|
||||||
|
|
||||||
const splitter = new RecursiveCharacterTextSplitter({
|
|
||||||
chunkSize: 500,
|
|
||||||
chunkOverlap: 100,
|
|
||||||
});
|
|
||||||
|
|
||||||
export async function POST(req: Request) {
|
export async function POST(req: Request) {
|
||||||
try {
|
try {
|
||||||
const formData = await req.formData();
|
const formData = await req.formData();
|
||||||
|
|
||||||
const files = formData.getAll('files') as File[];
|
const files = formData.getAll('files') as File[];
|
||||||
const embedding_model = formData.get('embedding_model_key') as string;
|
const embeddingModel = formData.get('embedding_model_key') as string;
|
||||||
const embedding_model_provider = formData.get('embedding_model_provider_id') as string;
|
const embeddingModelProvider = formData.get('embedding_model_provider_id') as string;
|
||||||
|
|
||||||
if (!embedding_model || !embedding_model_provider) {
|
if (!embeddingModel || !embeddingModelProvider) {
|
||||||
return NextResponse.json(
|
return NextResponse.json(
|
||||||
{ message: 'Missing embedding model or provider' },
|
{ message: 'Missing embedding model or provider' },
|
||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
@@ -43,81 +19,13 @@ export async function POST(req: Request) {
|
|||||||
|
|
||||||
const registry = new ModelRegistry();
|
const registry = new ModelRegistry();
|
||||||
|
|
||||||
const model = await registry.loadEmbeddingModel(embedding_model_provider, embedding_model);
|
const model = await registry.loadEmbeddingModel(embeddingModelProvider, embeddingModel);
|
||||||
|
|
||||||
|
const uploadManager = new UploadManager({
|
||||||
|
embeddingModel: model,
|
||||||
|
})
|
||||||
|
|
||||||
const processedFiles: FileRes[] = [];
|
const processedFiles = await uploadManager.processFiles(files);
|
||||||
|
|
||||||
await Promise.all(
|
|
||||||
files.map(async (file: any) => {
|
|
||||||
const fileExtension = file.name.split('.').pop();
|
|
||||||
if (!['pdf', 'docx', 'txt'].includes(fileExtension!)) {
|
|
||||||
return NextResponse.json(
|
|
||||||
{ message: 'File type not supported' },
|
|
||||||
{ status: 400 },
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const uniqueFileName = `${crypto.randomBytes(16).toString('hex')}.${fileExtension}`;
|
|
||||||
const filePath = path.join(uploadDir, uniqueFileName);
|
|
||||||
|
|
||||||
const buffer = Buffer.from(await file.arrayBuffer());
|
|
||||||
fs.writeFileSync(filePath, new Uint8Array(buffer));
|
|
||||||
|
|
||||||
let docs: any[] = [];
|
|
||||||
if (fileExtension === 'pdf') {
|
|
||||||
const loader = new PDFLoader(filePath);
|
|
||||||
docs = await loader.load();
|
|
||||||
} else if (fileExtension === 'docx') {
|
|
||||||
const loader = new DocxLoader(filePath);
|
|
||||||
docs = await loader.load();
|
|
||||||
} else if (fileExtension === 'txt') {
|
|
||||||
const text = fs.readFileSync(filePath, 'utf-8');
|
|
||||||
docs = [
|
|
||||||
new Document({ pageContent: text, metadata: { title: file.name } }),
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
const splitted = await splitter.splitDocuments(docs);
|
|
||||||
|
|
||||||
const extractedDataPath = filePath.replace(/\.\w+$/, '-extracted.json');
|
|
||||||
fs.writeFileSync(
|
|
||||||
extractedDataPath,
|
|
||||||
JSON.stringify({
|
|
||||||
title: file.name,
|
|
||||||
contents: splitted.map((doc) => doc.pageContent),
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
const chunks: Chunk[] = splitted.map((doc) => {
|
|
||||||
return {
|
|
||||||
content: doc.pageContent,
|
|
||||||
metadata: doc.metadata,
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
const embeddings = await model.embedChunks(
|
|
||||||
chunks
|
|
||||||
);
|
|
||||||
|
|
||||||
const embeddingsDataPath = filePath.replace(
|
|
||||||
/\.\w+$/,
|
|
||||||
'-embeddings.json',
|
|
||||||
);
|
|
||||||
fs.writeFileSync(
|
|
||||||
embeddingsDataPath,
|
|
||||||
JSON.stringify({
|
|
||||||
title: file.name,
|
|
||||||
embeddings,
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
processedFiles.push({
|
|
||||||
fileName: file.name,
|
|
||||||
fileExtension: fileExtension,
|
|
||||||
fileId: uniqueFileName.replace(/\.\w+$/, ''),
|
|
||||||
});
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
files: processedFiles,
|
files: processedFiles,
|
||||||
|
|||||||
Reference in New Issue
Block a user