mirror of
				https://github.com/ItzCrazyKns/Perplexica.git
				synced 2025-10-30 19:08:15 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			135 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
			
		
		
	
	
			135 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
| import { NextResponse } from 'next/server';
 | |
| import fs from 'fs';
 | |
| import path from 'path';
 | |
| import crypto from 'crypto';
 | |
| import { getAvailableEmbeddingModelProviders } from '@/lib/providers';
 | |
| import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
 | |
| import { DocxLoader } from '@langchain/community/document_loaders/fs/docx';
 | |
| import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 | |
| import { Document } from 'langchain/document';
 | |
| 
 | |
/**
 * Describes one successfully processed upload as reported back to the client.
 */
type FileRes = {
  /** Original name of the uploaded file, as supplied by the client. */
  fileName: string;
  /** Extension taken from the file name, without the leading dot. */
  fileExtension: string;
  /** Random identifier used as the base name of the stored file on disk. */
  fileId: string;
};
 | |
| 
 | |
// Absolute path of the `uploads` folder, resolved against the working
// directory of the running process.
const uploadDir = path.join(process.cwd(), 'uploads');

// Make sure the folder is present as soon as this module is loaded; nothing is
// done when a file system entry with that path already exists.
const uploadDirExists = fs.existsSync(uploadDir);
if (!uploadDirExists) {
  fs.mkdirSync(uploadDir, { recursive: true });
}
 | |
| 
 | |
// Chunking parameters handed to the text splitter below.
const CHUNK_SIZE = 500;
const CHUNK_OVERLAP = 100;

// Module-level splitter instance, constructed once with the parameters above.
const splitter = new RecursiveCharacterTextSplitter({
  chunkOverlap: CHUNK_OVERLAP,
  chunkSize: CHUNK_SIZE,
});
 | |
| 
 | |
/**
 * Handles a multipart upload of documents and prepares them for retrieval.
 *
 * Form fields read from the request:
 * - `files`: the uploaded files; only `pdf`, `docx` and `txt` extensions
 *   (matched case-insensitively) are accepted. Non-file entries are ignored.
 * - `embedding_model` / `embedding_model_provider`: keys used to look up a
 *   model in the result of `getAvailableEmbeddingModelProviders()`.
 *
 * For each file the handler writes three files into `uploadDir`, all sharing a
 * random hex id as base name:
 * - `<id>.<ext>`: the raw upload,
 * - `<id>-extracted.json`: `{ title, contents }` with the text chunks,
 * - `<id>-embeddings.json`: `{ title, embeddings }` for those chunks.
 *
 * All file extensions are validated before anything is written, so a request
 * containing an unsupported file is rejected as a whole.
 *
 * @param req - Incoming request carrying `multipart/form-data`.
 * @returns A JSON response:
 *   - 200 `{ files: FileRes[] }`, in the same order as the uploaded files;
 *   - 400 `{ message }` when the model/provider is missing or unknown, or when
 *     any file has an unsupported extension;
 *   - 500 `{ message }` when reading, parsing, embedding or writing fails.
 */
export async function POST(req: Request) {
  try {
    const formData = await req.formData();

    // Form entries are `string | File`; only real file parts can be processed.
    const files = formData
      .getAll('files')
      .filter((entry): entry is File => typeof entry !== 'string');
    const embedding_model = formData.get('embedding_model');
    const embedding_model_provider = formData.get('embedding_model_provider');

    if (!embedding_model || !embedding_model_provider) {
      return NextResponse.json(
        { message: 'Missing embedding model or provider' },
        { status: 400 },
      );
    }

    const embeddingModels = await getAvailableEmbeddingModelProviders();

    // Both selectors must be plain strings to be usable as lookup keys; a file
    // part sent under either field name is treated as an invalid selection.
    const embeddingsModel =
      typeof embedding_model === 'string' &&
      typeof embedding_model_provider === 'string'
        ? embeddingModels[embedding_model_provider]?.[embedding_model]?.model
        : undefined;
    if (!embeddingsModel) {
      return NextResponse.json(
        { message: 'Invalid embedding model selected' },
        { status: 400 },
      );
    }

    const supportedExtensions = ['pdf', 'docx', 'txt'];
    const uploads = files.map((file) => ({
      file,
      fileExtension: file.name.split('.').pop()?.toLowerCase() ?? '',
    }));

    // Validate up-front: a `return` inside the per-file callback below would
    // only end that callback and could never reach the client.
    const hasUnsupportedFile = uploads.some(
      ({ fileExtension }) => !supportedExtensions.includes(fileExtension),
    );
    if (hasUnsupportedFile) {
      return NextResponse.json(
        { message: 'File type not supported' },
        { status: 400 },
      );
    }

    // Using the array returned by Promise.all keeps the response in upload
    // order regardless of which file finishes processing first.
    const processedFiles: FileRes[] = await Promise.all(
      uploads.map(async ({ file, fileExtension }) => {
        const fileId = crypto.randomBytes(16).toString('hex');
        const filePath = path.join(uploadDir, `${fileId}.${fileExtension}`);

        const buffer = Buffer.from(await file.arrayBuffer());
        await fs.promises.writeFile(filePath, new Uint8Array(buffer));

        let docs: Document[];
        if (fileExtension === 'pdf') {
          docs = await new PDFLoader(filePath).load();
        } else if (fileExtension === 'docx') {
          docs = await new DocxLoader(filePath).load();
        } else {
          // 'txt': decode the bytes already in memory instead of re-reading
          // the file that was just written.
          docs = [
            new Document({
              pageContent: buffer.toString('utf-8'),
              metadata: { title: file.name },
            }),
          ];
        }

        const splitted = await splitter.splitDocuments(docs);
        const chunks = splitted.map((doc) => doc.pageContent);

        await fs.promises.writeFile(
          path.join(uploadDir, `${fileId}-extracted.json`),
          JSON.stringify({
            title: file.name,
            contents: chunks,
          }),
        );

        const embeddings = await embeddingsModel.embedDocuments(chunks);
        await fs.promises.writeFile(
          path.join(uploadDir, `${fileId}-embeddings.json`),
          JSON.stringify({
            title: file.name,
            embeddings,
          }),
        );

        return {
          fileName: file.name,
          fileExtension,
          fileId,
        };
      }),
    );

    return NextResponse.json({
      files: processedFiles,
    });
  } catch (error) {
    console.error('Error uploading file:', error);
    return NextResponse.json(
      { message: 'An error has occurred.' },
      { status: 500 },
    );
  }
}
 |