mirror of
				https://github.com/ItzCrazyKns/Perplexica.git
				synced 2025-10-25 08:28:14 +00:00 
			
		
		
		
	feat(hf-transformer): use langchain's inbuilt transformer class
This commit is contained in:
		| @@ -2,7 +2,7 @@ services: | |||||||
|   perplexica: |   perplexica: | ||||||
|     image: itzcrazykns1337/perplexica:latest |     image: itzcrazykns1337/perplexica:latest | ||||||
|     ports: |     ports: | ||||||
|       - "3000:3000" |       - '3000:3000' | ||||||
|     volumes: |     volumes: | ||||||
|       - data:/home/perplexica/data |       - data:/home/perplexica/data | ||||||
|       - uploads:/home/perplexica/uploads |       - uploads:/home/perplexica/uploads | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ Before making search requests, you'll need to get the available providers and th | |||||||
| Returns a list of all active providers with their available chat and embedding models. | Returns a list of all active providers with their available chat and embedding models. | ||||||
|  |  | ||||||
| **Response Example:** | **Response Example:** | ||||||
|  |  | ||||||
| ```json | ```json | ||||||
| { | { | ||||||
|   "providers": [ |   "providers": [ | ||||||
|   | |||||||
| @@ -9,7 +9,7 @@ const SettingsButtonMobile = () => { | |||||||
|   return ( |   return ( | ||||||
|     <> |     <> | ||||||
|       <button className="lg:hidden" onClick={() => setIsOpen(true)}> |       <button className="lg:hidden" onClick={() => setIsOpen(true)}> | ||||||
|         <Settings size={18}/> |         <Settings size={18} /> | ||||||
|       </button> |       </button> | ||||||
|       <AnimatePresence> |       <AnimatePresence> | ||||||
|         {isOpen && <SettingsDialogue isOpen={isOpen} setIsOpen={setIsOpen} />} |         {isOpen && <SettingsDialogue isOpen={isOpen} setIsOpen={setIsOpen} />} | ||||||
|   | |||||||
| @@ -1,76 +0,0 @@ | |||||||
| import { Embeddings, type EmbeddingsParams } from '@langchain/core/embeddings'; |  | ||||||
| import { chunkArray } from '@langchain/core/utils/chunk_array'; |  | ||||||
|  |  | ||||||
| export interface HuggingFaceTransformersEmbeddingsParams |  | ||||||
|   extends EmbeddingsParams { |  | ||||||
|   modelName: string; |  | ||||||
|  |  | ||||||
|   model: string; |  | ||||||
|  |  | ||||||
|   timeout?: number; |  | ||||||
|  |  | ||||||
|   batchSize?: number; |  | ||||||
|  |  | ||||||
|   stripNewLines?: boolean; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export class HuggingFaceTransformersEmbeddings |  | ||||||
|   extends Embeddings |  | ||||||
|   implements HuggingFaceTransformersEmbeddingsParams |  | ||||||
| { |  | ||||||
|   modelName = 'Xenova/all-MiniLM-L6-v2'; |  | ||||||
|  |  | ||||||
|   model = 'Xenova/all-MiniLM-L6-v2'; |  | ||||||
|  |  | ||||||
|   batchSize = 512; |  | ||||||
|  |  | ||||||
|   stripNewLines = true; |  | ||||||
|  |  | ||||||
|   timeout?: number; |  | ||||||
|  |  | ||||||
|   constructor(fields?: Partial<HuggingFaceTransformersEmbeddingsParams>) { |  | ||||||
|     super(fields ?? {}); |  | ||||||
|  |  | ||||||
|     this.modelName = fields?.model ?? fields?.modelName ?? this.model; |  | ||||||
|     this.model = this.modelName; |  | ||||||
|     this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines; |  | ||||||
|     this.timeout = fields?.timeout; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   async embedDocuments(texts: string[]): Promise<number[][]> { |  | ||||||
|     const batches = chunkArray( |  | ||||||
|       this.stripNewLines ? texts.map((t) => t.replace(/\n/g, ' ')) : texts, |  | ||||||
|       this.batchSize, |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     const batchRequests = batches.map((batch) => this.runEmbedding(batch)); |  | ||||||
|     const batchResponses = await Promise.all(batchRequests); |  | ||||||
|     const embeddings: number[][] = []; |  | ||||||
|  |  | ||||||
|     for (let i = 0; i < batchResponses.length; i += 1) { |  | ||||||
|       const batchResponse = batchResponses[i]; |  | ||||||
|       for (let j = 0; j < batchResponse.length; j += 1) { |  | ||||||
|         embeddings.push(batchResponse[j]); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return embeddings; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   async embedQuery(text: string): Promise<number[]> { |  | ||||||
|     const data = await this.runEmbedding([ |  | ||||||
|       this.stripNewLines ? text.replace(/\n/g, ' ') : text, |  | ||||||
|     ]); |  | ||||||
|     return data[0]; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   private async runEmbedding(texts: string[]) { |  | ||||||
|     const { pipeline } = await import('@huggingface/transformers'); |  | ||||||
|     const pipe = await pipeline('feature-extraction', this.model); |  | ||||||
|  |  | ||||||
|     return this.caller.call(async () => { |  | ||||||
|       const output = await pipe(texts, { pooling: 'mean', normalize: true }); |  | ||||||
|       return output.tolist(); |  | ||||||
|     }); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| @@ -4,8 +4,7 @@ import BaseModelProvider from './baseProvider'; | |||||||
| import { Embeddings } from '@langchain/core/embeddings'; | import { Embeddings } from '@langchain/core/embeddings'; | ||||||
| import { UIConfigField } from '@/lib/config/types'; | import { UIConfigField } from '@/lib/config/types'; | ||||||
| import { getConfiguredModelProviderById } from '@/lib/config/serverRegistry'; | import { getConfiguredModelProviderById } from '@/lib/config/serverRegistry'; | ||||||
| import { HuggingFaceTransformersEmbeddings } from '@/lib/huggingfaceTransformer'; | import { HuggingFaceTransformersEmbeddings } from '@langchain/community/embeddings/huggingface_transformers'; | ||||||
|  |  | ||||||
| interface TransformersConfig {} | interface TransformersConfig {} | ||||||
|  |  | ||||||
| const defaultEmbeddingModels: Model[] = [ | const defaultEmbeddingModels: Model[] = [ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user