mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-10-25 16:38:16 +00:00
feat(hf-transformer): use langchain's inbuilt transformer class
This commit is contained in:
@@ -2,7 +2,7 @@ services:
|
|||||||
perplexica:
|
perplexica:
|
||||||
image: itzcrazykns1337/perplexica:latest
|
image: itzcrazykns1337/perplexica:latest
|
||||||
ports:
|
ports:
|
||||||
- "3000:3000"
|
- '3000:3000'
|
||||||
volumes:
|
volumes:
|
||||||
- data:/home/perplexica/data
|
- data:/home/perplexica/data
|
||||||
- uploads:/home/perplexica/uploads
|
- uploads:/home/perplexica/uploads
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ Before making search requests, you'll need to get the available providers and th
|
|||||||
Returns a list of all active providers with their available chat and embedding models.
|
Returns a list of all active providers with their available chat and embedding models.
|
||||||
|
|
||||||
**Response Example:**
|
**Response Example:**
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"providers": [
|
"providers": [
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ const SettingsButtonMobile = () => {
|
|||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<button className="lg:hidden" onClick={() => setIsOpen(true)}>
|
<button className="lg:hidden" onClick={() => setIsOpen(true)}>
|
||||||
<Settings size={18}/>
|
<Settings size={18} />
|
||||||
</button>
|
</button>
|
||||||
<AnimatePresence>
|
<AnimatePresence>
|
||||||
{isOpen && <SettingsDialogue isOpen={isOpen} setIsOpen={setIsOpen} />}
|
{isOpen && <SettingsDialogue isOpen={isOpen} setIsOpen={setIsOpen} />}
|
||||||
|
|||||||
@@ -1,76 +0,0 @@
|
|||||||
import { Embeddings, type EmbeddingsParams } from '@langchain/core/embeddings';
|
|
||||||
import { chunkArray } from '@langchain/core/utils/chunk_array';
|
|
||||||
|
|
||||||
/**
 * Options accepted by {@link HuggingFaceTransformersEmbeddings}, in addition
 * to the base `EmbeddingsParams` options.
 */
export interface HuggingFaceTransformersEmbeddingsParams
  extends EmbeddingsParams {
  /** Model identifier; preferred over `modelName` when both are supplied. */
  model: string;

  /** Alternative spelling of the model identifier, kept alongside `model`. */
  modelName: string;

  /** Desired number of texts per embedding batch. */
  batchSize?: number;

  /** When true, newline characters are replaced with spaces before embedding. */
  stripNewLines?: boolean;

  /** Optional timeout value; its unit is not established by this file. */
  timeout?: number;
}
|
|
||||||
|
|
||||||
/**
 * Embeddings implementation backed by a `feature-extraction` pipeline from
 * `@huggingface/transformers`.
 *
 * Texts are optionally stripped of newlines, split into batches of
 * `batchSize`, and embedded with mean pooling and normalization enabled.
 */
export class HuggingFaceTransformersEmbeddings
  extends Embeddings
  implements HuggingFaceTransformersEmbeddingsParams
{
  modelName = 'Xenova/all-MiniLM-L6-v2';

  model = 'Xenova/all-MiniLM-L6-v2';

  batchSize = 512;

  stripNewLines = true;

  // Stored from the constructor options; not read anywhere else in this class.
  timeout?: number;

  // Lazily created pipeline, shared by all batches so the model is only
  // loaded once per instance instead of once per `runEmbedding` call.
  private pipelinePromise?: Promise<
    import('@huggingface/transformers').FeatureExtractionPipeline
  >;

  // Model identifier the cached pipeline was built for; lets the cache be
  // rebuilt if `model` is reassigned after construction.
  private pipelineModel?: string;

  /**
   * @param fields Optional overrides. `model` wins over `modelName`; any
   *   option left undefined keeps the class default.
   */
  constructor(fields?: Partial<HuggingFaceTransformersEmbeddingsParams>) {
    super(fields ?? {});

    this.modelName = fields?.model ?? fields?.modelName ?? this.model;
    this.model = this.modelName;
    // Previously a caller-supplied batchSize was silently ignored.
    this.batchSize = fields?.batchSize ?? this.batchSize;
    this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines;
    this.timeout = fields?.timeout;
  }

  /**
   * Embeds every text, preserving input order.
   *
   * @param texts Documents to embed; an empty array yields an empty result.
   * @returns One embedding vector per input text.
   */
  async embedDocuments(texts: string[]): Promise<number[][]> {
    const prepared = this.stripNewLines
      ? texts.map((t) => t.replace(/\n/g, ' '))
      : texts;
    const batches = chunkArray(prepared, this.batchSize);

    // Batches run concurrently; Promise.all keeps them in submission order.
    const batchResponses = await Promise.all(
      batches.map((batch) => this.runEmbedding(batch)),
    );

    return batchResponses.flat();
  }

  /**
   * Embeds a single query string.
   *
   * @param text Query text.
   * @returns The embedding vector for `text`.
   */
  async embedQuery(text: string): Promise<number[]> {
    const data = await this.runEmbedding([
      this.stripNewLines ? text.replace(/\n/g, ' ') : text,
    ]);
    return data[0];
  }

  /** Imports the transformers module and builds the pipeline for `model`. */
  private async loadPipeline(
    model: string,
  ): Promise<import('@huggingface/transformers').FeatureExtractionPipeline> {
    const { pipeline } = await import('@huggingface/transformers');
    return pipeline('feature-extraction', model);
  }

  /** Returns the cached pipeline, creating it on first use or model change. */
  private getPipeline(): Promise<
    import('@huggingface/transformers').FeatureExtractionPipeline
  > {
    if (!this.pipelinePromise || this.pipelineModel !== this.model) {
      const loading = this.loadPipeline(this.model);
      // Evict a failed load so a later call can retry instead of being stuck
      // with a permanently rejected promise.
      loading.catch(() => {
        if (this.pipelinePromise === loading) {
          this.pipelinePromise = undefined;
          this.pipelineModel = undefined;
        }
      });
      this.pipelineModel = this.model;
      this.pipelinePromise = loading;
    }
    return this.pipelinePromise;
  }

  /** Runs one batch through the pipeline via the base class `caller`. */
  private async runEmbedding(texts: string[]): Promise<number[][]> {
    const pipe = await this.getPipeline();

    return this.caller.call(async () => {
      const output = await pipe(texts, { pooling: 'mean', normalize: true });
      return output.tolist();
    });
  }
}
|
|
||||||
@@ -4,8 +4,7 @@ import BaseModelProvider from './baseProvider';
|
|||||||
import { Embeddings } from '@langchain/core/embeddings';
|
import { Embeddings } from '@langchain/core/embeddings';
|
||||||
import { UIConfigField } from '@/lib/config/types';
|
import { UIConfigField } from '@/lib/config/types';
|
||||||
import { getConfiguredModelProviderById } from '@/lib/config/serverRegistry';
|
import { getConfiguredModelProviderById } from '@/lib/config/serverRegistry';
|
||||||
import { HuggingFaceTransformersEmbeddings } from '@/lib/huggingfaceTransformer';
|
import { HuggingFaceTransformersEmbeddings } from '@langchain/community/embeddings/huggingface_transformers';
|
||||||
|
|
||||||
interface TransformersConfig {}
|
interface TransformersConfig {}
|
||||||
|
|
||||||
const defaultEmbeddingModels: Model[] = [
|
const defaultEmbeddingModels: Model[] = [
|
||||||
|
|||||||
Reference in New Issue
Block a user