mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-10-24 07:58:14 +00:00
Compare commits
5 Commits
v1.11.1
...
feat/struc
Author | SHA1 | Date | |
---|---|---|---|
|
df33229934 | ||
|
49fafaa096 | ||
|
ca9b32a23b | ||
|
76e3ff4e02 | ||
|
eabf3ca7d3 |
13
package.json
13
package.json
@@ -15,11 +15,12 @@
|
||||
"@headlessui/react": "^2.2.0",
|
||||
"@iarna/toml": "^2.2.5",
|
||||
"@icons-pack/react-simple-icons": "^12.3.0",
|
||||
"@langchain/anthropic": "^0.3.15",
|
||||
"@langchain/community": "^0.3.36",
|
||||
"@langchain/core": "^0.3.42",
|
||||
"@langchain/google-genai": "^0.1.12",
|
||||
"@langchain/openai": "^0.0.25",
|
||||
"@langchain/anthropic": "^0.3.24",
|
||||
"@langchain/community": "^0.3.49",
|
||||
"@langchain/core": "^0.3.66",
|
||||
"@langchain/google-genai": "^0.2.15",
|
||||
"@langchain/ollama": "^0.2.3",
|
||||
"@langchain/openai": "^0.6.2",
|
||||
"@langchain/textsplitters": "^0.1.0",
|
||||
"@tailwindcss/typography": "^0.5.12",
|
||||
"@xenova/transformers": "^2.17.2",
|
||||
@@ -31,7 +32,7 @@
|
||||
"drizzle-orm": "^0.40.1",
|
||||
"html-to-text": "^9.0.5",
|
||||
"jspdf": "^3.0.1",
|
||||
"langchain": "^0.1.30",
|
||||
"langchain": "^0.3.30",
|
||||
"lucide-react": "^0.363.0",
|
||||
"mammoth": "^1.9.1",
|
||||
"markdown-to-jsx": "^7.7.2",
|
||||
|
@@ -223,7 +223,7 @@ export const POST = async (req: Request) => {
|
||||
|
||||
if (body.chatModel?.provider === 'custom_openai') {
|
||||
llm = new ChatOpenAI({
|
||||
openAIApiKey: getCustomOpenaiApiKey(),
|
||||
apiKey: getCustomOpenaiApiKey(),
|
||||
modelName: getCustomOpenaiModelName(),
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
|
@@ -49,7 +49,7 @@ export const POST = async (req: Request) => {
|
||||
|
||||
if (body.chatModel?.provider === 'custom_openai') {
|
||||
llm = new ChatOpenAI({
|
||||
openAIApiKey: getCustomOpenaiApiKey(),
|
||||
apiKey: getCustomOpenaiApiKey(),
|
||||
modelName: getCustomOpenaiModelName(),
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
|
@@ -81,7 +81,7 @@ export const POST = async (req: Request) => {
|
||||
if (body.chatModel?.provider === 'custom_openai') {
|
||||
llm = new ChatOpenAI({
|
||||
modelName: body.chatModel?.name || getCustomOpenaiModelName(),
|
||||
openAIApiKey:
|
||||
apiKey:
|
||||
body.chatModel?.customOpenAIKey || getCustomOpenaiApiKey(),
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
|
@@ -48,7 +48,7 @@ export const POST = async (req: Request) => {
|
||||
|
||||
if (body.chatModel?.provider === 'custom_openai') {
|
||||
llm = new ChatOpenAI({
|
||||
openAIApiKey: getCustomOpenaiApiKey(),
|
||||
apiKey: getCustomOpenaiApiKey(),
|
||||
modelName: getCustomOpenaiModelName(),
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
|
@@ -49,7 +49,7 @@ export const POST = async (req: Request) => {
|
||||
|
||||
if (body.chatModel?.provider === 'custom_openai') {
|
||||
llm = new ChatOpenAI({
|
||||
openAIApiKey: getCustomOpenaiApiKey(),
|
||||
apiKey: getCustomOpenaiApiKey(),
|
||||
modelName: getCustomOpenaiModelName(),
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
|
@@ -1,63 +1,41 @@
|
||||
export const webSearchRetrieverPrompt = `
|
||||
You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it.
|
||||
If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
|
||||
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
|
||||
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
|
||||
You are an AI question rephraser. You will be given a conversation and a follow-up question; rephrase it into a standalone question that another LLM can use to search the web.
|
||||
|
||||
There are several examples attached for your reference inside the below \`examples\` XML block
|
||||
Return ONLY a JSON object that matches this schema:
|
||||
query: string // the standalone question (or "summarize")
|
||||
links: string[] // URLs extracted from the user query (empty if none)
|
||||
searchRequired: boolean // true if web search is needed, false for greetings/simple writing tasks
|
||||
searchMode: "" | "normal" | "news" // "" when searchRequired is false; "news" if the user asks for news/articles, otherwise "normal"
|
||||
|
||||
<examples>
|
||||
1. Follow up question: What is the capital of France
|
||||
Rephrased question:\`
|
||||
<question>
|
||||
Capital of france
|
||||
</question>
|
||||
\`
|
||||
Rules
|
||||
- Greetings / simple writing tasks → query:"", links:[], searchRequired:false, searchMode:""
|
||||
- Summarizing a URL → query:"summarize", links:[url...], searchRequired:true, searchMode:"normal"
|
||||
- Asking for news/articles → searchMode:"news"
|
||||
|
||||
Examples
|
||||
1. Follow-up: What is the capital of France?
|
||||
"query":"capital of France","links":[],"searchRequired":true,"searchMode":"normal"
|
||||
|
||||
2. Hi, how are you?
|
||||
Rephrased question\`
|
||||
<question>
|
||||
not_needed
|
||||
</question>
|
||||
\`
|
||||
"query":"","links":[],"searchRequired":false,"searchMode":""
|
||||
|
||||
3. Follow up question: What is Docker?
|
||||
Rephrased question: \`
|
||||
<question>
|
||||
What is Docker
|
||||
</question>
|
||||
\`
|
||||
3. Follow-up: What is Docker?
|
||||
"query":"what is Docker","links":[],"searchRequired":true,"searchMode":"normal"
|
||||
|
||||
4. Follow up question: Can you tell me what is X from https://example.com
|
||||
Rephrased question: \`
|
||||
<question>
|
||||
Can you tell me what is X?
|
||||
</question>
|
||||
4. Follow-up: Can you tell me what is X from https://example.com?
|
||||
"query":"what is X","links":["https://example.com"],"searchRequired":true,"searchMode":"normal"
|
||||
|
||||
<links>
|
||||
https://example.com
|
||||
</links>
|
||||
\`
|
||||
5. Follow-up: Summarize the content from https://example.com
|
||||
"query":"summarize","links":["https://example.com"],"searchRequired":true,"searchMode":"normal"
|
||||
|
||||
5. Follow up question: Summarize the content from https://example.com
|
||||
Rephrased question: \`
|
||||
<question>
|
||||
summarize
|
||||
</question>
|
||||
|
||||
<links>
|
||||
https://example.com
|
||||
</links>
|
||||
\`
|
||||
</examples>
|
||||
|
||||
Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above.
|
||||
6. Follow-up: Latest news about AI
|
||||
"query":"latest news about AI","links":[],"searchRequired":true,"searchMode":"news"
|
||||
|
||||
<conversation>
|
||||
{chat_history}
|
||||
</conversation>
|
||||
|
||||
Follow up question: {query}
|
||||
Follow-up question: {query}
|
||||
Rephrased question:
|
||||
`;
|
||||
|
||||
|
@@ -38,7 +38,7 @@ export const loadAimlApiChatModels = async () => {
|
||||
chatModels[model.id] = {
|
||||
displayName: model.name || model.id,
|
||||
model: new ChatOpenAI({
|
||||
openAIApiKey: apiKey,
|
||||
apiKey: apiKey,
|
||||
modelName: model.id,
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
@@ -76,7 +76,7 @@ export const loadAimlApiEmbeddingModels = async () => {
|
||||
embeddingModels[model.id] = {
|
||||
displayName: model.name || model.id,
|
||||
model: new OpenAIEmbeddings({
|
||||
openAIApiKey: apiKey,
|
||||
apiKey: apiKey,
|
||||
modelName: model.id,
|
||||
configuration: {
|
||||
baseURL: API_URL,
|
||||
|
@@ -31,7 +31,7 @@ export const loadDeepseekChatModels = async () => {
|
||||
chatModels[model.key] = {
|
||||
displayName: model.displayName,
|
||||
model: new ChatOpenAI({
|
||||
openAIApiKey: deepseekApiKey,
|
||||
apiKey: deepseekApiKey,
|
||||
modelName: model.key,
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
|
@@ -29,12 +29,15 @@ export const loadGroqChatModels = async () => {
|
||||
chatModels[model.id] = {
|
||||
displayName: model.id,
|
||||
model: new ChatOpenAI({
|
||||
openAIApiKey: groqApiKey,
|
||||
apiKey: groqApiKey,
|
||||
modelName: model.id,
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
baseURL: 'https://api.groq.com/openai/v1',
|
||||
},
|
||||
metadata: {
|
||||
'model-type': 'groq',
|
||||
},
|
||||
}) as unknown as BaseChatModel,
|
||||
};
|
||||
});
|
||||
|
@@ -118,7 +118,7 @@ export const getAvailableChatModelProviders = async () => {
|
||||
[customOpenAiModelName]: {
|
||||
displayName: customOpenAiModelName,
|
||||
model: new ChatOpenAI({
|
||||
openAIApiKey: customOpenAiApiKey,
|
||||
apiKey: customOpenAiApiKey,
|
||||
modelName: customOpenAiModelName,
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
|
@@ -47,7 +47,7 @@ export const loadLMStudioChatModels = async () => {
|
||||
chatModels[model.id] = {
|
||||
displayName: model.name || model.id,
|
||||
model: new ChatOpenAI({
|
||||
openAIApiKey: 'lm-studio',
|
||||
apiKey: 'lm-studio',
|
||||
configuration: {
|
||||
baseURL: ensureV1Endpoint(endpoint),
|
||||
},
|
||||
@@ -83,7 +83,7 @@ export const loadLMStudioEmbeddingsModels = async () => {
|
||||
embeddingsModels[model.id] = {
|
||||
displayName: model.name || model.id,
|
||||
model: new OpenAIEmbeddings({
|
||||
openAIApiKey: 'lm-studio',
|
||||
apiKey: 'lm-studio',
|
||||
configuration: {
|
||||
baseURL: ensureV1Endpoint(endpoint),
|
||||
},
|
||||
|
@@ -6,8 +6,8 @@ export const PROVIDER_INFO = {
|
||||
key: 'ollama',
|
||||
displayName: 'Ollama',
|
||||
};
|
||||
import { ChatOllama } from '@langchain/community/chat_models/ollama';
|
||||
import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
|
||||
import { ChatOllama } from '@langchain/ollama';
|
||||
import { OllamaEmbeddings } from '@langchain/ollama';
|
||||
|
||||
export const loadOllamaChatModels = async () => {
|
||||
const ollamaApiEndpoint = getOllamaApiEndpoint();
|
||||
|
@@ -67,7 +67,7 @@ export const loadOpenAIChatModels = async () => {
|
||||
chatModels[model.key] = {
|
||||
displayName: model.displayName,
|
||||
model: new ChatOpenAI({
|
||||
openAIApiKey: openaiApiKey,
|
||||
apiKey: openaiApiKey,
|
||||
modelName: model.key,
|
||||
temperature: 0.7,
|
||||
}) as unknown as BaseChatModel,
|
||||
@@ -93,7 +93,7 @@ export const loadOpenAIEmbeddingModels = async () => {
|
||||
embeddingModels[model.key] = {
|
||||
displayName: model.displayName,
|
||||
model: new OpenAIEmbeddings({
|
||||
openAIApiKey: openaiApiKey,
|
||||
apiKey: openaiApiKey,
|
||||
modelName: model.key,
|
||||
}) as unknown as Embeddings,
|
||||
};
|
||||
|
@@ -24,6 +24,7 @@ import computeSimilarity from '../utils/computeSimilarity';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import eventEmitter from 'events';
|
||||
import { StreamEvent } from '@langchain/core/tracers/log_stream';
|
||||
import { z } from 'zod';
|
||||
|
||||
export interface MetaSearchAgentType {
|
||||
searchAndAnswer: (
|
||||
@@ -52,6 +53,17 @@ type BasicChainInput = {
|
||||
query: string;
|
||||
};
|
||||
|
||||
const retrieverLLMOutputSchema = z.object({
|
||||
query: z.string().describe('The query to search the web for.'),
|
||||
links: z
|
||||
.array(z.string())
|
||||
.describe('The links to search/summarize if present'),
|
||||
searchRequired: z
|
||||
.boolean()
|
||||
.describe('Wether there is a need to search the web'),
|
||||
searchMode: z.enum(['', 'normal', 'news']).describe('The search mode.'),
|
||||
});
|
||||
|
||||
class MetaSearchAgent implements MetaSearchAgentType {
|
||||
private config: Config;
|
||||
private strParser = new StringOutputParser();
|
||||
@@ -62,73 +74,71 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
|
||||
private async createSearchRetrieverChain(llm: BaseChatModel) {
|
||||
(llm as unknown as ChatOpenAI).temperature = 0;
|
||||
|
||||
return RunnableSequence.from([
|
||||
PromptTemplate.fromTemplate(this.config.queryGeneratorPrompt),
|
||||
llm,
|
||||
this.strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
const linksOutputParser = new LineListOutputParser({
|
||||
key: 'links',
|
||||
});
|
||||
Object.assign(
|
||||
Object.create(Object.getPrototypeOf(llm)),
|
||||
llm,
|
||||
).withStructuredOutput(retrieverLLMOutputSchema, {
|
||||
...(llm.metadata?.['model-type'] === 'groq'
|
||||
? {
|
||||
method: 'json-object',
|
||||
}
|
||||
: {}),
|
||||
}),
|
||||
RunnableLambda.from(
|
||||
async (input: z.infer<typeof retrieverLLMOutputSchema>) => {
|
||||
let question = input.query;
|
||||
const links = input.links;
|
||||
|
||||
const questionOutputParser = new LineOutputParser({
|
||||
key: 'question',
|
||||
});
|
||||
|
||||
const links = await linksOutputParser.parse(input);
|
||||
let question = this.config.summarizer
|
||||
? await questionOutputParser.parse(input)
|
||||
: input;
|
||||
|
||||
if (question === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
|
||||
if (links.length > 0) {
|
||||
if (question.length === 0) {
|
||||
question = 'summarize';
|
||||
if (!input.searchRequired) {
|
||||
return { query: '', docs: [] };
|
||||
}
|
||||
|
||||
let docs: Document[] = [];
|
||||
|
||||
const linkDocs = await getDocumentsFromLinks({ links });
|
||||
|
||||
const docGroups: Document[] = [];
|
||||
|
||||
linkDocs.map((doc) => {
|
||||
const URLDocExists = docGroups.find(
|
||||
(d) =>
|
||||
d.metadata.url === doc.metadata.url &&
|
||||
d.metadata.totalDocs < 10,
|
||||
);
|
||||
|
||||
if (!URLDocExists) {
|
||||
docGroups.push({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
totalDocs: 1,
|
||||
},
|
||||
});
|
||||
if (links.length > 0) {
|
||||
if (question.length === 0) {
|
||||
question = 'summarize';
|
||||
}
|
||||
|
||||
const docIndex = docGroups.findIndex(
|
||||
(d) =>
|
||||
d.metadata.url === doc.metadata.url &&
|
||||
d.metadata.totalDocs < 10,
|
||||
);
|
||||
let docs: Document[] = [];
|
||||
|
||||
if (docIndex !== -1) {
|
||||
docGroups[docIndex].pageContent =
|
||||
docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
|
||||
docGroups[docIndex].metadata.totalDocs += 1;
|
||||
}
|
||||
});
|
||||
const linkDocs = await getDocumentsFromLinks({ links });
|
||||
|
||||
await Promise.all(
|
||||
docGroups.map(async (doc) => {
|
||||
const res = await llm.invoke(`
|
||||
const docGroups: Document[] = [];
|
||||
|
||||
linkDocs.map((doc) => {
|
||||
const URLDocExists = docGroups.find(
|
||||
(d) =>
|
||||
d.metadata.url === doc.metadata.url &&
|
||||
d.metadata.totalDocs < 10,
|
||||
);
|
||||
|
||||
if (!URLDocExists) {
|
||||
docGroups.push({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
totalDocs: 1,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const docIndex = docGroups.findIndex(
|
||||
(d) =>
|
||||
d.metadata.url === doc.metadata.url &&
|
||||
d.metadata.totalDocs < 10,
|
||||
);
|
||||
|
||||
if (docIndex !== -1) {
|
||||
docGroups[docIndex].pageContent =
|
||||
docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
|
||||
docGroups[docIndex].metadata.totalDocs += 1;
|
||||
}
|
||||
});
|
||||
|
||||
await Promise.all(
|
||||
docGroups.map(async (doc) => {
|
||||
const res = await llm.invoke(`
|
||||
You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the
|
||||
text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query.
|
||||
If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary.
|
||||
@@ -189,46 +199,50 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||
Make sure to answer the query in the summary.
|
||||
`);
|
||||
|
||||
const document = new Document({
|
||||
pageContent: res.content as string,
|
||||
metadata: {
|
||||
title: doc.metadata.title,
|
||||
url: doc.metadata.url,
|
||||
},
|
||||
});
|
||||
const document = new Document({
|
||||
pageContent: res.content as string,
|
||||
metadata: {
|
||||
title: doc.metadata.title,
|
||||
url: doc.metadata.url,
|
||||
},
|
||||
});
|
||||
|
||||
docs.push(document);
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: question, docs: docs };
|
||||
} else {
|
||||
question = question.replace(/<think>.*?<\/think>/g, '');
|
||||
|
||||
const res = await searchSearxng(question, {
|
||||
language: 'en',
|
||||
engines: this.config.activeEngines,
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent:
|
||||
result.content ||
|
||||
(this.config.activeEngines.includes('youtube')
|
||||
? result.title
|
||||
: '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
docs.push(document);
|
||||
}),
|
||||
);
|
||||
);
|
||||
|
||||
return { query: question, docs: documents };
|
||||
}
|
||||
}),
|
||||
return { query: question, docs: docs };
|
||||
} else {
|
||||
question = question.replace(/<think>.*?<\/think>/g, '');
|
||||
|
||||
const res = await searchSearxng(question, {
|
||||
language: 'en',
|
||||
engines:
|
||||
input.searchMode === 'normal'
|
||||
? this.config.activeEngines
|
||||
: ['bing news'],
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent:
|
||||
result.content ||
|
||||
(this.config.activeEngines.includes('youtube')
|
||||
? result.title
|
||||
: '') /* Todo: Implement transcript grabbing using Youtubei (source: https://www.npmjs.com/package/youtubei) */,
|
||||
metadata: {
|
||||
title: result.title,
|
||||
url: result.url,
|
||||
...(result.img_src && { img_src: result.img_src }),
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
return { query: question, docs: documents };
|
||||
}
|
||||
},
|
||||
),
|
||||
]);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user