mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2025-12-25 21:18:14 +00:00
feat(actions): limit urls & queries to 3
This commit is contained in:
@@ -6,52 +6,54 @@ import TurnDown from 'turndown';
|
||||
const turndownService = new TurnDown();
|
||||
|
||||
const schema = z.object({
|
||||
urls: z.array(z.string()).describe('A list of URLs to scrape content from.'),
|
||||
urls: z.array(z.string()).describe('A list of URLs to scrape content from.'),
|
||||
});
|
||||
|
||||
const scrapeURLAction: ResearchAction<typeof schema> = {
|
||||
name: 'scrape_url',
|
||||
description:
|
||||
'Use after __plan to scrape and extract content from the provided URLs. This is useful when you need detailed information from specific web pages or if the user asks you to summarize or analyze content from certain links.',
|
||||
schema: schema,
|
||||
enabled: (_) => true,
|
||||
execute: async (params, additionalConfig) => {
|
||||
const results: Chunk[] = [];
|
||||
name: 'scrape_url',
|
||||
description:
|
||||
'Use after __plan to scrape and extract content from the provided URLs. This is useful when you need detailed information from specific web pages or if the user asks you to summarize or analyze content from certain links. You can scrape maximum of 3 URLs.',
|
||||
schema: schema,
|
||||
enabled: (_) => true,
|
||||
execute: async (params, additionalConfig) => {
|
||||
params.urls = params.urls.slice(0, 3);
|
||||
|
||||
await Promise.all(
|
||||
params.urls.map(async (url) => {
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
const text = await res.text();
|
||||
const results: Chunk[] = [];
|
||||
|
||||
const title =
|
||||
text.match(/<title>(.*?)<\/title>/i)?.[1] || `Content from ${url}`;
|
||||
const markdown = turndownService.turndown(text);
|
||||
await Promise.all(
|
||||
params.urls.map(async (url) => {
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
const text = await res.text();
|
||||
|
||||
results.push({
|
||||
content: markdown,
|
||||
metadata: {
|
||||
url,
|
||||
title: title,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
results.push({
|
||||
content: `Failed to fetch content from ${url}: ${error}`,
|
||||
metadata: {
|
||||
url,
|
||||
title: `Error fetching ${url}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
}),
|
||||
);
|
||||
const title =
|
||||
text.match(/<title>(.*?)<\/title>/i)?.[1] || `Content from ${url}`;
|
||||
const markdown = turndownService.turndown(text);
|
||||
|
||||
return {
|
||||
type: 'search_results',
|
||||
results,
|
||||
};
|
||||
},
|
||||
results.push({
|
||||
content: markdown,
|
||||
metadata: {
|
||||
url,
|
||||
title: title,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
results.push({
|
||||
content: `Failed to fetch content from ${url}: ${error}`,
|
||||
metadata: {
|
||||
url,
|
||||
title: `Error fetching ${url}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
return {
|
||||
type: 'search_results',
|
||||
results,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
export default scrapeURLAction;
|
||||
|
||||
Reference in New Issue
Block a user