mirror of
https://github.com/ItzCrazyKns/Perplexica.git
synced 2026-04-09 21:34:27 +00:00
Compare commits
37 Commits
86274326e9
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e8d883768 | ||
|
|
71790f346e | ||
|
|
40a7cdeb4b | ||
|
|
ebcf5384f3 | ||
|
|
a889fdc31e | ||
|
|
c3ee1988d2 | ||
|
|
5991416142 | ||
|
|
7a6fad95ef | ||
|
|
b868aa0287 | ||
|
|
65b2c3f234 | ||
|
|
cbe538cc36 | ||
|
|
f6a47fd3e1 | ||
|
|
690b11cc1c | ||
|
|
96e860a310 | ||
|
|
65475b418f | ||
|
|
acaa208a41 | ||
|
|
54f3a7d2b8 | ||
|
|
1a124b8b07 | ||
|
|
3098622cb0 | ||
|
|
3646495bdf | ||
|
|
476c4ec8c2 | ||
|
|
0e33641927 | ||
|
|
8c061f20a5 | ||
|
|
72ac815294 | ||
|
|
d16b7e271a | ||
|
|
58ed869b3d | ||
|
|
3fede054da | ||
|
|
21bd88787e | ||
|
|
b02f5aa37f | ||
|
|
f83f813bd7 | ||
|
|
0c101d9704 | ||
|
|
7ab23d6339 | ||
|
|
c42d2177b7 | ||
|
|
39c0f198b5 | ||
|
|
80d4f23765 | ||
|
|
a2f2ac532e | ||
|
|
1763ee9d1f |
Binary file not shown.
|
Before Width: | Height: | Size: 2.1 MiB |
BIN
.assets/vane-screenshot.png
Normal file
BIN
.assets/vane-screenshot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.6 MiB |
80
.github/workflows/docker-build.yaml
vendored
80
.github/workflows/docker-build.yaml
vendored
@@ -44,11 +44,11 @@ jobs:
|
||||
DOCKERFILE=${{ matrix.variant.dockerfile }}
|
||||
VARIANT=${{ matrix.variant.name }}
|
||||
docker buildx build --platform linux/amd64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/perplexica:${VARIANT}-amd64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/vane:${VARIANT}-amd64 \
|
||||
--cache-to=type=inline \
|
||||
--provenance false \
|
||||
-f $DOCKERFILE \
|
||||
-t itzcrazykns1337/perplexica:${VARIANT}-amd64 \
|
||||
-t itzcrazykns1337/vane:${VARIANT}-amd64 \
|
||||
--push .
|
||||
|
||||
- name: Build and push AMD64 Canary Docker image
|
||||
@@ -57,11 +57,11 @@ jobs:
|
||||
DOCKERFILE=${{ matrix.variant.dockerfile }}
|
||||
VARIANT=${{ matrix.variant.name }}
|
||||
docker buildx build --platform linux/amd64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/perplexica:${VARIANT}-canary-amd64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/vane:${VARIANT}-canary-amd64 \
|
||||
--cache-to=type=inline \
|
||||
--provenance false \
|
||||
-f $DOCKERFILE \
|
||||
-t itzcrazykns1337/perplexica:${VARIANT}-canary-amd64 \
|
||||
-t itzcrazykns1337/vane:${VARIANT}-canary-amd64 \
|
||||
--push .
|
||||
|
||||
- name: Build and push AMD64 release Docker image
|
||||
@@ -70,11 +70,11 @@ jobs:
|
||||
DOCKERFILE=${{ matrix.variant.dockerfile }}
|
||||
VARIANT=${{ matrix.variant.name }}
|
||||
docker buildx build --platform linux/amd64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
--cache-to=type=inline \
|
||||
--provenance false \
|
||||
-f $DOCKERFILE \
|
||||
-t itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
-t itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
--push .
|
||||
|
||||
build-arm64:
|
||||
@@ -112,11 +112,11 @@ jobs:
|
||||
DOCKERFILE=${{ matrix.variant.dockerfile }}
|
||||
VARIANT=${{ matrix.variant.name }}
|
||||
docker buildx build --platform linux/arm64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/perplexica:${VARIANT}-arm64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/vane:${VARIANT}-arm64 \
|
||||
--cache-to=type=inline \
|
||||
--provenance false \
|
||||
-f $DOCKERFILE \
|
||||
-t itzcrazykns1337/perplexica:${VARIANT}-arm64 \
|
||||
-t itzcrazykns1337/vane:${VARIANT}-arm64 \
|
||||
--push .
|
||||
|
||||
- name: Build and push ARM64 Canary Docker image
|
||||
@@ -125,11 +125,11 @@ jobs:
|
||||
DOCKERFILE=${{ matrix.variant.dockerfile }}
|
||||
VARIANT=${{ matrix.variant.name }}
|
||||
docker buildx build --platform linux/arm64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/perplexica:${VARIANT}-canary-arm64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/vane:${VARIANT}-canary-arm64 \
|
||||
--cache-to=type=inline \
|
||||
--provenance false \
|
||||
-f $DOCKERFILE \
|
||||
-t itzcrazykns1337/perplexica:${VARIANT}-canary-arm64 \
|
||||
-t itzcrazykns1337/vane:${VARIANT}-canary-arm64 \
|
||||
--push .
|
||||
|
||||
- name: Build and push ARM64 release Docker image
|
||||
@@ -138,11 +138,11 @@ jobs:
|
||||
DOCKERFILE=${{ matrix.variant.dockerfile }}
|
||||
VARIANT=${{ matrix.variant.name }}
|
||||
docker buildx build --platform linux/arm64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64 \
|
||||
--cache-from=type=registry,ref=itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64 \
|
||||
--cache-to=type=inline \
|
||||
--provenance false \
|
||||
-f $DOCKERFILE \
|
||||
-t itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64 \
|
||||
-t itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64 \
|
||||
--push .
|
||||
|
||||
manifest:
|
||||
@@ -167,51 +167,51 @@ jobs:
|
||||
if: github.ref == 'refs/heads/master' && github.event_name == 'push'
|
||||
run: |
|
||||
VARIANT=${{ matrix.variant }}
|
||||
docker manifest create itzcrazykns1337/perplexica:${VARIANT}-latest \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-amd64 \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-arm64
|
||||
docker manifest push itzcrazykns1337/perplexica:${VARIANT}-latest
|
||||
docker manifest create itzcrazykns1337/vane:${VARIANT}-latest \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-amd64 \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-arm64
|
||||
docker manifest push itzcrazykns1337/vane:${VARIANT}-latest
|
||||
|
||||
if [ "$VARIANT" = "full" ]; then
|
||||
docker manifest create itzcrazykns1337/perplexica:latest \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-amd64 \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-arm64
|
||||
docker manifest push itzcrazykns1337/perplexica:latest
|
||||
docker manifest create itzcrazykns1337/vane:latest \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-amd64 \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-arm64
|
||||
docker manifest push itzcrazykns1337/vane:latest
|
||||
|
||||
docker manifest create itzcrazykns1337/perplexica:main \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-amd64 \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-arm64
|
||||
docker manifest push itzcrazykns1337/perplexica:main
|
||||
docker manifest create itzcrazykns1337/vane:main \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-amd64 \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-arm64
|
||||
docker manifest push itzcrazykns1337/vane:main
|
||||
fi
|
||||
|
||||
- name: Create and push manifest for canary
|
||||
if: github.ref == 'refs/heads/canary' && github.event_name == 'push'
|
||||
run: |
|
||||
VARIANT=${{ matrix.variant }}
|
||||
docker manifest create itzcrazykns1337/perplexica:${VARIANT}-canary \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-canary-amd64 \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-canary-arm64
|
||||
docker manifest push itzcrazykns1337/perplexica:${VARIANT}-canary
|
||||
docker manifest create itzcrazykns1337/vane:${VARIANT}-canary \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-canary-amd64 \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-canary-arm64
|
||||
docker manifest push itzcrazykns1337/vane:${VARIANT}-canary
|
||||
|
||||
if [ "$VARIANT" = "full" ]; then
|
||||
docker manifest create itzcrazykns1337/perplexica:canary \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-canary-amd64 \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-canary-arm64
|
||||
docker manifest push itzcrazykns1337/perplexica:canary
|
||||
docker manifest create itzcrazykns1337/vane:canary \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-canary-amd64 \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-canary-arm64
|
||||
docker manifest push itzcrazykns1337/vane:canary
|
||||
fi
|
||||
|
||||
- name: Create and push manifest for releases
|
||||
if: github.event_name == 'release'
|
||||
run: |
|
||||
VARIANT=${{ matrix.variant }}
|
||||
docker manifest create itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }} \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64
|
||||
docker manifest push itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}
|
||||
docker manifest create itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }} \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64
|
||||
docker manifest push itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}
|
||||
|
||||
if [ "$VARIANT" = "full" ]; then
|
||||
docker manifest create itzcrazykns1337/perplexica:${{ env.RELEASE_VERSION }} \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
--amend itzcrazykns1337/perplexica:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64
|
||||
docker manifest push itzcrazykns1337/perplexica:${{ env.RELEASE_VERSION }}
|
||||
docker manifest create itzcrazykns1337/vane:${{ env.RELEASE_VERSION }} \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-amd64 \
|
||||
--amend itzcrazykns1337/vane:${VARIANT}-${{ env.RELEASE_VERSION }}-arm64
|
||||
docker manifest push itzcrazykns1337/vane:${{ env.RELEASE_VERSION }}
|
||||
fi
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
# How to Contribute to Perplexica
|
||||
# How to Contribute to Vane
|
||||
|
||||
Thanks for your interest in contributing to Perplexica! Your help makes this project better. This guide explains how to contribute effectively.
|
||||
Thanks for your interest in contributing to Vane! Your help makes this project better. This guide explains how to contribute effectively.
|
||||
|
||||
Perplexica is a modern AI chat application with advanced search capabilities.
|
||||
Vane is a modern AI chat application with advanced search capabilities.
|
||||
|
||||
## Project Structure
|
||||
|
||||
Perplexica's codebase is organized as follows:
|
||||
Vane's codebase is organized as follows:
|
||||
|
||||
- **UI Components and Pages**:
|
||||
- **Components (`src/components`)**: Reusable UI components.
|
||||
@@ -53,7 +53,7 @@ If you are not sure where to start, use this section as a map.
|
||||
|
||||
## API Documentation
|
||||
|
||||
Perplexica includes API documentation for programmatic access.
|
||||
Vane includes API documentation for programmatic access.
|
||||
|
||||
- **Search API**: For detailed documentation, see `docs/API/SEARCH.md`.
|
||||
|
||||
@@ -79,4 +79,4 @@ Before committing changes:
|
||||
2. Always run `npm run format:write` to format your code according to the project's coding standards. This helps maintain consistency and code quality.
|
||||
3. We currently do not have a code of conduct, but it is in the works. In the meantime, please be mindful of how you engage with the project and its community.
|
||||
|
||||
Following these steps will help maintain the integrity of Perplexica's codebase and facilitate a smoother integration of your valuable contributions. Thank you for your support and commitment to improving Perplexica.
|
||||
Following these steps will help maintain the integrity of Vane's codebase and facilitate a smoother integration of your valuable contributions. Thank you for your support and commitment to improving Vane.
|
||||
|
||||
25
Dockerfile
25
Dockerfile
@@ -2,7 +2,7 @@ FROM node:24.5.0-slim AS builder
|
||||
|
||||
RUN apt-get update && apt-get install -y python3 python3-pip sqlite3 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
WORKDIR /home/vane
|
||||
|
||||
COPY package.json yarn.lock ./
|
||||
RUN yarn install --frozen-lockfile --network-timeout 600000
|
||||
@@ -12,7 +12,7 @@ COPY src ./src
|
||||
COPY public ./public
|
||||
COPY drizzle ./drizzle
|
||||
|
||||
RUN mkdir -p /home/perplexica/data
|
||||
RUN mkdir -p /home/vane/data
|
||||
RUN yarn build
|
||||
|
||||
FROM node:24.5.0-slim
|
||||
@@ -24,15 +24,18 @@ RUN apt-get update && apt-get install -y \
|
||||
curl sudo \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
WORKDIR /home/vane
|
||||
|
||||
COPY --from=builder /home/perplexica/public ./public
|
||||
COPY --from=builder /home/perplexica/.next/static ./public/_next/static
|
||||
COPY --from=builder /home/perplexica/.next/standalone ./
|
||||
COPY --from=builder /home/perplexica/data ./data
|
||||
COPY --from=builder /home/vane/public ./public
|
||||
COPY --from=builder /home/vane/.next/static ./public/_next/static
|
||||
COPY --from=builder /home/vane/.next/standalone ./
|
||||
COPY --from=builder /home/vane/data ./data
|
||||
COPY drizzle ./drizzle
|
||||
|
||||
RUN mkdir /home/perplexica/uploads
|
||||
RUN mkdir /home/vane/uploads
|
||||
|
||||
RUN yarn add playwright
|
||||
RUN yarn playwright install --with-deps --only-shell chromium
|
||||
|
||||
RUN useradd --shell /bin/bash --system \
|
||||
--home-dir "/usr/local/searxng" \
|
||||
@@ -54,13 +57,13 @@ RUN git clone "https://github.com/searxng/searxng" \
|
||||
"/usr/local/searxng/searxng-src"
|
||||
|
||||
RUN python3 -m venv "/usr/local/searxng/searx-pyenv"
|
||||
RUN "/usr/local/searxng/searx-pyenv/bin/pip" install --upgrade pip setuptools wheel pyyaml msgspec
|
||||
RUN "/usr/local/searxng/searx-pyenv/bin/pip" install --upgrade pip setuptools wheel pyyaml msgspec typing_extensions
|
||||
RUN cd "/usr/local/searxng/searxng-src" && \
|
||||
"/usr/local/searxng/searx-pyenv/bin/pip" install --use-pep517 --no-build-isolation -e .
|
||||
|
||||
USER root
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
WORKDIR /home/vane
|
||||
COPY entrypoint.sh ./entrypoint.sh
|
||||
RUN chmod +x ./entrypoint.sh
|
||||
RUN sed -i 's/\r$//' ./entrypoint.sh || true
|
||||
@@ -71,4 +74,4 @@ EXPOSE 3000 8080
|
||||
|
||||
ENV SEARXNG_API_URL=http://localhost:8080
|
||||
|
||||
CMD ["/home/perplexica/entrypoint.sh"]
|
||||
CMD ["/home/vane/entrypoint.sh"]
|
||||
|
||||
@@ -2,7 +2,7 @@ FROM node:24.5.0-slim AS builder
|
||||
|
||||
RUN apt-get update && apt-get install -y python3 python3-pip sqlite3 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
WORKDIR /home/vane
|
||||
|
||||
COPY package.json yarn.lock ./
|
||||
RUN yarn install --frozen-lockfile --network-timeout 600000
|
||||
@@ -12,23 +12,23 @@ COPY src ./src
|
||||
COPY public ./public
|
||||
COPY drizzle ./drizzle
|
||||
|
||||
RUN mkdir -p /home/perplexica/data
|
||||
RUN mkdir -p /home/vane/data
|
||||
RUN yarn build
|
||||
|
||||
FROM node:24.5.0-slim
|
||||
|
||||
RUN apt-get update && apt-get install -y python3 python3-pip sqlite3 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
WORKDIR /home/vane
|
||||
|
||||
COPY --from=builder /home/perplexica/public ./public
|
||||
COPY --from=builder /home/perplexica/.next/static ./public/_next/static
|
||||
COPY --from=builder /home/vane/public ./public
|
||||
COPY --from=builder /home/vane/.next/static ./public/_next/static
|
||||
|
||||
COPY --from=builder /home/perplexica/.next/standalone ./
|
||||
COPY --from=builder /home/perplexica/data ./data
|
||||
COPY --from=builder /home/vane/.next/standalone ./
|
||||
COPY --from=builder /home/vane/data ./data
|
||||
COPY drizzle ./drizzle
|
||||
|
||||
RUN mkdir /home/perplexica/uploads
|
||||
RUN mkdir /home/vane/uploads
|
||||
|
||||
EXPOSE 3000
|
||||
|
||||
|
||||
88
README.md
88
README.md
@@ -1,18 +1,18 @@
|
||||
# Perplexica 🔍
|
||||
# Vane 🔍
|
||||
|
||||
[](https://github.com/ItzCrazyKns/Perplexica/stargazers)
|
||||
[](https://github.com/ItzCrazyKns/Perplexica/network/members)
|
||||
[](https://github.com/ItzCrazyKns/Perplexica/watchers)
|
||||
[](https://hub.docker.com/r/itzcrazykns1337/perplexica)
|
||||
[](https://github.com/ItzCrazyKns/Perplexica/blob/master/LICENSE)
|
||||
[](https://github.com/ItzCrazyKns/Perplexica/commits/master)
|
||||
[](https://github.com/ItzCrazyKns/Vane/stargazers)
|
||||
[](https://github.com/ItzCrazyKns/Vane/network/members)
|
||||
[](https://github.com/ItzCrazyKns/Vane/watchers)
|
||||
[](https://hub.docker.com/r/itzcrazykns1337/vane)
|
||||
[](https://github.com/ItzCrazyKns/Vane/blob/master/LICENSE)
|
||||
[](https://github.com/ItzCrazyKns/Vane/commits/master)
|
||||
[](https://discord.gg/26aArMy8tT)
|
||||
|
||||
Perplexica is a **privacy-focused AI answering engine** that runs entirely on your own hardware. It combines knowledge from the vast internet with support for **local LLMs** (Ollama) and cloud providers (OpenAI, Claude, Groq), delivering accurate answers with **cited sources** while keeping your searches completely private.
|
||||
Vane is a **privacy-focused AI answering engine** that runs entirely on your own hardware. It combines knowledge from the vast internet with support for **local LLMs** (Ollama) and cloud providers (OpenAI, Claude, Groq), delivering accurate answers with **cited sources** while keeping your searches completely private.
|
||||
|
||||

|
||||

|
||||
|
||||
Want to know more about its architecture and how it works? You can read it [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/README.md).
|
||||
Want to know more about its architecture and how it works? You can read it [here](https://github.com/ItzCrazyKns/Vane/tree/master/docs/architecture/README.md).
|
||||
|
||||
## ✨ Features
|
||||
|
||||
@@ -28,7 +28,7 @@ Want to know more about its architecture and how it works? You can read it [here
|
||||
|
||||
📷 **Image and video search** - Find visual content alongside text results. Search isn't limited to just articles anymore.
|
||||
|
||||
📄 **File uploads** - Upload documents and ask questions about them. PDFs, text files, images - Perplexica understands them all.
|
||||
📄 **File uploads** - Upload documents and ask questions about them. PDFs, text files, images - Vane understands them all.
|
||||
|
||||
🌐 **Search specific domains** - Limit your search to specific websites when you know where to look. Perfect for technical documentation or research papers.
|
||||
|
||||
@@ -38,11 +38,11 @@ Want to know more about its architecture and how it works? You can read it [here
|
||||
|
||||
🕒 **Search history** - Every search is saved locally so you can revisit your discoveries anytime. Your research is never lost.
|
||||
|
||||
✨ **More coming soon** - We're actively developing new features based on community feedback. Join our Discord to help shape Perplexica's future!
|
||||
✨ **More coming soon** - We're actively developing new features based on community feedback. Join our Discord to help shape Vane's future!
|
||||
|
||||
## Sponsors
|
||||
|
||||
Perplexica's development is powered by the generous support of our sponsors. Their contributions help keep this project free, open-source, and accessible to everyone.
|
||||
Vane's development is powered by the generous support of our sponsors. Their contributions help keep this project free, open-source, and accessible to everyone.
|
||||
|
||||
<div align="center">
|
||||
|
||||
@@ -51,7 +51,7 @@ Perplexica's development is powered by the generous support of our sponsors. The
|
||||
<img alt="Warp Terminal" src=".assets/sponsers/warp.png" width="100%">
|
||||
</a>
|
||||
|
||||
### **✨ [Try Warp - The AI-Powered Terminal →](https://www.warp.dev/perplexica)**
|
||||
### **✨ [Try Warp - The AI-Powered Terminal →](https://www.warp.dev/vane)**
|
||||
|
||||
Warp is revolutionizing development workflows with AI-powered features, modern UX, and blazing-fast performance. Used by developers at top companies worldwide.
|
||||
|
||||
@@ -76,26 +76,26 @@ We'd also like to thank the following partners for their generous support:
|
||||
|
||||
## Installation
|
||||
|
||||
There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. Using Docker is highly recommended.
|
||||
There are two main ways to install Vane: with Docker or without Docker. Using Docker is highly recommended.
|
||||
|
||||
### Getting Started with Docker (Recommended)
|
||||
|
||||
Perplexica can be easily run using Docker. Simply run the following command:
|
||||
Vane can be easily run using Docker. Simply run the following command:
|
||||
|
||||
```bash
|
||||
docker run -d -p 3000:3000 -v perplexica-data:/home/perplexica/data --name perplexica itzcrazykns1337/perplexica:latest
|
||||
docker run -d -p 3000:3000 -v vane-data:/home/vane/data --name vane itzcrazykns1337/vane:latest
|
||||
```
|
||||
|
||||
This will pull and start the Perplexica container with the bundled SearxNG search engine. Once running, open your browser and navigate to http://localhost:3000. You can then configure your settings (API keys, models, etc.) directly in the setup screen.
|
||||
This will pull and start the Vane container with the bundled SearxNG search engine. Once running, open your browser and navigate to http://localhost:3000. You can then configure your settings (API keys, models, etc.) directly in the setup screen.
|
||||
|
||||
**Note**: The image includes both Perplexica and SearxNG, so no additional setup is required. The `-v` flags create persistent volumes for your data and uploaded files.
|
||||
**Note**: The image includes both Vane and SearxNG, so no additional setup is required. The `-v` flag creates a persistent volume for your data and uploaded files.
|
||||
|
||||
#### Using Perplexica with Your Own SearxNG Instance
|
||||
#### Using Vane with Your Own SearxNG Instance
|
||||
|
||||
If you already have SearxNG running, you can use the slim version of Perplexica:
|
||||
If you already have SearxNG running, you can use the slim version of Vane:
|
||||
|
||||
```bash
|
||||
docker run -d -p 3000:3000 -e SEARXNG_API_URL=http://your-searxng-url:8080 -v perplexica-data:/home/perplexica/data --name perplexica itzcrazykns1337/perplexica:slim-latest
|
||||
docker run -d -p 3000:3000 -e SEARXNG_API_URL=http://your-searxng-url:8080 -v vane-data:/home/vane/data --name vane itzcrazykns1337/vane:slim-latest
|
||||
```
|
||||
|
||||
**Important**: Make sure your SearxNG instance has:
|
||||
@@ -110,10 +110,10 @@ Replace `http://your-searxng-url:8080` with your actual SearxNG URL. Then config
|
||||
If you prefer to build from source or need more control:
|
||||
|
||||
1. Ensure Docker is installed and running on your system.
|
||||
2. Clone the Perplexica repository:
|
||||
2. Clone the Vane repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ItzCrazyKns/Perplexica.git
|
||||
git clone https://github.com/ItzCrazyKns/Vane.git
|
||||
```
|
||||
|
||||
3. After cloning, navigate to the directory containing the project files.
|
||||
@@ -121,13 +121,13 @@ If you prefer to build from source or need more control:
|
||||
4. Build and run using Docker:
|
||||
|
||||
```bash
|
||||
docker build -t perplexica .
|
||||
docker run -d -p 3000:3000 -v perplexica-data:/home/perplexica/data --name perplexica perplexica
|
||||
docker build -t vane .
|
||||
docker run -d -p 3000:3000 -v vane-data:/home/vane/data --name vane vane
|
||||
```
|
||||
|
||||
5. Access Perplexica at http://localhost:3000 and configure your settings in the setup screen.
|
||||
5. Access Vane at http://localhost:3000 and configure your settings in the setup screen.
|
||||
|
||||
**Note**: After the containers are built, you can start Perplexica directly from Docker without having to open a terminal.
|
||||
**Note**: After the containers are built, you can start Vane directly from Docker without having to open a terminal.
|
||||
|
||||
### Non-Docker Installation
|
||||
|
||||
@@ -135,8 +135,8 @@ If you prefer to build from source or need more control:
|
||||
2. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ItzCrazyKns/Perplexica.git
|
||||
cd Perplexica
|
||||
git clone https://github.com/ItzCrazyKns/Vane.git
|
||||
cd Vane
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
@@ -161,13 +161,13 @@ If you prefer to build from source or need more control:
|
||||
|
||||
**Note**: Using Docker is recommended as it simplifies the setup process, especially for managing environment variables and dependencies.
|
||||
|
||||
See the [installation documentation](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/installation) for more information like updating, etc.
|
||||
See the [installation documentation](https://github.com/ItzCrazyKns/Vane/tree/master/docs/installation) for more information like updating, etc.
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
#### Local OpenAI-API-Compliant Servers
|
||||
|
||||
If Perplexica tells you that you haven't configured any chat model providers, ensure that:
|
||||
If Vane tells you that you haven't configured any chat model providers, ensure that:
|
||||
|
||||
1. Your server is running on `0.0.0.0` (not `127.0.0.1`) and on the same port you put in the API URL.
|
||||
2. You have specified the correct model name loaded by your local LLM server.
|
||||
@@ -213,29 +213,29 @@ If you're encountering a Lemonade connection error, it is likely due to the back
|
||||
|
||||
## Using as a Search Engine
|
||||
|
||||
If you wish to use Perplexica as an alternative to traditional search engines like Google or Bing, or if you want to add a shortcut for quick access from your browser's search bar, follow these steps:
|
||||
If you wish to use Vane as an alternative to traditional search engines like Google or Bing, or if you want to add a shortcut for quick access from your browser's search bar, follow these steps:
|
||||
|
||||
1. Open your browser's settings.
|
||||
2. Navigate to the 'Search Engines' section.
|
||||
3. Add a new site search with the following URL: `http://localhost:3000/?q=%s`. Replace `localhost` with your IP address or domain name, and `3000` with the port number if Perplexica is not hosted locally.
|
||||
4. Click the add button. Now, you can use Perplexica directly from your browser's search bar.
|
||||
3. Add a new site search with the following URL: `http://localhost:3000/?q=%s`. Replace `localhost` with your IP address or domain name, and `3000` with the port number if Vane is not hosted locally.
|
||||
4. Click the add button. Now, you can use Vane directly from your browser's search bar.
|
||||
|
||||
## Using Perplexica's API
|
||||
## Using Vane's API
|
||||
|
||||
Perplexica also provides an API for developers looking to integrate its powerful search engine into their own applications. You can run searches, use multiple models and get answers to your queries.
|
||||
Vane also provides an API for developers looking to integrate its powerful search engine into their own applications. You can run searches, use multiple models and get answers to your queries.
|
||||
|
||||
For more details, check out the full documentation [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/API/SEARCH.md).
|
||||
For more details, check out the full documentation [here](https://github.com/ItzCrazyKns/Vane/tree/master/docs/API/SEARCH.md).
|
||||
|
||||
## Expose Perplexica to network
|
||||
## Expose Vane to network
|
||||
|
||||
Perplexica runs on Next.js and handles all API requests. It works right away on the same network and stays accessible even with port forwarding.
|
||||
Vane runs on Next.js and handles all API requests. It works right away on the same network and stays accessible even with port forwarding.
|
||||
|
||||
## One-Click Deployment
|
||||
|
||||
[](https://usw.sealos.io/?openapp=system-template%3FtemplateName%3Dperplexica)
|
||||
[](https://repocloud.io/details/?app_id=267)
|
||||
[](https://template.run.claw.cloud/?referralCode=U11MRQ8U9RM4&openapp=system-fastdeploy%3FtemplateName%3Dperplexica)
|
||||
[](https://www.hostinger.com/vps/docker-hosting?compose_url=https://raw.githubusercontent.com/ItzCrazyKns/Perplexica/refs/heads/master/docker-compose.yaml)
|
||||
[](https://www.hostinger.com/vps/docker-hosting?compose_url=https://raw.githubusercontent.com/ItzCrazyKns/Vane/refs/heads/master/docker-compose.yaml)
|
||||
|
||||
## Upcoming Features
|
||||
|
||||
@@ -245,7 +245,7 @@ Perplexica runs on Next.js and handles all API requests. It works right away on
|
||||
|
||||
## Support Us
|
||||
|
||||
If you find Perplexica useful, consider giving us a star on GitHub. This helps more people discover Perplexica and supports the development of new features. Your support is greatly appreciated.
|
||||
If you find Vane useful, consider giving us a star on GitHub. This helps more people discover Vane and supports the development of new features. Your support is greatly appreciated.
|
||||
|
||||
### Donations
|
||||
|
||||
@@ -257,10 +257,10 @@ We also accept donations to help sustain our project. If you would like to contr
|
||||
|
||||
## Contribution
|
||||
|
||||
Perplexica is built on the idea that AI and large language models should be easy for everyone to use. If you find bugs or have ideas, please share them in via GitHub Issues. For more information on contributing to Perplexica you can read the [CONTRIBUTING.md](CONTRIBUTING.md) file to learn more about Perplexica and how you can contribute to it.
|
||||
Vane is built on the idea that AI and large language models should be easy for everyone to use. If you find bugs or have ideas, please share them via GitHub Issues. For more information on contributing to Vane you can read the [CONTRIBUTING.md](CONTRIBUTING.md) file to learn more about Vane and how you can contribute to it.
|
||||
|
||||
## Help and Support
|
||||
|
||||
If you have any questions or feedback, please feel free to reach out to us. You can create an issue on GitHub or join our Discord server. There, you can connect with other users, share your experiences and reviews, and receive more personalized help. [Click here](https://discord.gg/EFwsmQDgAu) to join the Discord server. To discuss matters outside of regular support, feel free to contact me on Discord at `itzcrazykns`.
|
||||
|
||||
Thank you for exploring Perplexica, the AI-powered search engine designed to enhance your search experience. We are constantly working to improve Perplexica and expand its capabilities. We value your feedback and contributions which help us make Perplexica even better. Don't forget to check back for updates and new features!
|
||||
Thank you for exploring Vane, the AI-powered search engine designed to enhance your search experience. We are constantly working to improve Vane and expand its capabilities. We value your feedback and contributions which help us make Vane even better. Don't forget to check back for updates and new features!
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
services:
|
||||
perplexica:
|
||||
image: itzcrazykns1337/perplexica:latest
|
||||
vane:
|
||||
image: itzcrazykns1337/vane:latest
|
||||
build:
|
||||
context: .
|
||||
ports:
|
||||
- '3000:3000'
|
||||
volumes:
|
||||
- data:/home/perplexica/data
|
||||
- data:/home/vane/data
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
data:
|
||||
name: 'perplexica-data'
|
||||
name: 'vane-data'
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Perplexica Search API Documentation
|
||||
# Vane Search API Documentation
|
||||
|
||||
## Overview
|
||||
|
||||
Perplexica’s Search API makes it easy to use our AI-powered search engine. You can run different types of searches, pick the models you want to use, and get the most recent info. Follow the following headings to learn more about Perplexica's search API.
|
||||
Vane's Search API makes it easy to use our AI-powered search engine. You can run different types of searches, pick the models you want to use, and get the most recent info. See the sections below to learn more about Vane's Search API.
|
||||
|
||||
## Endpoints
|
||||
|
||||
@@ -53,7 +53,7 @@ Use the `id` field as the `providerId` and the `key` field from the models array
|
||||
|
||||
**Full URL**: `http://localhost:3000/api/search`
|
||||
|
||||
**Note**: Replace `localhost:3000` with your Perplexica instance URL if running on a different host or port
|
||||
**Note**: Replace `localhost:3000` with your Vane instance URL if running on a different host or port
|
||||
|
||||
### Request
|
||||
|
||||
@@ -73,12 +73,12 @@ The API accepts a JSON object in the request body, where you define the enabled
|
||||
},
|
||||
"optimizationMode": "speed",
|
||||
"sources": ["web"],
|
||||
"query": "What is Perplexica",
|
||||
"query": "What is Vane",
|
||||
"history": [
|
||||
["human", "Hi, how are you?"],
|
||||
["assistant", "I am doing well, how can I help you today?"]
|
||||
],
|
||||
"systemInstructions": "Focus on providing technical details about Perplexica's architecture.",
|
||||
"systemInstructions": "Focus on providing technical details about Vane's architecture.",
|
||||
"stream": false
|
||||
}
|
||||
```
|
||||
@@ -115,8 +115,8 @@ The API accepts a JSON object in the request body, where you define the enabled
|
||||
|
||||
```json
|
||||
[
|
||||
["human", "What is Perplexica?"],
|
||||
["assistant", "Perplexica is an AI-powered search engine..."]
|
||||
["human", "What is Vane?"],
|
||||
["assistant", "Vane is an AI-powered search engine..."]
|
||||
]
|
||||
```
|
||||
|
||||
@@ -130,20 +130,20 @@ The response from the API includes both the final message and the sources used t
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online. Here are some key features and characteristics of Perplexica:\n\n- **AI-Powered Technology**: It utilizes advanced machine learning algorithms to not only retrieve information but also to understand the context and intent behind user queries, providing more relevant results [1][5].\n\n- **Open-Source**: Being open-source, Perplexica offers flexibility and transparency, allowing users to explore its functionalities without the constraints of proprietary software [3][10].",
|
||||
"message": "Vane is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online. Here are some key features and characteristics of Vane:\n\n- **AI-Powered Technology**: It utilizes advanced machine learning algorithms to not only retrieve information but also to understand the context and intent behind user queries, providing more relevant results [1][5].\n\n- **Open-Source**: Being open-source, Vane offers flexibility and transparency, allowing users to explore its functionalities without the constraints of proprietary software [3][10].",
|
||||
"sources": [
|
||||
{
|
||||
"content": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online.",
|
||||
"content": "Vane is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online.",
|
||||
"metadata": {
|
||||
"title": "What is Perplexica, and how does it function as an AI-powered search ...",
|
||||
"url": "https://askai.glarity.app/search/What-is-Perplexica--and-how-does-it-function-as-an-AI-powered-search-engine"
|
||||
"title": "What is Vane, and how does it function as an AI-powered search ...",
|
||||
"url": "https://askai.glarity.app/search/What-is-Vane--and-how-does-it-function-as-an-AI-powered-search-engine"
|
||||
}
|
||||
},
|
||||
{
|
||||
"content": "Perplexica is an open-source AI-powered search tool that dives deep into the internet to find precise answers.",
|
||||
"content": "Vane is an open-source AI-powered search tool that dives deep into the internet to find precise answers.",
|
||||
"metadata": {
|
||||
"title": "Sahar Mor's Post",
|
||||
"url": "https://www.linkedin.com/posts/sahar-mor_a-new-open-source-project-called-perplexica-activity-7204489745668694016-ncja"
|
||||
"url": "https://www.linkedin.com/posts/sahar-mor_a-new-open-source-project-called-vane-activity-7204489745668694016-ncja"
|
||||
}
|
||||
}
|
||||
....
|
||||
@@ -160,7 +160,7 @@ Example of streamed response objects:
|
||||
```
|
||||
{"type":"init","data":"Stream connected"}
|
||||
{"type":"sources","data":[{"content":"...","metadata":{"title":"...","url":"..."}},...]}
|
||||
{"type":"response","data":"Perplexica is an "}
|
||||
{"type":"response","data":"Vane is an "}
|
||||
{"type":"response","data":"innovative, open-source "}
|
||||
{"type":"response","data":"AI-powered search engine..."}
|
||||
{"type":"done"}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Perplexica Architecture
|
||||
# Vane Architecture
|
||||
|
||||
Perplexica is a Next.js application that combines an AI chat experience with search.
|
||||
Vane is a Next.js application that combines an AI chat experience with search.
|
||||
|
||||
For a high-level flow, see [WORKING.md](WORKING.md). For deeper implementation details, see [CONTRIBUTING.md](../../CONTRIBUTING.md).
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# How Perplexica Works
|
||||
# How Vane Works
|
||||
|
||||
This is a high level overview of how Perplexica answers a question.
|
||||
This is a high-level overview of how Vane answers a question.
|
||||
|
||||
If you want a component-level overview, see [README.md](README.md).
|
||||
|
||||
@@ -58,7 +58,7 @@ We prompt the model to cite the references it used. The UI then renders those ci
|
||||
|
||||
## Search API
|
||||
|
||||
If you are integrating Perplexica into another product, you can call `POST /api/search`.
|
||||
If you are integrating Vane into another product, you can call `POST /api/search`.
|
||||
|
||||
It returns:
|
||||
|
||||
|
||||
@@ -1,60 +1,60 @@
|
||||
# Update Perplexica to the latest version
|
||||
# Update Vane to the latest version
|
||||
|
||||
To update Perplexica to the latest version, follow these steps:
|
||||
To update Vane to the latest version, follow these steps:
|
||||
|
||||
## For Docker users (Using pre-built images)
|
||||
|
||||
Simply pull the latest image and restart your container:
|
||||
|
||||
```bash
|
||||
docker pull itzcrazykns1337/perplexica:latest
|
||||
docker stop perplexica
|
||||
docker rm perplexica
|
||||
docker run -d -p 3000:3000 -v perplexica-data:/home/perplexica/data --name perplexica itzcrazykns1337/perplexica:latest
|
||||
docker pull itzcrazykns1337/vane:latest
|
||||
docker stop vane
|
||||
docker rm vane
|
||||
docker run -d -p 3000:3000 -v vane-data:/home/vane/data --name vane itzcrazykns1337/vane:latest
|
||||
```
|
||||
|
||||
For slim version:
|
||||
|
||||
```bash
|
||||
docker pull itzcrazykns1337/perplexica:slim-latest
|
||||
docker stop perplexica
|
||||
docker rm perplexica
|
||||
docker run -d -p 3000:3000 -e SEARXNG_API_URL=http://your-searxng-url:8080 -v perplexica-data:/home/perplexica/data --name perplexica itzcrazykns1337/perplexica:slim-latest
|
||||
docker pull itzcrazykns1337/vane:slim-latest
|
||||
docker stop vane
|
||||
docker rm vane
|
||||
docker run -d -p 3000:3000 -e SEARXNG_API_URL=http://your-searxng-url:8080 -v vane-data:/home/vane/data --name vane itzcrazykns1337/vane:slim-latest
|
||||
```
|
||||
|
||||
Once updated, go to http://localhost:3000 and verify the latest changes. Your settings are preserved automatically.
|
||||
|
||||
## For Docker users (Building from source)
|
||||
|
||||
1. Navigate to your Perplexica directory and pull the latest changes:
|
||||
1. Navigate to your Vane directory and pull the latest changes:
|
||||
|
||||
```bash
|
||||
cd Perplexica
|
||||
cd Vane
|
||||
git pull origin master
|
||||
```
|
||||
|
||||
2. Rebuild the Docker image:
|
||||
|
||||
```bash
|
||||
docker build -t perplexica .
|
||||
docker build -t vane .
|
||||
```
|
||||
|
||||
3. Stop and remove the old container, then start the new one:
|
||||
|
||||
```bash
|
||||
docker stop perplexica
|
||||
docker rm perplexica
|
||||
docker run -p 3000:3000 -p 8080:8080 --name perplexica perplexica
|
||||
docker stop vane
|
||||
docker rm vane
|
||||
docker run -p 3000:3000 -p 8080:8080 --name vane vane
|
||||
```
|
||||
|
||||
4. Once the command completes, go to http://localhost:3000 and verify the latest changes.
|
||||
|
||||
## For non-Docker users
|
||||
|
||||
1. Navigate to your Perplexica directory and pull the latest changes:
|
||||
1. Navigate to your Vane directory and pull the latest changes:
|
||||
|
||||
```bash
|
||||
cd Perplexica
|
||||
cd Vane
|
||||
git pull origin master
|
||||
```
|
||||
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import { defineConfig } from 'drizzle-kit';
|
||||
import path from 'path';
|
||||
|
||||
export default defineConfig({
|
||||
export default {
|
||||
dialect: 'sqlite',
|
||||
schema: './src/lib/db/schema.ts',
|
||||
out: './drizzle',
|
||||
dbCredentials: {
|
||||
url: path.join(process.cwd(), 'data', 'db.sqlite'),
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
@@ -26,7 +26,7 @@ else
|
||||
echo "SearXNG may not be fully ready, but continuing (PID: $SEARXNG_PID)"
|
||||
fi
|
||||
|
||||
cd /home/perplexica
|
||||
echo "Starting Perplexica..."
|
||||
cd /home/vane
|
||||
echo "Starting Vane..."
|
||||
|
||||
exec node server.js
|
||||
@@ -1,3 +1,4 @@
|
||||
import path from 'node:path';
|
||||
import pkg from './package.json' with { type: 'json' };
|
||||
|
||||
/** @type {import('next').NextConfig} */
|
||||
@@ -10,7 +11,12 @@ const nextConfig = {
|
||||
},
|
||||
],
|
||||
},
|
||||
serverExternalPackages: ['pdf-parse'],
|
||||
serverExternalPackages: [
|
||||
'pdf-parse',
|
||||
'playwright',
|
||||
'officeparser',
|
||||
'file-type',
|
||||
],
|
||||
outputFileTracingIncludes: {
|
||||
'/api/**': [
|
||||
'./node_modules/@napi-rs/canvas/**',
|
||||
@@ -21,6 +27,9 @@ const nextConfig = {
|
||||
env: {
|
||||
NEXT_PUBLIC_VERSION: pkg.version,
|
||||
},
|
||||
turbopack: {
|
||||
root: process.cwd(),
|
||||
},
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
|
||||
22
package.json
22
package.json
@@ -1,10 +1,10 @@
|
||||
{
|
||||
"name": "perplexica",
|
||||
"version": "1.12.1",
|
||||
"name": "vane",
|
||||
"version": "1.12.2",
|
||||
"license": "MIT",
|
||||
"author": "ItzCrazyKns",
|
||||
"scripts": {
|
||||
"dev": "next dev --webpack",
|
||||
"dev": "next dev",
|
||||
"build": "next build --webpack",
|
||||
"start": "next start",
|
||||
"lint": "next lint",
|
||||
@@ -16,16 +16,19 @@
|
||||
"@headlessui/tailwindcss": "^0.2.2",
|
||||
"@huggingface/transformers": "^3.8.1",
|
||||
"@icons-pack/react-simple-icons": "^12.3.0",
|
||||
"@mozilla/readability": "^0.6.0",
|
||||
"@phosphor-icons/react": "^2.1.10",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
"@tailwindcss/typography": "^0.5.12",
|
||||
"@toolsycc/json-repair": "^0.1.22",
|
||||
"async-mutex": "^0.5.0",
|
||||
"axios": "^1.8.3",
|
||||
"better-sqlite3": "^11.9.1",
|
||||
"clsx": "^2.1.0",
|
||||
"drizzle-orm": "^0.40.1",
|
||||
"drizzle-orm": "^0.45.2",
|
||||
"js-tiktoken": "^1.0.21",
|
||||
"jspdf": "^3.0.4",
|
||||
"jsdom": "^29.0.1",
|
||||
"jspdf": "^4.2.1",
|
||||
"lightweight-charts": "^5.0.9",
|
||||
"lucide-react": "^0.556.0",
|
||||
"mammoth": "^1.9.1",
|
||||
@@ -34,11 +37,12 @@
|
||||
"motion": "^12.23.26",
|
||||
"next": "^16.0.7",
|
||||
"next-themes": "^0.3.0",
|
||||
"officeparser": "^5.2.2",
|
||||
"officeparser": "^6.0.7",
|
||||
"ollama": "^0.6.3",
|
||||
"openai": "^6.9.0",
|
||||
"partial-json": "^0.1.7",
|
||||
"pdf-parse": "^2.4.5",
|
||||
"playwright": "^1.59.1",
|
||||
"react": "^18",
|
||||
"react-dom": "^18",
|
||||
"react-syntax-highlighter": "^16.1.0",
|
||||
@@ -47,13 +51,13 @@
|
||||
"rfc6902": "^5.1.2",
|
||||
"sonner": "^1.4.41",
|
||||
"tailwind-merge": "^2.2.2",
|
||||
"turndown": "^7.2.2",
|
||||
"yahoo-finance2": "^3.10.2",
|
||||
"yet-another-react-lightbox": "^3.17.2",
|
||||
"zod": "^4.1.12"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/better-sqlite3": "^7.6.12",
|
||||
"@types/jsdom": "^28.0.1",
|
||||
"@types/jspdf": "^2.0.0",
|
||||
"@types/node": "^24.8.1",
|
||||
"@types/pdf-parse": "^1.1.4",
|
||||
@@ -62,9 +66,9 @@
|
||||
"@types/react-syntax-highlighter": "^15.5.13",
|
||||
"@types/turndown": "^5.0.6",
|
||||
"autoprefixer": "^10.0.1",
|
||||
"drizzle-kit": "^0.30.5",
|
||||
"drizzle-kit": "^0.18.1",
|
||||
"eslint": "^8",
|
||||
"eslint-config-next": "14.1.4",
|
||||
"eslint-config-next": "^16.2.2",
|
||||
"postcss": "^8",
|
||||
"prettier": "^3.2.5",
|
||||
"tailwindcss": "^3.3.0",
|
||||
|
||||
@@ -19,9 +19,8 @@ const montserrat = Montserrat({
|
||||
});
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'Perplexica - Chat with the internet',
|
||||
description:
|
||||
'Perplexica is an AI powered chatbot that is connected to the internet.',
|
||||
title: 'Vane - Direct your curiosity',
|
||||
description: 'Vane is an AI powered answering engine.',
|
||||
};
|
||||
|
||||
export default function RootLayout({
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Metadata } from 'next';
|
||||
import React from 'react';
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'Library - Perplexica',
|
||||
title: 'Library - Vane',
|
||||
};
|
||||
|
||||
const Layout = ({ children }: { children: React.ReactNode }) => {
|
||||
|
||||
@@ -2,10 +2,9 @@ import type { MetadataRoute } from 'next';
|
||||
|
||||
export default function manifest(): MetadataRoute.Manifest {
|
||||
return {
|
||||
name: 'Perplexica - Chat with the internet',
|
||||
short_name: 'Perplexica',
|
||||
description:
|
||||
'Perplexica is an AI powered chatbot that is connected to the internet.',
|
||||
name: 'Vane - Direct Your Curiosity',
|
||||
short_name: 'Vane',
|
||||
description: 'Vane is an AI powered answering engine.',
|
||||
start_url: '/',
|
||||
display: 'standalone',
|
||||
background_color: '#0a0a0a',
|
||||
|
||||
@@ -2,8 +2,8 @@ import ChatWindow from '@/components/ChatWindow';
|
||||
import { Metadata } from 'next';
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'Chat - Perplexica',
|
||||
description: 'Chat with the internet, chat with Perplexica.',
|
||||
title: 'Chat - Vane',
|
||||
description: 'Chat with the internet, chat with Vane.',
|
||||
};
|
||||
|
||||
const Home = () => {
|
||||
|
||||
@@ -37,7 +37,8 @@ const getStepTitle = (
|
||||
if (step.type === 'reasoning') {
|
||||
return isStreaming && !step.reasoning ? 'Thinking...' : 'Thinking';
|
||||
} else if (step.type === 'searching') {
|
||||
return `Searching ${step.searching.length} ${step.searching.length === 1 ? 'query' : 'queries'}`;
|
||||
const queries = Array.isArray(step.searching) ? step.searching : [];
|
||||
return `Searching ${queries.length} ${queries.length === 1 ? 'query' : 'queries'}`;
|
||||
} else if (step.type === 'search_results') {
|
||||
return `Found ${step.reading.length} ${step.reading.length === 1 ? 'result' : 'results'}`;
|
||||
} else if (step.type === 'reading') {
|
||||
@@ -160,6 +161,7 @@ const AssistantSteps = ({
|
||||
)}
|
||||
|
||||
{step.type === 'searching' &&
|
||||
Array.isArray(step.searching) &&
|
||||
step.searching.length > 0 && (
|
||||
<div className="flex flex-wrap gap-1.5 mt-1.5">
|
||||
{step.searching.map((query, idx) => (
|
||||
|
||||
@@ -49,7 +49,7 @@ const Chat = () => {
|
||||
};
|
||||
|
||||
if (messages.length === 1) {
|
||||
document.title = `${messages[0].query.substring(0, 30)} - Perplexica`;
|
||||
document.title = `${messages[0].query.substring(0, 30)} - Vane`;
|
||||
}
|
||||
|
||||
if (sections.length > lastScrolledRef.current) {
|
||||
|
||||
@@ -18,6 +18,7 @@ import { Fragment, useRef, useState } from 'react';
|
||||
import { useChat } from '@/lib/hooks/useChat';
|
||||
import { AnimatePresence } from 'motion/react';
|
||||
import { motion } from 'framer-motion';
|
||||
import { toast } from 'sonner';
|
||||
|
||||
const Attach = () => {
|
||||
const { files, setFiles, setFileIds, fileIds } = useChat();
|
||||
@@ -26,11 +27,19 @@ const Attach = () => {
|
||||
const fileInputRef = useRef<any>();
|
||||
|
||||
const handleChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const selectedFiles = e.target.files;
|
||||
|
||||
if (!selectedFiles?.length) {
|
||||
return;
|
||||
}
|
||||
|
||||
setLoading(true);
|
||||
|
||||
try {
|
||||
const data = new FormData();
|
||||
|
||||
for (let i = 0; i < e.target.files!.length; i++) {
|
||||
data.append('files', e.target.files![i]);
|
||||
for (let i = 0; i < selectedFiles.length; i++) {
|
||||
data.append('files', selectedFiles[i]);
|
||||
}
|
||||
|
||||
const embeddingModelProvider = localStorage.getItem(
|
||||
@@ -38,19 +47,39 @@ const Attach = () => {
|
||||
);
|
||||
const embeddingModel = localStorage.getItem('embeddingModelKey');
|
||||
|
||||
data.append('embedding_model_provider_id', embeddingModelProvider!);
|
||||
data.append('embedding_model_key', embeddingModel!);
|
||||
if (!embeddingModelProvider || !embeddingModel) {
|
||||
throw new Error('Please select an embedding model before uploading.');
|
||||
}
|
||||
|
||||
data.append('embedding_model_provider_id', embeddingModelProvider);
|
||||
data.append('embedding_model_key', embeddingModel);
|
||||
|
||||
const res = await fetch(`/api/uploads`, {
|
||||
method: 'POST',
|
||||
body: data,
|
||||
});
|
||||
|
||||
const resData = await res.json();
|
||||
const resData = await res.json().catch(() => ({}));
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(resData.message || 'Failed to upload file(s).');
|
||||
}
|
||||
|
||||
if (!Array.isArray(resData.files)) {
|
||||
throw new Error('Invalid upload response from server.');
|
||||
}
|
||||
|
||||
setFiles([...files, ...resData.files]);
|
||||
setFileIds([...fileIds, ...resData.files.map((file: any) => file.fileId)]);
|
||||
setFileIds([
|
||||
...fileIds,
|
||||
...resData.files.map((file: any) => file.fileId),
|
||||
]);
|
||||
} catch (err: any) {
|
||||
toast(err?.message || 'Failed to upload file(s).');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
e.target.value = '';
|
||||
}
|
||||
};
|
||||
|
||||
return loading ? (
|
||||
|
||||
@@ -9,6 +9,7 @@ import { Fragment, useRef, useState } from 'react';
|
||||
import { useChat } from '@/lib/hooks/useChat';
|
||||
import { AnimatePresence } from 'motion/react';
|
||||
import { motion } from 'framer-motion';
|
||||
import { toast } from 'sonner';
|
||||
|
||||
const AttachSmall = () => {
|
||||
const { files, setFiles, setFileIds, fileIds } = useChat();
|
||||
@@ -17,11 +18,19 @@ const AttachSmall = () => {
|
||||
const fileInputRef = useRef<any>();
|
||||
|
||||
const handleChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const selectedFiles = e.target.files;
|
||||
|
||||
if (!selectedFiles?.length) {
|
||||
return;
|
||||
}
|
||||
|
||||
setLoading(true);
|
||||
|
||||
try {
|
||||
const data = new FormData();
|
||||
|
||||
for (let i = 0; i < e.target.files!.length; i++) {
|
||||
data.append('files', e.target.files![i]);
|
||||
for (let i = 0; i < selectedFiles.length; i++) {
|
||||
data.append('files', selectedFiles[i]);
|
||||
}
|
||||
|
||||
const embeddingModelProvider = localStorage.getItem(
|
||||
@@ -29,19 +38,39 @@ const AttachSmall = () => {
|
||||
);
|
||||
const embeddingModel = localStorage.getItem('embeddingModelKey');
|
||||
|
||||
data.append('embedding_model_provider_id', embeddingModelProvider!);
|
||||
data.append('embedding_model_key', embeddingModel!);
|
||||
if (!embeddingModelProvider || !embeddingModel) {
|
||||
throw new Error('Please select an embedding model before uploading.');
|
||||
}
|
||||
|
||||
data.append('embedding_model_provider_id', embeddingModelProvider);
|
||||
data.append('embedding_model_key', embeddingModel);
|
||||
|
||||
const res = await fetch(`/api/uploads`, {
|
||||
method: 'POST',
|
||||
body: data,
|
||||
});
|
||||
|
||||
const resData = await res.json();
|
||||
const resData = await res.json().catch(() => ({}));
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(resData.message || 'Failed to upload file(s).');
|
||||
}
|
||||
|
||||
if (!Array.isArray(resData.files)) {
|
||||
throw new Error('Invalid upload response from server.');
|
||||
}
|
||||
|
||||
setFiles([...files, ...resData.files]);
|
||||
setFileIds([...fileIds, ...resData.files.map((file: any) => file.fileId)]);
|
||||
setFileIds([
|
||||
...fileIds,
|
||||
...resData.files.map((file: any) => file.fileId),
|
||||
]);
|
||||
} catch (err: any) {
|
||||
toast(err?.message || 'Failed to upload file(s).');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
e.target.value = '';
|
||||
}
|
||||
};
|
||||
|
||||
return loading ? (
|
||||
|
||||
@@ -7,6 +7,9 @@ import SyntaxHighlighter from 'react-syntax-highlighter';
|
||||
import darkTheme from './CodeBlockDarkTheme';
|
||||
import lightTheme from './CodeBlockLightTheme';
|
||||
|
||||
const SyntaxHighlighterComponent =
|
||||
SyntaxHighlighter as unknown as React.ComponentType<any>;
|
||||
|
||||
const CodeBlock = ({
|
||||
language,
|
||||
children,
|
||||
@@ -50,13 +53,13 @@ const CodeBlock = ({
|
||||
/>
|
||||
)}
|
||||
</button>
|
||||
<SyntaxHighlighter
|
||||
<SyntaxHighlighterComponent
|
||||
language={language}
|
||||
style={syntaxTheme}
|
||||
showInlineLineNumbers
|
||||
>
|
||||
{children as string}
|
||||
</SyntaxHighlighter>
|
||||
</SyntaxHighlighterComponent>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -154,7 +154,7 @@ const SettingsDialogue = ({
|
||||
Version: {process.env.NEXT_PUBLIC_VERSION}
|
||||
</p>
|
||||
<a
|
||||
href="https://github.com/itzcrazykns/perplexica"
|
||||
href="https://github.com/itzcrazykns/vane"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-xs text-black/70 dark:text-white/70 flex flex-row space-x-1 items-center transition duration-200 hover:text-black/90 hover:dark:text-white/90"
|
||||
|
||||
@@ -46,9 +46,9 @@ const SetupWizard = ({
|
||||
animate={{ opacity: 1, translateY: '0px' }}
|
||||
className="text-4xl md:text-6xl xl:text-8xl font-normal font-['Instrument_Serif'] tracking-tight"
|
||||
>
|
||||
Welcome to{' '}
|
||||
Welcome to
|
||||
<span className="text-[#24A0ED] italic font-['PP_Editorial']">
|
||||
Perplexica
|
||||
Vane
|
||||
</span>
|
||||
</motion.h2>
|
||||
<motion.p
|
||||
@@ -91,9 +91,9 @@ const SetupWizard = ({
|
||||
}}
|
||||
className="text-2xl md:text-4xl xl:text-6xl font-normal font-['Instrument_Serif'] tracking-tight"
|
||||
>
|
||||
Let us get{' '}
|
||||
Let us get
|
||||
<span className="text-[#24A0ED] italic font-['PP_Editorial']">
|
||||
Perplexica
|
||||
Vane
|
||||
</span>{' '}
|
||||
set up for you
|
||||
</motion.p>
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import { Cloud, Sun, CloudRain, CloudSnow, Wind } from 'lucide-react';
|
||||
'use client';
|
||||
|
||||
import { Wind } from 'lucide-react';
|
||||
import { useEffect, useState } from 'react';
|
||||
import { getApproxLocation } from '@/lib/actions';
|
||||
|
||||
const WeatherWidget = () => {
|
||||
const [data, setData] = useState({
|
||||
@@ -15,17 +18,6 @@ const WeatherWidget = () => {
|
||||
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
const getApproxLocation = async () => {
|
||||
const res = await fetch('https://ipwhois.app/json/');
|
||||
const data = await res.json();
|
||||
|
||||
return {
|
||||
latitude: data.latitude,
|
||||
longitude: data.longitude,
|
||||
city: data.city,
|
||||
};
|
||||
};
|
||||
|
||||
const getLocation = async (
|
||||
callback: (location: {
|
||||
latitude: number;
|
||||
|
||||
@@ -257,21 +257,21 @@ const Stock = (props: StockWidgetProps) => {
|
||||
const isPostMarket = props.marketState === 'POST';
|
||||
|
||||
const displayPrice = isPostMarket
|
||||
? props.postMarketPrice ?? props.regularMarketPrice
|
||||
? (props.postMarketPrice ?? props.regularMarketPrice)
|
||||
: isPreMarket
|
||||
? props.preMarketPrice ?? props.regularMarketPrice
|
||||
? (props.preMarketPrice ?? props.regularMarketPrice)
|
||||
: props.regularMarketPrice;
|
||||
|
||||
const displayChange = isPostMarket
|
||||
? props.postMarketChange ?? props.regularMarketChange
|
||||
? (props.postMarketChange ?? props.regularMarketChange)
|
||||
: isPreMarket
|
||||
? props.preMarketChange ?? props.regularMarketChange
|
||||
? (props.preMarketChange ?? props.regularMarketChange)
|
||||
: props.regularMarketChange;
|
||||
|
||||
const displayChangePercent = isPostMarket
|
||||
? props.postMarketChangePercent ?? props.regularMarketChangePercent
|
||||
? (props.postMarketChangePercent ?? props.regularMarketChangePercent)
|
||||
: isPreMarket
|
||||
? props.preMarketChangePercent ?? props.regularMarketChangePercent
|
||||
? (props.preMarketChangePercent ?? props.regularMarketChangePercent)
|
||||
: props.regularMarketChangePercent;
|
||||
|
||||
const changeColor = isPositive
|
||||
|
||||
@@ -20,3 +20,17 @@ export const getSuggestions = async (chatHistory: [string, string][]) => {
|
||||
|
||||
return data.suggestions;
|
||||
};
|
||||
|
||||
export const getApproxLocation = async () => {
|
||||
const res = await fetch('https://free.freeipapi.com/api/json', {
|
||||
method: 'GET',
|
||||
});
|
||||
|
||||
const data = await res.json();
|
||||
|
||||
return {
|
||||
latitude: data.latitude,
|
||||
longitude: data.longitude,
|
||||
city: data.cityName,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -19,6 +19,9 @@ class APISearchAgent {
|
||||
chatHistory: input.chatHistory,
|
||||
followUp: input.followUp,
|
||||
llm: input.config.llm,
|
||||
}).catch((err) => {
|
||||
console.error(`Error executing widgets: ${err}`);
|
||||
return [];
|
||||
});
|
||||
|
||||
let searchPromise: Promise<ResearcherOutput> | null = null;
|
||||
|
||||
@@ -5,9 +5,10 @@ import Researcher from './researcher';
|
||||
import { getWriterPrompt } from '@/lib/prompts/search/writer';
|
||||
import { WidgetExecutor } from './widgets';
|
||||
import db from '@/lib/db';
|
||||
import { chats, messages } from '@/lib/db/schema';
|
||||
import { messages } from '@/lib/db/schema';
|
||||
import { and, eq, gt } from 'drizzle-orm';
|
||||
import { TextBlock } from '@/lib/types';
|
||||
import { getTokenCount } from '@/lib/utils/splitText';
|
||||
|
||||
class SearchAgent {
|
||||
async searchAsync(session: SessionManager, input: SearchAgentInput) {
|
||||
@@ -98,13 +99,17 @@ class SearchAgent {
|
||||
type: 'researchComplete',
|
||||
});
|
||||
|
||||
const finalContext =
|
||||
searchResults?.searchFindings
|
||||
let finalContext =
|
||||
'<Query to be answered without searching; Search not made>';
|
||||
|
||||
if (searchResults) {
|
||||
finalContext = searchResults?.searchFindings
|
||||
.map(
|
||||
(f, index) =>
|
||||
`<result index=${index + 1} title=${f.metadata.title}>${f.content}</result>`,
|
||||
)
|
||||
.join('\n') || '';
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
const widgetContext = widgetOutputs
|
||||
.map((o) => {
|
||||
@@ -119,6 +124,7 @@ class SearchAgent {
|
||||
input.config.systemInstructions,
|
||||
input.config.mode,
|
||||
);
|
||||
|
||||
const answerStream = input.config.llm.streamText({
|
||||
messages: [
|
||||
{
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
import z from 'zod';
|
||||
import { ResearchAction } from '../../types';
|
||||
import { Chunk, SearchResultsResearchBlock } from '@/lib/types';
|
||||
import { searchSearxng } from '@/lib/searxng';
|
||||
|
||||
const schema = z.object({
|
||||
queries: z.array(z.string()).describe('List of academic search queries'),
|
||||
});
|
||||
|
||||
const academicSearchDescription = `
|
||||
Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.
|
||||
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.
|
||||
|
||||
For example, if the user is interested in recent advancements in renewable energy, your queries could be:
|
||||
1. "Recent advancements in renewable energy 2024"
|
||||
2. "Cutting-edge research on solar power technologies"
|
||||
3. "Innovations in wind energy systems"
|
||||
|
||||
If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed academic information.
|
||||
`;
|
||||
|
||||
const academicSearchAction: ResearchAction<typeof schema> = {
|
||||
name: 'academic_search',
|
||||
schema: schema,
|
||||
getDescription: () => academicSearchDescription,
|
||||
getToolDescription: () =>
|
||||
"Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.",
|
||||
enabled: (config) =>
|
||||
config.sources.includes('academic') &&
|
||||
config.classification.classification.skipSearch === false &&
|
||||
config.classification.classification.academicSearch === true,
|
||||
execute: async (input, additionalConfig) => {
|
||||
input.queries = input.queries.slice(0, 3);
|
||||
|
||||
const researchBlock = additionalConfig.session.getBlock(
|
||||
additionalConfig.researchBlockId,
|
||||
);
|
||||
|
||||
if (researchBlock && researchBlock.type === 'research') {
|
||||
researchBlock.data.subSteps.push({
|
||||
type: 'searching',
|
||||
id: crypto.randomUUID(),
|
||||
searching: input.queries,
|
||||
});
|
||||
|
||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
||||
{
|
||||
op: 'replace',
|
||||
path: '/data/subSteps',
|
||||
value: researchBlock.data.subSteps,
|
||||
},
|
||||
]);
|
||||
}
|
||||
|
||||
const searchResultsBlockId = crypto.randomUUID();
|
||||
let searchResultsEmitted = false;
|
||||
|
||||
let results: Chunk[] = [];
|
||||
|
||||
const search = async (q: string) => {
|
||||
const res = await searchSearxng(q, {
|
||||
engines: ['arxiv', 'google scholar', 'pubmed'],
|
||||
});
|
||||
|
||||
const resultChunks: Chunk[] = res.results.map((r) => ({
|
||||
content: r.content || r.title,
|
||||
metadata: {
|
||||
title: r.title,
|
||||
url: r.url,
|
||||
},
|
||||
}));
|
||||
|
||||
results.push(...resultChunks);
|
||||
|
||||
if (
|
||||
!searchResultsEmitted &&
|
||||
researchBlock &&
|
||||
researchBlock.type === 'research'
|
||||
) {
|
||||
searchResultsEmitted = true;
|
||||
|
||||
researchBlock.data.subSteps.push({
|
||||
id: searchResultsBlockId,
|
||||
type: 'search_results',
|
||||
reading: resultChunks,
|
||||
});
|
||||
|
||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
||||
{
|
||||
op: 'replace',
|
||||
path: '/data/subSteps',
|
||||
value: researchBlock.data.subSteps,
|
||||
},
|
||||
]);
|
||||
} else if (
|
||||
searchResultsEmitted &&
|
||||
researchBlock &&
|
||||
researchBlock.type === 'research'
|
||||
) {
|
||||
const subStepIndex = researchBlock.data.subSteps.findIndex(
|
||||
(step) => step.id === searchResultsBlockId,
|
||||
);
|
||||
|
||||
const subStep = researchBlock.data.subSteps[
|
||||
subStepIndex
|
||||
] as SearchResultsResearchBlock;
|
||||
|
||||
subStep.reading.push(...resultChunks);
|
||||
|
||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
||||
{
|
||||
op: 'replace',
|
||||
path: '/data/subSteps',
|
||||
value: researchBlock.data.subSteps,
|
||||
},
|
||||
]);
|
||||
}
|
||||
};
|
||||
|
||||
await Promise.all(input.queries.map(search));
|
||||
|
||||
return {
|
||||
type: 'search_results',
|
||||
results,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
export default academicSearchAction;
|
||||
@@ -1,11 +1,11 @@
|
||||
import academicSearchAction from './academicSearch';
|
||||
import academicSearchAction from './search/academicSearch';
|
||||
import doneAction from './done';
|
||||
import planAction from './plan';
|
||||
import ActionRegistry from './registry';
|
||||
import scrapeURLAction from './scrapeURL';
|
||||
import socialSearchAction from './socialSearch';
|
||||
import socialSearchAction from './search/socialSearch';
|
||||
import uploadsSearchAction from './uploadsSearch';
|
||||
import webSearchAction from './webSearch';
|
||||
import webSearchAction from './search/webSearch';
|
||||
|
||||
ActionRegistry.register(webSearchAction);
|
||||
ActionRegistry.register(doneAction);
|
||||
|
||||
@@ -67,6 +67,7 @@ class ActionRegistry {
|
||||
additionalConfig: AdditionalConfig & {
|
||||
researchBlockId: string;
|
||||
fileIds: string[];
|
||||
mode: SearchAgentConfig['mode'];
|
||||
},
|
||||
) {
|
||||
const action = this.actions.get(name);
|
||||
@@ -83,6 +84,7 @@ class ActionRegistry {
|
||||
additionalConfig: AdditionalConfig & {
|
||||
researchBlockId: string;
|
||||
fileIds: string[];
|
||||
mode: SearchAgentConfig['mode'];
|
||||
},
|
||||
): Promise<ActionOutput[]> {
|
||||
const results: ActionOutput[] = [];
|
||||
|
||||
@@ -1,10 +1,50 @@
|
||||
import z from 'zod';
|
||||
import { ResearchAction } from '../../types';
|
||||
import { Chunk, ReadingResearchBlock } from '@/lib/types';
|
||||
import TurnDown from 'turndown';
|
||||
import path from 'path';
|
||||
import Scraper from '@/lib/scraper';
|
||||
import { splitText } from '@/lib/utils/splitText';
|
||||
|
||||
const turndownService = new TurnDown();
|
||||
/**
 * System prompt for the fact-extraction LLM call made on each chunk of a
 * scraped page. It asks for terse, bullet-style facts and a raw JSON reply
 * with a single `extracted_facts` key (the shape described by
 * `extractorSchema` below).
 */
const extractorPrompt = `
Assistant is an AI information extractor. Assistant will be shared with scraped information from a website along with the queries used to retrieve that information. Assistant's task is to extract relevant facts from the scraped data to answer the queries.

## Things to taken into consideration when extracting information:
1. Relevance to the query: The extracted information must dynamically adjust based on the query's intent. If the query asks "What is [X]", you must extract the definition/identity. If the query asks for "[X] specs" or "features", you must provide deep, granular technical details.
- Example: For "What is [Product]", extract the core definition. For "[Product] capabilities", extract every technical function mentioned.
2. Concentrate on extracting factual information that can help in answering the question rather than opinions or commentary. Ignore marketing fluff like "best-in-class" or "seamless."
3. Noise to signal ratio: If the scraped data is noisy (headers, footers, UI text), ignore it and extract only the high-value information.
- Example: Discard "Click for more" or "Subscribe now" messages.
4. Avoid using filler sentences or words; extract concise, telegram-style information.
- Example: Change "The device features a weight of only 1.2kg" to "Weight: 1.2kg."
5. Duplicate information: If a fact appears multiple times (e.g., in a paragraph and a technical table), merge the details into a single, high-density bullet point to avoid redundancy.
6. Numerical Data Integrity: NEVER summarize or generalize numbers, benchmarks, or table data. Extract raw values exactly as they appear.
- Example: Do not say "Improved coding scores." Say "LiveCodeBench v6: 80.0%."

## Example
For example, if the query is "What are the health benefits of green tea?" and the scraped data contains various pieces of information about green tea, Assistant should focus on extracting factual information related to the health benefits of green tea such as "Green tea contains antioxidants which can help in reducing inflammation" and ignore irrelevant information such as "Green tea is a popular beverage worldwide".

It can also remove filler words to reduce the sentence to "Contains antioxidants; reduces inflammation."

For tables/numerical data extraction, Assistant should extract the raw numerical data or the content of the table without trying to summarize it to avoid losing important details. For example, if a table lists specific battery life hours for different modes, Assistant should list every mode and its corresponding hour count rather than giving a general average.

Make sure the extracted facts are in bullet points format to make it easier to read and understand.

## Output format
Assistant should reply with a JSON object containing a key "extracted_facts" which is a string of the bulleted facts. Return only raw JSON without markdown formatting (no \`\`\`json blocks).

<example_output>
{
"extracted_facts": "- Fact 1\n- Fact 2\n- Fact 3"
}
</example_output>
`;

/**
 * Structured-output schema handed to `generateObject` together with
 * `extractorPrompt`: one string field holding the bulleted facts.
 */
const extractorSchema = z.object({
  extracted_facts: z
    .string()
    .describe(
      'The extracted facts that are relevant to the query and can help in answering the question should be listed here in a concise manner.',
    ),
});
|
||||
|
||||
const schema = z.object({
|
||||
urls: z.array(z.string()).describe('A list of URLs to scrape content from.'),
|
||||
@@ -39,11 +79,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
||||
await Promise.all(
|
||||
params.urls.map(async (url) => {
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
const text = await res.text();
|
||||
|
||||
const title =
|
||||
text.match(/<title>(.*?)<\/title>/i)?.[1] || `Content from ${url}`;
|
||||
const scraped = await Scraper.scrape(url);
|
||||
|
||||
if (
|
||||
!readingEmitted &&
|
||||
@@ -59,7 +95,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
||||
content: '',
|
||||
metadata: {
|
||||
url,
|
||||
title: title,
|
||||
title: scraped.title,
|
||||
},
|
||||
},
|
||||
],
|
||||
@@ -92,7 +128,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
||||
content: '',
|
||||
metadata: {
|
||||
url,
|
||||
title: title,
|
||||
title: scraped.title,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -108,13 +144,49 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
||||
);
|
||||
}
|
||||
|
||||
const markdown = turndownService.turndown(text);
|
||||
const chunks = splitText(scraped.content, 4000, 500);
|
||||
|
||||
let accumulatedContent = '';
|
||||
|
||||
if (chunks.length > 1) {
|
||||
try {
|
||||
await Promise.all(
|
||||
chunks.map(async (chunk) => {
|
||||
const extracted = await additionalConfig.llm.generateObject<
|
||||
typeof extractorSchema
|
||||
>({
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: extractorPrompt,
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: `<queries>Summarize</queries>\n<scraped_data>${chunk}</scraped_data>`,
|
||||
},
|
||||
],
|
||||
schema: extractorSchema,
|
||||
});
|
||||
|
||||
accumulatedContent += extracted.extracted_facts + '\n';
|
||||
}),
|
||||
);
|
||||
} catch (err) {
|
||||
console.log(
|
||||
'Error during extraction, falling back to raw content',
|
||||
err,
|
||||
);
|
||||
accumulatedContent = chunks[0];
|
||||
}
|
||||
} else {
|
||||
accumulatedContent = scraped.content;
|
||||
}
|
||||
|
||||
results.push({
|
||||
content: markdown,
|
||||
content: accumulatedContent,
|
||||
metadata: {
|
||||
url,
|
||||
title: title,
|
||||
title: scraped.title,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
@@ -122,7 +194,7 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
|
||||
content: `Failed to fetch content from ${url}: ${error}`,
|
||||
metadata: {
|
||||
url,
|
||||
title: `Error fetching ${url}`,
|
||||
title: `Error scraping ${url}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
import z from 'zod';
|
||||
import { ResearchAction } from '../../../types';
|
||||
import { ResearchBlock } from '@/lib/types';
|
||||
import { executeSearch } from './baseSearch';
|
||||
|
||||
/** Tool input: the academic queries the model wants to run. */
const schema = z.object({
  queries: z.array(z.string()).describe('List of academic search queries'),
});

/** Long-form tool description exposed through `getDescription`. */
const academicSearchDescription = `
Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.

For example, if the user is interested in recent advancements in renewable energy, your queries could be:
1. "Recent advancements in renewable energy 2024"
2. "Cutting-edge research on solar power technologies"
3. "Innovations in wind energy systems"

If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed academic information.
`;

/**
 * Research action that runs up to three academic queries through the shared
 * `executeSearch` pipeline, restricted to the arxiv, google scholar and
 * pubmed engines.
 */
const academicSearchAction: ResearchAction<typeof schema> = {
  name: 'academic_search',
  schema: schema,
  getDescription: () => academicSearchDescription,
  getToolDescription: () =>
    "Use this tool to perform academic searches for scholarly articles, papers, and research studies relevant to the user's query. Provide a list of concise search queries that will help gather comprehensive academic information on the topic at hand.",
  enabled: (config) => {
    // Source selection is checked first, exactly as in the && chain it replaces.
    if (!config.sources.includes('academic')) {
      return false;
    }

    const verdict = config.classification.classification;

    return verdict.skipSearch === false && verdict.academicSearch === true;
  },
  execute: async (input, additionalConfig) => {
    const { session, researchBlockId, llm, embedding, mode } =
      additionalConfig;

    // The model sometimes sends a bare value instead of an array; wrap it,
    // then cap the batch at three queries (written back onto `input`).
    const requested = Array.isArray(input.queries)
      ? input.queries
      : [input.queries];
    input.queries = requested.slice(0, 3);

    const researchBlock = session.getBlock(researchBlockId) as
      | ResearchBlock
      | undefined;

    if (!researchBlock) {
      throw new Error('Failed to retrieve research block');
    }

    const results = await executeSearch({
      llm,
      embedding,
      mode,
      queries: input.queries,
      researchBlock,
      session,
      searchConfig: {
        engines: ['arxiv', 'google scholar', 'pubmed'],
      },
    });

    return {
      type: 'search_results',
      results,
    };
  },
};
|
||||
|
||||
export default academicSearchAction;
|
||||
423
src/lib/agents/search/researcher/actions/search/baseSearch.ts
Normal file
423
src/lib/agents/search/researcher/actions/search/baseSearch.ts
Normal file
@@ -0,0 +1,423 @@
|
||||
import BaseEmbedding from '@/lib/models/base/embedding';
|
||||
import BaseLLM from '@/lib/models/base/llm';
|
||||
import { searchSearxng, SearxngSearchOptions } from '@/lib/searxng';
|
||||
import SessionManager from '@/lib/session';
|
||||
import { Chunk, ResearchBlock, SearchResultsResearchBlock } from '@/lib/types';
|
||||
import { SearchAgentConfig } from '../../../types';
|
||||
import computeSimilarity from '@/lib/utils/computeSimilarity';
|
||||
import z from 'zod';
|
||||
import Scraper from '@/lib/scraper';
|
||||
import { splitText } from '@/lib/utils/splitText';
|
||||
|
||||
/** One raw hit as returned by `searchSearxng`. */
type SearxngResult = Awaited<
  ReturnType<typeof searchSearxng>
>['results'][number];

/**
 * A search hit together with the ranking data that is only needed while
 * ordering and de-duplicating. Keeping it beside the chunk (instead of inside
 * `chunk.metadata`) means embedding vectors never end up in the research
 * block or in the returned results.
 */
type ScoredChunk = {
  chunk: Chunk;
  similarity: number;
  embedding: number[];
};

/** Hits at or below this query similarity are dropped in speed/balanced mode. */
const RELEVANCE_THRESHOLD = 0.5;
/** Two hits more similar than this are treated as the same result. */
const DUPLICATE_THRESHOLD = 0.75;
/** Upper bound on results returned in speed/balanced mode. */
const MAX_RANKED_RESULTS = 20;
/** Upper bound on pages scraped per call in quality mode. */
const MAX_PICKED_RESULTS = 3;

/** System prompt for the LLM that chooses which search hits get scraped. */
const pickerPrompt = `
Assistant is an AI search result picker. Assistant's task is to pick 2-3 of the most relevant search results based off the query which can be then scraped for information to answer the query.
Assistant will be shared with the search results retrieved from a search engine along with the queries used to retrieve those results. Assistant will then pick maxiumum 3 of the most relevant search results based on the queries and the content of the search results. Assistant should only pick search results that are relevant to the query and can help in answering the question.

## Things to taken into consideration when picking the search results:
1. Relevance to the query: The search results should be relevant to the query provided. Irrelevant results should be ignored.
2. Content quality: The content of the search results should be of high quality and provide valuable information that can help in answering the question.
3. Favour known and reputable sources: If there are search results from known and reputable sources that are relevant to the query, those should be prioritized.
4. Diversity: If there are multiple search results that are relevant and of high quality, try to pick results that provide diverse perspectives or information to get a well-rounded understanding of the topic.
5. Avoid picking search results that are too similar to each other in terms of content to maximize the amount of information gathered.
6. Maximum 3 results: Assistant should pick a maximum of 3 search results. If there are more than 3 relevant and high-quality search results, pick the top 3 based on the above criteria. If the queries are very specific and there are only 1 or 2 relevant search results, it's okay to pick only those 1 or 2 results.
7. Try to pick only one high quality result unless there are diverse perspective in multiple results then you can pick a maximum of 3.
8. Analyze the title, the snippet and the URL to determine the relevant to query, quality of the content that might be present inside and the reputation of the source before picking the search result.

## Output format
Assistant should output an array of indices corresponding to the search results that were picked based on the above criteria. The indices should be based on the order of the search results provided to Assistant. For example, if Assistant picks the 1st, 3rd, and 5th search results, Assistant should output [0, 2, 4].

<example_output>
{
"picked_indices": [0,2,4]
}
</example_output>
`;

/** Structured-output schema for the picker call. */
const pickerSchema = z.object({
  picked_indices: z
    .array(z.number())
    .describe('The array of the picked indices to be scraped for answering'),
});

/** System prompt for the LLM that condenses a scraped chunk into facts. */
const extractorPrompt = `
Assistant is an AI information extractor. Assistant will be shared with scraped information from a website along with the queries used to retrieve that information. Assistant's task is to extract relevant facts from the scraped data to answer the queries.

## Things to taken into consideration when extracting information:
1. Relevance to the query: The extracted information must dynamically adjust based on the query's intent. If the query asks "What is [X]", you must extract the definition/identity. If the query asks for "[X] specs" or "features", you must provide deep, granular technical details.
- Example: For "What is [Product]", extract the core definition. For "[Product] capabilities", extract every technical function mentioned.
2. Concentrate on extracting factual information that can help in answering the question rather than opinions or commentary. Ignore marketing fluff like "best-in-class" or "seamless."
3. Noise to signal ratio: If the scraped data is noisy (headers, footers, UI text), ignore it and extract only the high-value information.
- Example: Discard "Click for more" or "Subscribe now" messages.
4. Avoid using filler sentences or words; extract concise, telegram-style information.
- Example: Change "The device features a weight of only 1.2kg" to "Weight: 1.2kg."
5. Duplicate information: If a fact appears multiple times (e.g., in a paragraph and a technical table), merge the details into a single, high-density bullet point to avoid redundancy.
6. Numerical Data Integrity: NEVER summarize or generalize numbers, benchmarks, or table data. Extract raw values exactly as they appear.
- Example: Do not say "Improved coding scores." Say "LiveCodeBench v6: 80.0%."

## Example
For example, if the query is "What are the health benefits of green tea?" and the scraped data contains various pieces of information about green tea, Assistant should focus on extracting factual information related to the health benefits of green tea such as "Green tea contains antioxidants which can help in reducing inflammation" and ignore irrelevant information such as "Green tea is a popular beverage worldwide".

It can also remove filler words to reduce the sentence to "Contains antioxidants; reduces inflammation."

For tables/numerical data extraction, Assistant should extract the raw numerical data or the content of the table without trying to summarize it to avoid losing important details. For example, if a table lists specific battery life hours for different modes, Assistant should list every mode and its corresponding hour count rather than giving a general average.

Make sure the extracted facts are in bullet points format to make it easier to read and understand.

## Output format
Assistant should reply with a JSON object containing a key "extracted_facts" which is a string of the bulleted facts. Return only raw JSON without markdown formatting (no \`\`\`json blocks).

<example_output>
{
"extracted_facts": "- Fact 1\n- Fact 2\n- Fact 3"
}
</example_output>
`;

/** Structured-output schema for the extractor call. */
const extractorSchema = z.object({
  extracted_facts: z
    .string()
    .describe(
      'The extracted facts that are relevant to the query and can help in answering the question should be listed here in a concise manner.',
    ),
});

/** Pushes the block's current sub-step list to the session. */
const publishSubSteps = (
  session: InstanceType<typeof SessionManager>,
  researchBlock: ResearchBlock,
): void => {
  session.updateBlock(researchBlock.id, [
    {
      op: 'replace',
      path: '/data/subSteps',
      value: researchBlock.data.subSteps,
    },
  ]);
};

/**
 * Returns a function that records search hits on the research block: the
 * first call creates a `search_results` sub-step, later calls append to it.
 * Every call publishes the updated sub-steps to the session.
 */
const createSearchResultsReporter = (
  session: InstanceType<typeof SessionManager>,
  researchBlock: ResearchBlock,
): ((chunks: Chunk[]) => void) => {
  const subStepId = crypto.randomUUID();
  let emitted = false;

  return (chunks) => {
    if (!emitted) {
      emitted = true;

      researchBlock.data.subSteps.push({
        id: subStepId,
        type: 'search_results',
        reading: [...chunks],
      });
    } else {
      const subStep = researchBlock.data.subSteps.find(
        (step) => step.id === subStepId,
      ) as SearchResultsResearchBlock | undefined;

      // The sub-step can only be missing if something else rewrote the list;
      // skip the append instead of throwing on `undefined.reading`.
      subStep?.reading.push(...chunks);
    }

    publishSubSteps(session, researchBlock);
  };
};

/** Converts a raw search hit into a chunk; hits without a snippet use the title as content. */
const toChunk = (result: SearxngResult): Chunk => ({
  content: result.content || result.title,
  metadata: {
    title: result.title,
    url: result.url,
  },
});

/**
 * Scores every hit against the query embedding and drops the ones at or
 * below `RELEVANCE_THRESHOLD`. If embedding fails, all hits are kept unranked
 * (similarity 1, no vector) so the search still yields results.
 */
const scoreResults = async (
  query: string,
  results: SearxngResult[],
  embedding: BaseEmbedding<any>,
): Promise<ScoredChunk[]> => {
  try {
    const queryEmbedding = (await embedding.embedText([query]))[0];

    const scored = await Promise.all(
      results.map(async (result): Promise<ScoredChunk> => {
        const chunk = toChunk(result);
        const chunkEmbedding = (await embedding.embedText([chunk.content]))[0];

        return {
          chunk,
          similarity: computeSimilarity(queryEmbedding, chunkEmbedding),
          embedding: chunkEmbedding,
        };
      }),
    );

    return scored.filter((s) => s.similarity > RELEVANCE_THRESHOLD);
  } catch (err) {
    console.log('Error embedding search results, keeping them unranked', err);

    return results.map((result) => ({
      chunk: toChunk(result),
      similarity: 1,
      embedding: [],
    }));
  }
};

/**
 * Orders hits by descending similarity and keeps the first of each group of
 * near-duplicates. Two hits are duplicates when they share a URL or when
 * both have embeddings that are more similar than `DUPLICATE_THRESHOLD`.
 */
const rankAndDedupe = (scored: ScoredChunk[]): ScoredChunk[] => {
  const ranked = [...scored].sort((a, b) => b.similarity - a.similarity);
  const unique: ScoredChunk[] = [];

  for (const candidate of ranked) {
    const isDuplicate = unique.some((kept) => {
      if (kept.chunk.metadata.url === candidate.chunk.metadata.url) {
        return true;
      }

      // Hits from the embedding-failure fallback carry no vector to compare.
      if (kept.embedding.length === 0 || candidate.embedding.length === 0) {
        return false;
      }

      return (
        computeSimilarity(candidate.embedding, kept.embedding) >
        DUPLICATE_THRESHOLD
      );
    });

    if (!isDuplicate) {
      unique.push(candidate);
    }
  }

  return unique;
};

/**
 * Asks the LLM which hits deserve a full scrape and resolves its answer to
 * at most `MAX_PICKED_RESULTS` chunks. Invalid indices, repeated URLs and
 * URLs listed in `alreadyReadUrls` are skipped before the cap is applied, so
 * they do not use up slots.
 */
const pickResultsToRead = async (
  llm: BaseLLM<any>,
  queries: string[],
  searchResults: Chunk[],
  alreadyReadUrls: string[],
): Promise<Chunk[]> => {
  const pickerResponse = await llm.generateObject<typeof pickerSchema>({
    schema: pickerSchema,
    messages: [
      {
        role: 'system',
        content: pickerPrompt,
      },
      {
        role: 'user',
        content: `<queries>${queries.join(', ')}</queries>\n<search_results>${searchResults.map((result, index) => `<result indice=${index}>${JSON.stringify(result)}</result>`).join('\n')}</search_results>`,
      },
    ],
  });

  const seenUrls = new Set<string>(alreadyReadUrls);
  const picked: Chunk[] = [];

  for (const index of pickerResponse.picked_indices) {
    if (picked.length === MAX_PICKED_RESULTS) break;

    const candidate = Number.isInteger(index)
      ? searchResults[index]
      : undefined;

    if (!candidate || seenUrls.has(candidate.metadata.url)) continue;

    seenUrls.add(candidate.metadata.url);
    picked.push(candidate);
  }

  return picked;
};

/**
 * Scrapes one picked hit and condenses every chunk of the page into facts.
 * Facts are joined in page order. Returns `undefined` when the page cannot
 * be scraped; when scraping works but no chunk yields facts, the original
 * search snippet is kept as content instead of an empty string.
 */
const extractFacts = async (
  llm: BaseLLM<any>,
  queries: string[],
  result: Chunk,
): Promise<Chunk | undefined> => {
  try {
    const scrapedData = await Scraper.scrape(result.metadata.url);
    const chunks = splitText(scrapedData.content, 4000, 500);

    const factsPerChunk = await Promise.all(
      chunks.map(async (chunk) => {
        try {
          const extractorOutput = await llm.generateObject<
            typeof extractorSchema
          >({
            schema: extractorSchema,
            messages: [
              {
                role: 'system',
                content: extractorPrompt,
              },
              {
                role: 'user',
                content: `<queries>${queries.join(', ')}</queries>\n<scraped_data>${chunk}</scraped_data>`,
              },
            ],
          });

          return extractorOutput.extracted_facts;
        } catch (err) {
          console.log('Error extracting information from chunk', err);
          return undefined;
        }
      }),
    );

    // Promise.all preserves input order, so facts follow the page's order
    // regardless of which LLM call finished first.
    const extracted = factsPerChunk
      .filter((facts): facts is string => facts !== undefined)
      .map((facts) => facts + '\n')
      .join('');

    return {
      ...result,
      content: extracted.trim().length > 0 ? extracted : result.content,
    };
  } catch (err) {
    console.log('Error scraping data from', result.metadata.url, err);
    return undefined;
  }
};

/**
 * Runs the given queries through SearXNG and reports progress on the
 * research block (a `searching` sub-step, then `search_results`, and in
 * quality mode a `reading` sub-step for the pages being scraped).
 *
 * - `speed` / `balanced`: hits are scored against the query embedding,
 *   filtered, de-duplicated and capped at 20; snippets are returned as-is.
 * - `quality`: an LLM picks up to 3 hits, each page is scraped and condensed
 *   into facts by the LLM; URLs already present in a `reading` sub-step are
 *   not scraped again.
 * - any other mode: returns an empty list.
 *
 * @param input Queries, mode, optional SearXNG options, the research block
 *   to report on, the session used to publish updates, and the models.
 * @returns The chunks to hand to the answering model. Metadata carries only
 *   `title` and `url`.
 * @throws Whatever `searchSearxng` throws, and in quality mode whatever the
 *   picker LLM call throws; scrape and extraction failures are logged and
 *   skipped.
 */
export const executeSearch = async (input: {
  queries: string[];
  mode: SearchAgentConfig['mode'];
  searchConfig?: SearxngSearchOptions;
  researchBlock: ResearchBlock;
  session: InstanceType<typeof SessionManager>;
  llm: BaseLLM<any>;
  embedding: BaseEmbedding<any>;
}): Promise<Chunk[]> => {
  const { researchBlock, session, queries } = input;

  researchBlock.data.subSteps.push({
    id: crypto.randomUUID(),
    type: 'searching',
    searching: queries,
  });
  publishSubSteps(session, researchBlock);

  const reportSearchResults = createSearchResultsReporter(
    session,
    researchBlock,
  );

  if (input.mode === 'speed' || input.mode === 'balanced') {
    const scoredPerQuery = await Promise.all(
      queries.map(async (query) => {
        const res = await searchSearxng(query, { ...input.searchConfig });
        const scored = await scoreResults(query, res.results, input.embedding);

        reportSearchResults(scored.map((s) => s.chunk));

        return scored;
      }),
    );

    return rankAndDedupe(scoredPerQuery.flat())
      .slice(0, MAX_RANKED_RESULTS)
      .map((s) => s.chunk);
  }

  if (input.mode === 'quality') {
    const chunksPerQuery = await Promise.all(
      queries.map(async (query) => {
        const res = await searchSearxng(query, { ...input.searchConfig });
        const chunks = res.results.map(toChunk);

        reportSearchResults(chunks);

        return chunks;
      }),
    );

    const searchResults = chunksPerQuery.flat();

    // Nothing to pick from: skip the picker call, the outcome is the same.
    if (searchResults.length === 0) return [];

    const alreadyExtractedURLs: string[] = [];

    researchBlock.data.subSteps.forEach((step) => {
      if (step.type === 'reading') {
        step.reading.forEach((chunk) => {
          alreadyExtractedURLs.push(chunk.metadata.url);
        });
      }
    });

    const resultsToRead = await pickResultsToRead(
      input.llm,
      queries,
      searchResults,
      alreadyExtractedURLs,
    );

    if (resultsToRead.length === 0) return [];

    researchBlock.data.subSteps.push({
      id: crypto.randomUUID(),
      type: 'reading',
      reading: resultsToRead,
    });
    publishSubSteps(session, researchBlock);

    const extracted = await Promise.all(
      resultsToRead.map((result) => extractFacts(input.llm, queries, result)),
    );

    return extracted.filter((chunk): chunk is Chunk => chunk !== undefined);
  }

  return [];
};
|
||||
@@ -0,0 +1,62 @@
|
||||
import z from 'zod';
|
||||
import { ResearchAction } from '../../../types';
|
||||
import { ResearchBlock } from '@/lib/types';
|
||||
import { executeSearch } from './baseSearch';
|
||||
|
||||
/** Tool input: the social/discussion queries the model wants to run. */
const schema = z.object({
  queries: z.array(z.string()).describe('List of social search queries'),
});

/** Long-form tool description exposed through `getDescription`. */
const socialSearchDescription = `
Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.

For example, if the user is interested in public opinion on electric vehicles, your queries could be:
1. "Electric vehicles public opinion 2024"
2. "Social media discussions on EV adoption"
3. "Trends in electric vehicle usage"

If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed social media information.
`;

/**
 * Research action that runs up to three discussion queries through the
 * shared `executeSearch` pipeline, restricted to the reddit engine.
 */
const socialSearchAction: ResearchAction<typeof schema> = {
  name: 'social_search',
  schema: schema,
  getDescription: () => socialSearchDescription,
  getToolDescription: () =>
    "Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.",
  enabled: (config) => {
    // Source selection is checked first, exactly as in the && chain it replaces.
    if (!config.sources.includes('discussions')) {
      return false;
    }

    const verdict = config.classification.classification;

    return verdict.skipSearch === false && verdict.discussionSearch === true;
  },
  execute: async (input, additionalConfig) => {
    const { session, researchBlockId, llm, embedding, mode } =
      additionalConfig;

    // The model sometimes sends a bare value instead of an array; wrap it,
    // then cap the batch at three queries (written back onto `input`).
    const requested = Array.isArray(input.queries)
      ? input.queries
      : [input.queries];
    input.queries = requested.slice(0, 3);

    const researchBlock = session.getBlock(researchBlockId) as
      | ResearchBlock
      | undefined;

    if (!researchBlock) {
      throw new Error('Failed to retrieve research block');
    }

    const results = await executeSearch({
      llm,
      embedding,
      mode,
      queries: input.queries,
      researchBlock,
      session,
      searchConfig: {
        engines: ['reddit'],
      },
    });

    return {
      type: 'search_results',
      results,
    };
  },
};
|
||||
|
||||
export default socialSearchAction;
|
||||
@@ -1,7 +1,7 @@
|
||||
import z from 'zod';
|
||||
import { ResearchAction } from '../../types';
|
||||
import { searchSearxng } from '@/lib/searxng';
|
||||
import { Chunk, SearchResultsResearchBlock } from '@/lib/types';
|
||||
import { ResearchAction } from '../../../types';
|
||||
import { ResearchBlock } from '@/lib/types';
|
||||
import { executeSearch } from './baseSearch';
|
||||
|
||||
const actionSchema = z.object({
|
||||
type: z.literal('web_search'),
|
||||
@@ -85,96 +85,28 @@ const webSearchAction: ResearchAction<typeof actionSchema> = {
|
||||
config.sources.includes('web') &&
|
||||
config.classification.classification.skipSearch === false,
|
||||
execute: async (input, additionalConfig) => {
|
||||
input.queries = input.queries.slice(0, 3);
|
||||
input.queries = (
|
||||
Array.isArray(input.queries) ? input.queries : [input.queries]
|
||||
).slice(0, 3);
|
||||
|
||||
const researchBlock = additionalConfig.session.getBlock(
|
||||
additionalConfig.researchBlockId,
|
||||
);
|
||||
) as ResearchBlock | undefined;
|
||||
|
||||
if (researchBlock && researchBlock.type === 'research') {
|
||||
researchBlock.data.subSteps.push({
|
||||
id: crypto.randomUUID(),
|
||||
type: 'searching',
|
||||
searching: input.queries,
|
||||
if (!researchBlock) throw new Error('Failed to retrieve research block');
|
||||
|
||||
const results = await executeSearch({
|
||||
llm: additionalConfig.llm,
|
||||
embedding: additionalConfig.embedding,
|
||||
mode: additionalConfig.mode,
|
||||
queries: input.queries,
|
||||
researchBlock: researchBlock,
|
||||
session: additionalConfig.session,
|
||||
});
|
||||
|
||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
||||
{
|
||||
op: 'replace',
|
||||
path: '/data/subSteps',
|
||||
value: researchBlock.data.subSteps,
|
||||
},
|
||||
]);
|
||||
}
|
||||
|
||||
const searchResultsBlockId = crypto.randomUUID();
|
||||
let searchResultsEmitted = false;
|
||||
|
||||
let results: Chunk[] = [];
|
||||
|
||||
const search = async (q: string) => {
|
||||
const res = await searchSearxng(q);
|
||||
|
||||
const resultChunks: Chunk[] = res.results.map((r) => ({
|
||||
content: r.content || r.title,
|
||||
metadata: {
|
||||
title: r.title,
|
||||
url: r.url,
|
||||
},
|
||||
}));
|
||||
|
||||
results.push(...resultChunks);
|
||||
|
||||
if (
|
||||
!searchResultsEmitted &&
|
||||
researchBlock &&
|
||||
researchBlock.type === 'research'
|
||||
) {
|
||||
searchResultsEmitted = true;
|
||||
|
||||
researchBlock.data.subSteps.push({
|
||||
id: searchResultsBlockId,
|
||||
type: 'search_results',
|
||||
reading: resultChunks,
|
||||
});
|
||||
|
||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
||||
{
|
||||
op: 'replace',
|
||||
path: '/data/subSteps',
|
||||
value: researchBlock.data.subSteps,
|
||||
},
|
||||
]);
|
||||
} else if (
|
||||
searchResultsEmitted &&
|
||||
researchBlock &&
|
||||
researchBlock.type === 'research'
|
||||
) {
|
||||
const subStepIndex = researchBlock.data.subSteps.findIndex(
|
||||
(step) => step.id === searchResultsBlockId,
|
||||
);
|
||||
|
||||
const subStep = researchBlock.data.subSteps[
|
||||
subStepIndex
|
||||
] as SearchResultsResearchBlock;
|
||||
|
||||
subStep.reading.push(...resultChunks);
|
||||
|
||||
additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
|
||||
{
|
||||
op: 'replace',
|
||||
path: '/data/subSteps',
|
||||
value: researchBlock.data.subSteps,
|
||||
},
|
||||
]);
|
||||
}
|
||||
};
|
||||
|
||||
await Promise.all(input.queries.map(search));
|
||||
|
||||
return {
|
||||
type: 'search_results',
|
||||
results,
|
||||
results: results,
|
||||
};
|
||||
},
|
||||
};
|
||||
@@ -1,129 +0,0 @@
|
||||
import z from 'zod';
|
||||
import { ResearchAction } from '../../types';
|
||||
import { Chunk, SearchResultsResearchBlock } from '@/lib/types';
|
||||
import { searchSearxng } from '@/lib/searxng';
|
||||
|
||||
// Input schema for the social search action: a single `queries` field holding
// the list of search query strings supplied by the model.
const schema = z.object({
|
||||
queries: z.array(z.string()).describe('List of social search queries'),
|
||||
});
|
||||
|
||||
// Long-form prompt text for the social search tool; it is what
// `socialSearchAction.getDescription()` returns. The text asks the model for at
// most 3 queries per call, which matches the `slice(0, 3)` applied in `execute`.
const socialSearchDescription = `
|
||||
Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.
|
||||
You can provide up to 3 queries at a time. Make sure the queries are specific and relevant to the user's needs.
|
||||
|
||||
For example, if the user is interested in public opinion on electric vehicles, your queries could be:
|
||||
1. "Electric vehicles public opinion 2024"
|
||||
2. "Social media discussions on EV adoption"
|
||||
3. "Trends in electric vehicle usage"
|
||||
|
||||
If this tool is present and no other tools are more relevant, you MUST use this tool to get the needed social media information.
|
||||
`;
|
||||
|
||||
/**
 * Research action that runs up to three queries against SearXNG restricted to
 * the `reddit` engine and streams the hits into the session's research block.
 *
 * The action is enabled only when the `discussions` source is selected, search
 * is not skipped, and the classifier asked for a discussion search.
 */
const socialSearchAction: ResearchAction<typeof schema> = {
  name: 'social_search',
  schema: schema,
  getDescription: () => socialSearchDescription,
  getToolDescription: () =>
    "Use this tool to perform social media searches for relevant posts, discussions, and trends related to the user's query. Provide a list of concise search queries that will help gather comprehensive social media information on the topic at hand.",
  enabled: (config) => {
    const { classification } = config.classification;

    return (
      config.sources.includes('discussions') &&
      classification.skipSearch === false &&
      classification.discussionSearch === true
    );
  },
  execute: async (input, additionalConfig) => {
    // Only the first three queries are ever executed.
    input.queries = input.queries.slice(0, 3);

    const researchBlock = additionalConfig.session.getBlock(
      additionalConfig.researchBlockId,
    );

    // Announce the queries as a "searching" sub-step before any request is sent.
    if (researchBlock && researchBlock.type === 'research') {
      researchBlock.data.subSteps.push({
        type: 'searching',
        id: crypto.randomUUID(),
        searching: input.queries,
      });

      additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
        {
          op: 'replace',
          path: '/data/subSteps',
          value: researchBlock.data.subSteps,
        },
      ]);
    }

    // All queries share one "search_results" sub-step, identified by this id.
    const searchResultsBlockId = crypto.randomUUID();
    let searchResultsEmitted = false;

    let results: Chunk[] = [];

    const runQuery = async (query: string) => {
      const response = await searchSearxng(query, {
        engines: ['reddit'],
      });

      const hits: Chunk[] = response.results.map((hit) => ({
        content: hit.content || hit.title,
        metadata: {
          title: hit.title,
          url: hit.url,
        },
      }));

      results.push(...hits);

      // Without a research block there is nothing to stream to.
      if (!researchBlock || researchBlock.type !== 'research') return;

      if (!searchResultsEmitted) {
        // First query to finish creates the shared results sub-step.
        searchResultsEmitted = true;

        researchBlock.data.subSteps.push({
          id: searchResultsBlockId,
          type: 'search_results',
          reading: hits,
        });
      } else {
        // Later queries append their hits to the sub-step created above.
        const existing = researchBlock.data.subSteps.find(
          (step) => step.id === searchResultsBlockId,
        ) as SearchResultsResearchBlock;

        existing.reading.push(...hits);
      }

      additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
        {
          op: 'replace',
          path: '/data/subSteps',
          value: researchBlock.data.subSteps,
        },
      ]);
    };

    await Promise.all(input.queries.map(runQuery));

    return {
      type: 'search_results',
      results,
    };
  },
};
|
||||
|
||||
export default socialSearchAction;
|
||||
@@ -167,6 +167,7 @@ class Researcher {
|
||||
session: session,
|
||||
researchBlockId: researchBlockId,
|
||||
fileIds: input.config.fileIds,
|
||||
mode: input.config.mode,
|
||||
});
|
||||
|
||||
actionOutput.push(...actionResults);
|
||||
|
||||
@@ -117,6 +117,7 @@ export interface ResearchAction<
|
||||
additionalConfig: AdditionalConfig & {
|
||||
researchBlockId: string;
|
||||
fileIds: string[];
|
||||
mode: SearchAgentConfig['mode'];
|
||||
},
|
||||
) => Promise<ActionOutput>;
|
||||
}
|
||||
|
||||
@@ -90,7 +90,7 @@ const weatherWidget: Widget = {
|
||||
|
||||
const locationRes = await fetch(openStreetMapUrl, {
|
||||
headers: {
|
||||
'User-Agent': 'Perplexica',
|
||||
'User-Agent': 'Vane',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
});
|
||||
@@ -109,7 +109,7 @@ const weatherWidget: Widget = {
|
||||
`https://api.open-meteo.com/v1/forecast?latitude=${location.lat}&longitude=${location.lon}&current=temperature_2m,relative_humidity_2m,apparent_temperature,is_day,precipitation,rain,showers,snowfall,weather_code,cloud_cover,pressure_msl,surface_pressure,wind_speed_10m,wind_direction_10m,wind_gusts_10m&hourly=temperature_2m,precipitation_probability,precipitation,weather_code&daily=weather_code,temperature_2m_max,temperature_2m_min,precipitation_sum,precipitation_probability_max&timezone=auto&forecast_days=7`,
|
||||
{
|
||||
headers: {
|
||||
'User-Agent': 'Perplexica',
|
||||
'User-Agent': 'Vane',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
@@ -143,7 +143,7 @@ const weatherWidget: Widget = {
|
||||
`https://api.open-meteo.com/v1/forecast?latitude=${params.lat}&longitude=${params.lon}&current=temperature_2m,relative_humidity_2m,apparent_temperature,is_day,precipitation,rain,showers,snowfall,weather_code,cloud_cover,pressure_msl,surface_pressure,wind_speed_10m,wind_direction_10m,wind_gusts_10m&hourly=temperature_2m,precipitation_probability,precipitation,weather_code&daily=weather_code,temperature_2m_max,temperature_2m_min,precipitation_sum,precipitation_probability_max&timezone=auto&forecast_days=7`,
|
||||
{
|
||||
headers: {
|
||||
'User-Agent': 'Perplexica',
|
||||
'User-Agent': 'Vane',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
@@ -152,7 +152,7 @@ const weatherWidget: Widget = {
|
||||
`https://nominatim.openstreetmap.org/reverse?lat=${params.lat}&lon=${params.lon}&format=json`,
|
||||
{
|
||||
headers: {
|
||||
'User-Agent': 'Perplexica',
|
||||
'User-Agent': 'Vane',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import path from 'node:path';
|
||||
import fs from 'fs';
|
||||
import { Config, ConfigModelProvider, UIConfigSections } from './types';
|
||||
import { hashObj } from '../serverUtils';
|
||||
import { hashObj } from '../utils/hash';
|
||||
import { getModelProvidersUIConfigSection } from '../models/providers';
|
||||
|
||||
class ConfigManager {
|
||||
|
||||
@@ -25,7 +25,9 @@ const reasoningModels = [
|
||||
'qwen3',
|
||||
'deepseek-v3.1',
|
||||
'magistral',
|
||||
'nemotron-3-nano',
|
||||
'nemotron-3',
|
||||
'nemotron-cascade-2',
|
||||
'glm-4.7-flash',
|
||||
];
|
||||
|
||||
class OllamaLLM extends BaseLLM<OllamaConfig> {
|
||||
|
||||
@@ -4,7 +4,7 @@ export const getWriterPrompt = (
|
||||
mode: 'speed' | 'balanced' | 'quality',
|
||||
) => {
|
||||
return `
|
||||
You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
|
||||
You are Vane, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
|
||||
|
||||
Your task is to provide answers that are:
|
||||
- **Informative and relevant**: Thoroughly address the user's query using the given context.
|
||||
|
||||
116
src/lib/scraper.ts
Normal file
116
src/lib/scraper.ts
Normal file
@@ -0,0 +1,116 @@
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { Readability } from '@mozilla/readability';
|
||||
import { Mutex } from 'async-mutex';
|
||||
|
||||
/**
 * Page scraper backed by one lazily launched, shared Playwright Chromium
 * instance.
 *
 * `scrape` renders a URL in a fresh browser context, extracts the readable
 * text with Readability and returns it with the page title. The browser is
 * closed after `IDLE_KILL_TIMEOUT` ms without active scrapes and relaunched on
 * the next call.
 */
class Scraper {
  private static browser: any | undefined;
  /** Delay (ms) after the last scrape finishes before the browser is closed. */
  private static IDLE_KILL_TIMEOUT = 30000;
  /** Timeout (ms) passed to `page.goto`. */
  private static NAVIGATION_TIMEOUT = 20000;
  private static idleTimeout: NodeJS.Timeout | undefined;
  /** Serialises browser launch and idle shutdown. */
  private static browserMutex = new Mutex();
  /** Number of `scrape` calls currently in flight. */
  private static userCount = 0;

  /** Launches the shared browser if needed and cancels any pending idle shutdown. */
  private static async initBrowser() {
    await this.browserMutex.runExclusive(async () => {
      if (!this.browser) {
        const { chromium } = await import('playwright');
        this.browser = await chromium.launch({
          headless: true,
          channel: 'chromium-headless-shell',
          args: [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-gpu',
            '--disable-blink-features=AutomationControlled',
          ],
        });
      }

      if (this.idleTimeout) clearTimeout(this.idleTimeout);
    });
  }

  /** (Re)arms the timer that closes the browser once no scrape is using it. */
  private static scheduleIdleKill() {
    if (this.idleTimeout) clearTimeout(this.idleTimeout);

    this.idleTimeout = setTimeout(() => {
      void this.browserMutex
        .runExclusive(async () => {
          // A scrape may have started since the timer was armed; leave the
          // browser alone in that case.
          if (!this.browser || this.userCount !== 0) return;

          try {
            await this.browser.close();
          } finally {
            // Drop the handle even if close() failed so the next scrape
            // launches a fresh browser.
            this.browser = undefined;
          }
        })
        // Runs inside a timer callback: a rejection here would otherwise be
        // an unhandled promise rejection.
        .catch((err: unknown) => {
          console.log('Error closing idle browser:', err);
        });
    }, this.IDLE_KILL_TIMEOUT);
  }

  /**
   * Loads `url` in a headless browser and extracts its readable content.
   *
   * @param url Address of the page to scrape.
   * @returns The page title and a text document of the form
   *   `# <title> - <url>` followed by the extracted text. If navigation or
   *   extraction fails, a placeholder with title `'Failed to scrape'` is
   *   returned instead.
   * @throws If the browser cannot be launched or the context/page cannot be
   *   created.
   */
  static async scrape(
    url: string,
  ): Promise<{ content: string; title: string }> {
    // Register as a user before touching the browser so an idle-kill callback
    // that is already queued cannot close it while this scrape is starting.
    this.userCount++;

    let context: any | undefined;

    try {
      await this.initBrowser();

      if (!this.browser) throw new Error('Browser not initialized');

      context = await this.browser.newContext({
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
      });

      await context.addInitScript(() => {
        Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
      });

      const page = await context.newPage();

      try {
        await page.goto(url, {
          waitUntil: 'domcontentloaded',
          timeout: this.NAVIGATION_TIMEOUT,
        });

        // Best effort: wait briefly for the full load event, but continue
        // with whatever has rendered if it does not arrive in time.
        await page
          .waitForLoadState('load', { timeout: 5000 })
          .catch(() => undefined);
        await page.waitForTimeout(500);

        const html = await page.content();

        const dom = new JSDOM(html, {
          url,
        });

        const content = new Readability(dom.window.document).parse();

        const title = await page.title();

        return {
          // `page.title()` yields a string, so `||` (not `??`) is needed for
          // the fallback to apply to pages without a title.
          content: `
# ${title || 'No title'} - ${url}
${content?.textContent?.trim() ?? 'No content available'}
`,
          title,
        };
      } catch (err) {
        console.log(`Error scraping ${url}:`, err);

        return {
          title: 'Failed to scrape',
          content: `# ${url}\n\nError scraping content.`,
        };
      }
    } finally {
      this.userCount--;

      // Always release the context, including when page setup failed.
      if (context) await context.close().catch(() => undefined);

      if (this.userCount === 0) {
        this.scheduleIdleKill();
      }
    }
  }
}
|
||||
|
||||
export default Scraper;
|
||||
@@ -1,6 +1,6 @@
|
||||
import { getSearxngURL } from './config/serverRegistry';
|
||||
|
||||
interface SearxngSearchOptions {
|
||||
export interface SearxngSearchOptions {
|
||||
categories?: string[];
|
||||
engines?: string[];
|
||||
language?: string;
|
||||
@@ -38,11 +38,30 @@ export const searchSearxng = async (
|
||||
});
|
||||
}
|
||||
|
||||
const res = await fetch(url);
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), 10000);
|
||||
|
||||
try {
|
||||
const res = await fetch(url, {
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`SearXNG error: ${res.statusText}`);
|
||||
}
|
||||
|
||||
const data = await res.json();
|
||||
|
||||
const results: SearxngSearchResult[] = data.results;
|
||||
const suggestions: string[] = data.suggestions;
|
||||
|
||||
return { results, suggestions };
|
||||
} catch (err: any) {
|
||||
if (err.name === 'AbortError') {
|
||||
throw new Error('SearXNG search timed out');
|
||||
}
|
||||
throw err;
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
}
|
||||
};
|
||||
|
||||
1
src/lib/serverActions.ts
Normal file
1
src/lib/serverActions.ts
Normal file
@@ -0,0 +1 @@
|
||||
'use server';
|
||||
@@ -146,7 +146,7 @@ class UploadManager {
|
||||
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
||||
const docBuffer = fs.readFileSync(filePath);
|
||||
|
||||
const docText = await officeParser.parseOfficeAsync(docBuffer)
|
||||
const docText = (await officeParser.parseOffice(docBuffer)).toText()
|
||||
|
||||
const docSplittedText = splitText(docText, 512, 128)
|
||||
const docEmbeddings = await this.embeddingModel.embedText(docSplittedText)
|
||||
|
||||
@@ -2,8 +2,7 @@ import BaseEmbedding from "../models/base/embedding";
|
||||
import UploadManager from "./manager";
|
||||
import computeSimilarity from "../utils/computeSimilarity";
|
||||
import { Chunk } from "../types";
|
||||
import { hashObj } from "../serverUtils";
|
||||
import fs from 'fs';
|
||||
import { hashObj } from '../utils/hash';
|
||||
|
||||
type UploadStoreParams = {
|
||||
embeddingModel: BaseEmbedding<any>;
|
||||
|
||||
16
src/lib/utils/jaccardSim.ts
Normal file
16
src/lib/utils/jaccardSim.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
 * Computes the Jaccard similarity of two strings over their sets of
 * lower-cased word tokens: |A ∩ B| / |A ∪ B|.
 *
 * Tokens are produced by splitting on runs of non-word characters (`\W+`).
 * Empty tokens, which `split` yields for empty input or leading/trailing
 * punctuation, are discarded so they do not count as a shared word.
 *
 * @param a First text.
 * @param b Second text.
 * @returns A value in [0, 1]; 0 when either text contains no word tokens.
 */
const computeJaccardSimilarity = (a: string, b: string): number => {
  const tokenize = (text: string): Set<string> =>
    new Set(
      text
        .toLowerCase()
        .split(/\W+/)
        .filter((word) => word.length > 0),
    );

  const setA = tokenize(a);
  const setB = tokenize(b);

  if (setA.size === 0 || setB.size === 0) return 0;

  // Count shared words directly instead of relying on Set.prototype.union /
  // intersection, which are only available on recent runtimes.
  let shared = 0;
  for (const word of setA) {
    if (setB.has(word)) shared++;
  }

  const unionSize = setA.size + setB.size - shared;

  return shared / unionSize;
};
|
||||
|
||||
export default computeJaccardSimilarity;
|
||||
@@ -4,7 +4,7 @@ const splitRegex = /(?<=\. |\n|! |\? |; |:\s|\d+\.\s|- |\* )/g;
|
||||
|
||||
const enc = getEncoding('cl100k_base');
|
||||
|
||||
const getTokenCount = (text: string): number => {
|
||||
export const getTokenCount = (text: string): number => {
|
||||
try {
|
||||
return enc.encode(text).length;
|
||||
} catch {
|
||||
|
||||
Reference in New Issue
Block a user