diff --git a/docs/API/SEARCH.md b/docs/API/SEARCH.md
index 04f11ef..0c35a81 100644
--- a/docs/API/SEARCH.md
+++ b/docs/API/SEARCH.md
@@ -57,7 +57,7 @@ Use the `id` field as the `providerId` and the `key` field from the models array
 
 ### Request
 
-The API accepts a JSON object in the request body, where you define the focus mode, chat models, embedding models, and your query.
+The API accepts a JSON object in the request body, where you define the enabled search `sources`, chat models, embedding models, and your query.
 
 #### Request Body Structure
 
@@ -72,7 +72,7 @@ The API accepts a JSON object in the request body, where you define the focus mo
     "key": "text-embedding-3-large"
   },
   "optimizationMode": "speed",
-  "focusMode": "webSearch",
+  "sources": ["web"],
   "query": "What is Perplexica",
   "history": [
     ["human", "Hi, how are you?"],
@@ -87,24 +87,25 @@ The API accepts a JSON object in the request body, where you define the focus mo
 
 ### Request Parameters
 
-- **`chatModel`** (object, optional): Defines the chat model to be used for the query. To get available providers and models, send a GET request to `http://localhost:3000/api/providers`.
+- **`chatModel`** (object, required): Defines the chat model to be used for the query. To get available providers and models, send a GET request to `http://localhost:3000/api/providers`.
 
   - `providerId` (string): The UUID of the provider. You can get this from the `/api/providers` endpoint response.
   - `key` (string): The model key/identifier (e.g., `gpt-4o-mini`, `llama3.1:latest`). Use the `key` value from the provider's `chatModels` array, not the display name.
 
-- **`embeddingModel`** (object, optional): Defines the embedding model for similarity-based searching. To get available providers and models, send a GET request to `http://localhost:3000/api/providers`.
+- **`embeddingModel`** (object, required): Defines the embedding model for similarity-based searching. To get available providers and models, send a GET request to `http://localhost:3000/api/providers`.
 
   - `providerId` (string): The UUID of the embedding provider. You can get this from the `/api/providers` endpoint response.
   - `key` (string): The embedding model key (e.g., `text-embedding-3-large`, `nomic-embed-text`). Use the `key` value from the provider's `embeddingModels` array, not the display name.
 
-- **`focusMode`** (string, required): Specifies which focus mode to use. Available modes:
+- **`sources`** (array, required): Which search sources to enable. Available values:
 
-  - `webSearch`, `academicSearch`, `writingAssistant`, `wolframAlphaSearch`, `youtubeSearch`, `redditSearch`.
+  - `web`, `academic`, `discussions`.
 
 - **`optimizationMode`** (string, optional): Specifies the optimization mode to control the balance between performance and quality. Available modes:
 
   - `speed`: Prioritize speed and return the fastest answer.
   - `balanced`: Provide a balanced answer with good speed and reasonable quality.
+  - `quality`: Prioritize answer quality (may be slower).
 
 - **`query`** (string, required): The search query or question.
@@ -132,14 +133,14 @@ The response from the API includes both the final message and the sources used t
   "message": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online. Here are some key features and characteristics of Perplexica:\n\n- **AI-Powered Technology**: It utilizes advanced machine learning algorithms to not only retrieve information but also to understand the context and intent behind user queries, providing more relevant results [1][5].\n\n- **Open-Source**: Being open-source, Perplexica offers flexibility and transparency, allowing users to explore its functionalities without the constraints of proprietary software [3][10].",
   "sources": [
     {
-      "pageContent": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online.",
+      "content": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online.",
       "metadata": {
         "title": "What is Perplexica, and how does it function as an AI-powered search ...",
         "url": "https://askai.glarity.app/search/What-is-Perplexica--and-how-does-it-function-as-an-AI-powered-search-engine"
       }
     },
     {
-      "pageContent": "Perplexica is an open-source AI-powered search tool that dives deep into the internet to find precise answers.",
+      "content": "Perplexica is an open-source AI-powered search tool that dives deep into the internet to find precise answers.",
       "metadata": {
         "title": "Sahar Mor's Post",
         "url": "https://www.linkedin.com/posts/sahar-mor_a-new-open-source-project-called-perplexica-activity-7204489745668694016-ncja"
@@ -158,7 +159,7 @@ Example of streamed response objects:
 
 ```
 {"type":"init","data":"Stream connected"}
-{"type":"sources","data":[{"pageContent":"...","metadata":{"title":"...","url":"..."}},...]}
+{"type":"sources","data":[{"content":"...","metadata":{"title":"...","url":"..."}},...]}
 {"type":"response","data":"Perplexica is an "}
 {"type":"response","data":"innovative, open-source "}
 {"type":"response","data":"AI-powered search engine..."}
@@ -174,9 +175,9 @@ Clients should process each line as a separate JSON object. The different messag
 
 ### Fields in the Response
 
-- **`message`** (string): The search result, generated based on the query and focus mode.
+- **`message`** (string): The search result, generated based on the query and enabled `sources`.
 - **`sources`** (array): A list of sources that were used to generate the search result. Each source includes:
-  - `pageContent`: A snippet of the relevant content from the source.
+  - `content`: A snippet of the relevant content from the source.
   - `metadata`: Metadata about the source, including:
     - `title`: The title of the webpage.
     - `url`: The URL of the webpage.
@@ -185,5 +186,5 @@ Clients should process each line as a separate JSON object. The different messag
 
 If an error occurs during the search process, the API will return an appropriate error message with an HTTP status code.
 
-- **400**: If the request is malformed or missing required fields (e.g., no focus mode or query).
+- **400**: If the request is malformed or missing required fields (e.g., no `sources` or `query`).
 - **500**: If an internal server error occurs during the search.
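Example (not part of the patch): a minimal TypeScript sketch of a non-streaming call against the request/response shape documented above. The base URL, provider UUIDs, and model keys are placeholders; real values come from `GET /api/providers`.

```ts
// Hedged sketch of a non-streaming POST /api/search call using the new `sources`
// field. Provider IDs and model keys below are placeholders.

interface SearchSource {
  content: string; // renamed from `pageContent` in this change
  metadata: { title: string; url: string };
}

interface SearchResponse {
  message: string;
  sources: SearchSource[];
}

async function searchOnce(query: string): Promise<SearchResponse> {
  const res = await fetch('http://localhost:3000/api/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      chatModel: {
        providerId: '00000000-0000-0000-0000-000000000000', // placeholder UUID
        key: 'gpt-4o-mini',
      },
      embeddingModel: {
        providerId: '00000000-0000-0000-0000-000000000000', // placeholder UUID
        key: 'text-embedding-3-large',
      },
      optimizationMode: 'speed',
      sources: ['web'], // previously `focusMode: 'webSearch'`
      query,
      stream: false,
    }),
  });

  if (!res.ok) {
    // 400 means `sources` or `query` was missing; 500 is an internal search error.
    throw new Error(`Search request failed with status ${res.status}`);
  }

  return (await res.json()) as SearchResponse;
}
```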
diff --git a/src/app/api/search/route.ts b/src/app/api/search/route.ts
index dc1d0b8..0991268 100644
--- a/src/app/api/search/route.ts
+++ b/src/app/api/search/route.ts
@@ -1,12 +1,13 @@
 import ModelRegistry from '@/lib/models/registry';
 import { ModelWithProvider } from '@/lib/models/types';
 import SessionManager from '@/lib/session';
-import SearchAgent from '@/lib/agents/search';
 import { ChatTurnMessage } from '@/lib/types';
+import { SearchSources } from '@/lib/agents/search/types';
+import APISearchAgent from '@/lib/agents/search/api';
 
 interface ChatRequestBody {
-  optimizationMode: 'speed' | 'balanced';
-  focusMode: string;
+  optimizationMode: 'speed' | 'balanced' | 'quality';
+  sources: SearchSources[];
   chatModel: ModelWithProvider;
   embeddingModel: ModelWithProvider;
   query: string;
@@ -19,15 +20,15 @@ export const POST = async (req: Request) => {
   try {
     const body: ChatRequestBody = await req.json();
 
-    if (!body.focusMode || !body.query) {
+    if (!body.sources || !body.query) {
       return Response.json(
-        { message: 'Missing focus mode or query' },
+        { message: 'Missing sources or query' },
         { status: 400 },
       );
     }
 
     body.history = body.history || [];
-    body.optimizationMode = body.optimizationMode || 'balanced';
+    body.optimizationMode = body.optimizationMode || 'speed';
    body.stream = body.stream || false;
 
     const registry = new ModelRegistry();
@@ -48,18 +49,21 @@ export const POST = async (req: Request) => {
 
     const session = SessionManager.createSession();
 
-    const agent = new SearchAgent();
+    const agent = new APISearchAgent();
 
     agent.searchAsync(session, {
      chatHistory: history,
       config: {
         embedding: embeddings,
         llm: llm,
-        sources: ['web', 'discussions', 'academic'],
-        mode: 'balanced',
+        sources: body.sources,
+        mode: body.optimizationMode,
         fileIds: [],
+        systemInstructions: body.systemInstructions || '',
       },
       followUp: body.query,
+      chatId: crypto.randomUUID(),
+      messageId: crypto.randomUUID(),
     });
 
     if (!body.stream) {
@@ -71,36 +75,37 @@ export const POST = async (req: Request) => {
         let message = '';
         let sources: any[] = [];
 
-        session.addListener('data', (data: string) => {
-          try {
-            const parsedData = JSON.parse(data);
-            if (parsedData.type === 'response') {
-              message += parsedData.data;
-            } else if (parsedData.type === 'sources') {
-              sources = parsedData.data;
+        session.subscribe((event: string, data: Record<string, any>) => {
+          if (event === 'data') {
+            try {
+              if (data.type === 'response') {
+                message += data.data;
+              } else if (data.type === 'searchResults') {
+                sources = data.data;
+              }
+            } catch (error) {
+              reject(
+                Response.json(
+                  { message: 'Error parsing data' },
+                  { status: 500 },
+                ),
+              );
             }
-          } catch (error) {
+          }
+
+          if (event === 'end') {
+            resolve(Response.json({ message, sources }, { status: 200 }));
+          }
+
+          if (event === 'error') {
             reject(
               Response.json(
-                { message: 'Error parsing data' },
+                { message: 'Search error', error: data },
                 { status: 500 },
               ),
             );
           }
         });
-
-        session.addListener('end', () => {
-          resolve(Response.json({ message, sources }, { status: 200 }));
-        });
-
-        session.addListener('error', (error: any) => {
-          reject(
-            Response.json(
-              { message: 'Search error', error },
-              { status: 500 },
-            ),
-          );
-        });
       },
     );
   }
@@ -131,54 +136,54 @@ export const POST = async (req: Request) => {
           } catch (error) {}
         });
 
-        session.addListener('data', (data: string) => {
-          if (signal.aborted) return;
+        session.subscribe((event: string, data: Record<string, any>) => {
+          if (event === 'data') {
+            if (signal.aborted) return;
 
-          try {
-            const parsedData = JSON.parse(data);
-
-            if (parsedData.type === 'response') {
-              controller.enqueue(
-                encoder.encode(
-                  JSON.stringify({
-                    type: 'response',
-                    data: parsedData.data,
-                  }) + '\n',
-                ),
-              );
-            } else if (parsedData.type === 'sources') {
-              sources = parsedData.data;
-              controller.enqueue(
-                encoder.encode(
-                  JSON.stringify({
-                    type: 'sources',
-                    data: sources,
-                  }) + '\n',
-                ),
-              );
+            try {
+              if (data.type === 'response') {
+                controller.enqueue(
+                  encoder.encode(
+                    JSON.stringify({
+                      type: 'response',
+                      data: data.data,
+                    }) + '\n',
+                  ),
+                );
+              } else if (data.type === 'searchResults') {
+                sources = data.data;
+                controller.enqueue(
+                  encoder.encode(
+                    JSON.stringify({
+                      type: 'sources',
+                      data: sources,
+                    }) + '\n',
+                  ),
+                );
+              }
+            } catch (error) {
+              controller.error(error);
            }
-          } catch (error) {
-            controller.error(error);
           }
-        });
 
-        session.addListener('end', () => {
-          if (signal.aborted) return;
+          if (event === 'end') {
+            if (signal.aborted) return;
 
-          controller.enqueue(
-            encoder.encode(
-              JSON.stringify({
-                type: 'done',
-              }) + '\n',
-            ),
-          );
-          controller.close();
-        });
+            controller.enqueue(
+              encoder.encode(
+                JSON.stringify({
+                  type: 'done',
+                }) + '\n',
+              ),
+            );
+            controller.close();
+          }
 
-        session.addListener('error', (error: any) => {
-          if (signal.aborted) return;
+          if (event === 'error') {
+            if (signal.aborted) return;
 
-          controller.error(error);
+            controller.error(data);
+          }
         });
       },
       cancel() {
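Example (not part of the patch): a minimal TypeScript consumer for the streaming path, assuming the same request body as above with `stream: true`. It parses the newline-delimited JSON events the route writes (`sources`, `response`, `done`; the docs also show an initial `init` event). The helper name and local URL are illustrative only.

```ts
// Hedged sketch of a client reading the NDJSON stream: one JSON object per line.

type StreamEvent =
  | { type: 'init'; data: string }
  | {
      type: 'sources';
      data: { content: string; metadata: { title: string; url: string } }[];
    }
  | { type: 'response'; data: string }
  | { type: 'done' };

async function searchStream(body: Record<string, unknown>): Promise<string> {
  const res = await fetch('http://localhost:3000/api/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ ...body, stream: true }),
  });

  if (!res.ok || !res.body) {
    throw new Error(`Search request failed with status ${res.status}`);
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let answer = '';

  while (true) {
    const { value, done } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });

    // Complete lines are whole events; keep any partial line for the next chunk.
    const lines = buffer.split('\n');
    buffer = lines.pop() ?? '';

    for (const line of lines) {
      if (!line.trim()) continue;
      const event = JSON.parse(line) as StreamEvent;

      if (event.type === 'response') {
        answer += event.data; // accumulate answer tokens as they arrive
      } else if (event.type === 'sources') {
        console.log('sources:', event.data.map((s) => s.metadata.url));
      } else if (event.type === 'done') {
        return answer;
      }
    }
  }

  return answer;
}
```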