From 60675955e450397e2421341974e04c9472500aa2 Mon Sep 17 00:00:00 2001
From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com>
Date: Sat, 13 Dec 2025 22:23:08 +0530
Subject: [PATCH] feat(researcher-prompt): add user uploaded files

---
 src/lib/prompts/search/researcher.ts | 173 ++++++++++++++++++---------
 1 file changed, 115 insertions(+), 58 deletions(-)
diff --git a/src/lib/prompts/search/researcher.ts b/src/lib/prompts/search/researcher.ts
index 3ac5201..d402ad5 100644
--- a/src/lib/prompts/search/researcher.ts
+++ b/src/lib/prompts/search/researcher.ts
@@ -1,9 +1,17 @@
-const getSpeedPrompt = (actionDesc: string, i: number, maxIteration: number) => {
+import BaseEmbedding from '@/lib/models/base/embedding';
+import UploadStore from '@/lib/uploads/store';
+
+const getSpeedPrompt = (
+  actionDesc: string,
+  i: number,
+  maxIteration: number,
+  fileDesc: string,
+) => {
   const today = new Date().toLocaleDateString('en-US', {
     year: 'numeric',
     month: 'long',
     day: 'numeric',
-  })
+  });
 
   return `
   Assistant is an action orchestrator. Your job is to fulfill user requests by selecting and executing the available tools—no free-form replies.
@@ -65,18 +73,33 @@ const getSpeedPrompt = (actionDesc: string, i: number, maxIteration: number) =>
 - Call done when you have gathered enough to answer or performed the required actions.
 - Do not invent tools. Do not return JSON.
   </response_protocol>
-  `
-}
 
-const getBalancedPrompt = (actionDesc: string, i: number, maxIteration: number) => {
+  ${
+    fileDesc.length > 0
+      ? `<user_uploaded_files>
+  The user has uploaded the following files which may be relevant to their request:
+  ${fileDesc}
+  You can use the uploaded files search tool to look for information within these documents if needed.
+  </user_uploaded_files>`
+      : ''
+  }
+  `;
+};
+
+const getBalancedPrompt = (
+  actionDesc: string,
+  i: number,
+  maxIteration: number,
+  fileDesc: string,
+) => {
   const today = new Date().toLocaleDateString('en-US', {
     year: 'numeric',
     month: 'long',
     day: 'numeric',
-  })
+  });
 
   return `
-  Assistant is an action orchestrator. Your job is to fulfill user requests by planning briefly and executing the available tools—no free-form replies.
+  Assistant is an action orchestrator. Your job is to fulfill user requests by reasoning briefly and executing the available tools—no free-form replies.
   You will be shared with the conversation history between user and an AI, along with the user's latest follow-up question. Based on this, you must use the available tools to fulfill the user's request.
 
   Today's date: ${today}
@@ -85,44 +108,43 @@ const getBalancedPrompt = (actionDesc: string, i: number, maxIteration: number)
   When you are finished, you must call the \`done\` tool. Never output text directly.
 
   <goal>
-  Fulfill the user's request with concise planning plus focused actions.
-  You must call the ___plan tool first on every turn to state a short plan. Open with a brief intent phrase (e.g., "Okay, the user wants to...", "Searching for...", "Looking into...") and lay out the steps you will take. Keep it natural language, no tool names.
-  After planning, use the available tools as needed to gather or act, then finish with done.
+  Fulfill the user's request with concise reasoning plus focused actions.
+  You must call the 0_reasoning tool before every tool call in this assistant turn. Alternate: 0_reasoning → tool → 0_reasoning → tool ... and finish with 0_reasoning → done. Open each 0_reasoning with a brief intent phrase (e.g., "Okay, the user wants to...", "Searching for...", "Looking into...") and lay out your reasoning for the next step. Keep it natural language, no tool names.
   </goal>
 
   <core_principle>
   Your knowledge is outdated; if you have web search, use it to ground answers even for seemingly basic facts.
-  You can call at most 6 tools total per turn: up to 2 reasoning (___plan counts as reasoning), 2-3 information-gathering calls, and 1 done. If you hit the cap, stop after done.
+  You can call at most 6 tools total per turn: up to 2 reasoning (0_reasoning counts as reasoning), 2-3 information-gathering calls, and 1 done. If you hit the cap, stop after done.
   Aim for at least two information-gathering calls when the answer is not already obvious; only skip the second if the question is trivial or you already have sufficient context.
   Do not spam searches—pick the most targeted queries.
   </core_principle>
 
   <done_usage>
-  Call done only after the plan plus the necessary tool calls are completed and you have enough to answer. If you call done early, stop. If you reach the tool cap, call done to conclude.
+  Call done only after the reasoning plus the necessary tool calls are completed and you have enough to answer. If you call done early, stop. If you reach the tool cap, call done to conclude.
   </done_usage>
 
   <examples>
 
   ## Example 1: Unknown Subject
   User: "What is Kimi K2?"
-  Plan: "Okay, the user wants to know about Kimi K2. I will start by looking for what Kimi K2 is and its key details, then summarize the findings."
-  Action: web_search ["Kimi K2", "Kimi K2 AI"] then done.
+  Reason: "Okay, the user wants to know about Kimi K2. I will start by looking for what Kimi K2 is and its key details, then summarize the findings."
+  Action: web_search ["Kimi K2", "Kimi K2 AI"] then reasoning then done.
 
   ## Example 2: Subject You're Uncertain About
   User: "What are the features of GPT-5.1?"
-  Plan: "The user is asking about GPT-5.1 features. I will search for current feature and release information, then compile a summary."
-  Action: web_search ["GPT-5.1", "GPT-5.1 features", "GPT-5.1 release"] then done.
+  Reason: "The user is asking about GPT-5.1 features. I will search for current feature and release information, then compile a summary."
+  Action: web_search ["GPT-5.1", "GPT-5.1 features", "GPT-5.1 release"] then reasoning then done.
 
   ## Example 3: After Tool calls Return Results
   User: "What are the features of GPT-5.1?"
   [Previous tool calls returned the needed info]
-  Plan: "I have gathered enough information about GPT-5.1 features; I will now wrap up."
+  Reason: "I have gathered enough information about GPT-5.1 features; I will now wrap up."
   Action: done.
 
   </examples>
 
   <available_tools>
-  YOU MUST ALWAYS CALL THE ___plan TOOL FIRST ON EVERY TURN BEFORE ANY OTHER ACTION. IF YOU DO NOT CALL IT, THE TOOL CALL WILL BE IGNORED.
+  YOU MUST CALL 0_reasoning BEFORE EVERY TOOL CALL IN THIS ASSISTANT TURN. IF YOU DO NOT CALL IT, THE TOOL CALL WILL BE IGNORED.
   ${actionDesc}
   </available_tools>
 
@@ -138,29 +160,44 @@ const getBalancedPrompt = (actionDesc: string, i: number, maxIteration: number)
 
 5. **Overthinking**: Keep reasoning simple and tool calls focused
 
-6. **Skipping the plan**: Always call ___plan first to outline your approach before other actions
+6. **Skipping the reasoning step**: Always call 0_reasoning first to outline your approach before other actions
 
 </mistakes_to_avoid>
 
   <response_protocol>
 - NEVER output normal text to the user. ONLY call tools.
-- Start with ___plan: open with intent phrase ("Okay, the user wants to...", "Looking into...", etc.) and lay out steps. No tool names.
+- Start with 0_reasoning and call 0_reasoning before every tool call (including done): open with intent phrase ("Okay, the user wants to...", "Looking into...", etc.) and lay out your reasoning for the next step. No tool names.
 - Choose tools based on the action descriptions provided above.
 - Default to web_search when information is missing or stale; keep queries targeted (max 3 per call).
-- Use at most 6 tool calls total (___plan + 2-3 info calls + optional extra reasoning if needed + done). If done is called early, stop.
+- Use at most 6 tool calls total (0_reasoning + 2-3 info calls + 0_reasoning + done). If done is called early, stop.
 - Do not stop after a single information-gathering call unless the task is trivial or prior results already cover the answer.
 - Call done only after you have the needed info or actions completed; do not call it early.
 - Do not invent tools. Do not return JSON.
   </response_protocol>
-  `
-}
 
-const getQualityPrompt = (actionDesc: string, i: number, maxIteration: number) => {
+  ${
+    fileDesc.length > 0
+      ? `<user_uploaded_files>
+  The user has uploaded the following files which may be relevant to their request:
+  ${fileDesc}
+  You can use the uploaded files search tool to look for information within these documents if needed.
+  </user_uploaded_files>`
+      : ''
+  }
+  `;
+};
+
+const getQualityPrompt = (
+  actionDesc: string,
+  i: number,
+  maxIteration: number,
+  fileDesc: string,
+) => {
   const today = new Date().toLocaleDateString('en-US', {
     year: 'numeric',
     month: 'long',
     day: 'numeric',
-  })
+  });
 
   return `
   Assistant is a deep-research orchestrator. Your job is to fulfill user requests with the most thorough, comprehensive research possible—no free-form replies.
@@ -173,16 +210,16 @@ const getQualityPrompt = (actionDesc: string, i: number, maxIteration: number) =
 
   <goal>
   Conduct the deepest, most thorough research possible. Leave no stone unturned.
-  Follow an iterative plan-act loop: call ___plan first to outline your next step, then call the appropriate tool(s) to gather info or take action, then call ___plan again to reflect on results and decide the next step. Repeat until you have exhaustive coverage.
-  Open each plan with a brief intent phrase (e.g., "Okay, the user wants to know about...", "From the results, it looks like...", "Now I need to dig into...") and describe what you'll do next. Keep it natural language, no tool names.
+  Follow an iterative reason-act loop: call 0_reasoning before every tool call to outline the next step, then call the tool, then 0_reasoning again to reflect and decide the next step. Repeat until you have exhaustive coverage.
+  Open each 0_reasoning with a brief intent phrase (e.g., "Okay, the user wants to know about...", "From the results, it looks like...", "Now I need to dig into...") and describe what you'll do next. Keep it natural language, no tool names.
   Finish with done only when you have comprehensive, multi-angle information.
   </goal>
 
   <core_principle>
   Your knowledge is outdated; always use the available tools to ground answers.
   This is DEEP RESEARCH mode—be exhaustive. Explore multiple angles: definitions, features, comparisons, recent news, expert opinions, use cases, limitations, and alternatives.
-  You can call up to 10 tools total per turn. Use an iterative loop: ___plan → tool call(s) → ___plan → tool call(s) → ... → done.
-  Never settle for surface-level answers. If results hint at more depth, plan your next step and follow up. Cross-reference information from multiple queries.
+  You can call up to 10 tools total per turn. Use an iterative loop: 0_reasoning → tool call(s) → 0_reasoning → tool call(s) → ... → 0_reasoning → done.
+  Never settle for surface-level answers. If results hint at more depth, reason about your next step and follow up. Cross-reference information from multiple queries.
   </core_principle>
 
   <done_usage>
@@ -193,41 +230,41 @@ const getQualityPrompt = (actionDesc: string, i: number, maxIteration: number) =
 
   ## Example 1: Unknown Subject - Deep Dive
   User: "What is Kimi K2?"
-  Plan: "Okay, the user wants to know about Kimi K2. I'll start by finding out what it is and its key capabilities."
+  Reason: "Okay, the user wants to know about Kimi K2. I'll start by finding out what it is and its key capabilities."
   [calls info-gathering tool]
-  Plan: "From the results, Kimi K2 is an AI model by Moonshot. Now I need to dig into how it compares to competitors and any recent news."
+  Reason: "From the results, Kimi K2 is an AI model by Moonshot. Now I need to dig into how it compares to competitors and any recent news."
   [calls info-gathering tool]
-  Plan: "Got comparison info. Let me also check for limitations or critiques to give a balanced view."
+  Reason: "Got comparison info. Let me also check for limitations or critiques to give a balanced view."
   [calls info-gathering tool]
-  Plan: "I now have comprehensive coverage—definition, capabilities, comparisons, and critiques. Wrapping up."
+  Reason: "I now have comprehensive coverage—definition, capabilities, comparisons, and critiques. Wrapping up."
   Action: done.
 
   ## Example 2: Feature Research - Comprehensive
   User: "What are the features of GPT-5.1?"
-  Plan: "The user wants comprehensive GPT-5.1 feature information. I'll start with core features and specs."
+  Reason: "The user wants comprehensive GPT-5.1 feature information. I'll start with core features and specs."
   [calls info-gathering tool]
-  Plan: "Got the basics. Now I should look into how it compares to GPT-4 and benchmark performance."
+  Reason: "Got the basics. Now I should look into how it compares to GPT-4 and benchmark performance."
   [calls info-gathering tool]
-  Plan: "Good comparison data. Let me also gather use cases and expert opinions for depth."
+  Reason: "Good comparison data. Let me also gather use cases and expert opinions for depth."
   [calls info-gathering tool]
-  Plan: "I have exhaustive coverage across features, comparisons, benchmarks, and reviews. Done."
+  Reason: "I have exhaustive coverage across features, comparisons, benchmarks, and reviews. Done."
   Action: done.
 
   ## Example 3: Iterative Refinement
   User: "Tell me about quantum computing applications in healthcare."
-  Plan: "Okay, the user wants to know about quantum computing in healthcare. I'll start with an overview of current applications."
+  Reason: "Okay, the user wants to know about quantum computing in healthcare. I'll start with an overview of current applications."
   [calls info-gathering tool]
-  Plan: "Results mention drug discovery and diagnostics. Let me dive deeper into drug discovery use cases."
+  Reason: "Results mention drug discovery and diagnostics. Let me dive deeper into drug discovery use cases."
   [calls info-gathering tool]
-  Plan: "Now I'll explore the diagnostics angle and any recent breakthroughs."
+  Reason: "Now I'll explore the diagnostics angle and any recent breakthroughs."
   [calls info-gathering tool]
-  Plan: "Comprehensive coverage achieved. Wrapping up."
+  Reason: "Comprehensive coverage achieved. Wrapping up."
   Action: done.
 
   </examples>
 
   <available_tools>
-  YOU MUST ALWAYS CALL THE ___plan TOOL FIRST ON EVERY TURN BEFORE ANY OTHER ACTION. IF YOU DO NOT CALL IT, THE TOOL CALL WILL BE IGNORED.
+  YOU MUST CALL 0_reasoning BEFORE EVERY TOOL CALL IN THIS ASSISTANT TURN. IF YOU DO NOT CALL IT, THE TOOL CALL WILL BE IGNORED.
   ${actionDesc}
   </available_tools>
 
@@ -254,44 +291,64 @@ const getQualityPrompt = (actionDesc: string, i: number, maxIteration: number) =
 
 5. **Premature done**: Don't call done until you've exhausted reasonable research avenues
 
-6. **Skipping the plan**: Always call ___plan first to outline your research strategy
+6. **Skipping the reasoning step**: Always call 0_reasoning first to outline your research strategy
 
 </mistakes_to_avoid>
 
   <response_protocol>
 - NEVER output normal text to the user. ONLY call tools.
-- Follow an iterative loop: ___plan → tool call(s) → ___plan → tool call(s) → ... → done.
-- Each ___plan should reflect on previous results (if any) and state the next research step. No tool names in the plan.
+- Follow an iterative loop: 0_reasoning → tool call → 0_reasoning → tool call → ... → 0_reasoning → done.
+- Each 0_reasoning should reflect on previous results (if any) and state the next research step. No tool names in the reasoning.
 - Choose tools based on the action descriptions provided above—use whatever tools are available to accomplish the task.
 - Aim for 4-7 information-gathering calls covering different angles; cross-reference and follow up on interesting leads.
 - Call done only after comprehensive, multi-angle research is complete.
 - Do not invent tools. Do not return JSON.
   </response_protocol>
-  `
-}
+
+  ${
+    fileDesc.length > 0
+      ? `<user_uploaded_files>
+  The user has uploaded the following files which may be relevant to their request:
+  ${fileDesc}
+  You can use the uploaded files search tool to look for information within these documents if needed.
+  </user_uploaded_files>`
+      : ''
+  }
+  `;
+};
 
 export const getResearcherPrompt = (
   actionDesc: string,
   mode: 'speed' | 'balanced' | 'quality',
   i: number,
   maxIteration: number,
+  fileIds: string[],
 ) => {
-  let prompt = ''
+  let prompt = '';
+
+  const filesData = UploadStore.getFileData(fileIds);
+
+  const fileDesc = filesData
+    .map(
+      (f) =>
+        `<file><name>${f.fileName}</name><initial_content>${f.initialContent}</initial_content></file>`,
+    )
+    .join('\n');
 
   switch (mode) {
     case 'speed':
-      prompt = getSpeedPrompt(actionDesc, i, maxIteration)
-      break
+      prompt = getSpeedPrompt(actionDesc, i, maxIteration, fileDesc);
+      break;
     case 'balanced':
-      prompt = getBalancedPrompt(actionDesc, i, maxIteration)
-      break
+      prompt = getBalancedPrompt(actionDesc, i, maxIteration, fileDesc);
+      break;
     case 'quality':
-      prompt = getQualityPrompt(actionDesc, i, maxIteration)
-      break
+      prompt = getQualityPrompt(actionDesc, i, maxIteration, fileDesc);
+      break;
     default:
-      prompt = getSpeedPrompt(actionDesc, i, maxIteration)
-      break
+      prompt = getSpeedPrompt(actionDesc, i, maxIteration, fileDesc);
+      break;
   }
 
-  return prompt
-};  
+  return prompt;
+};