mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-18 16:02:24 +00:00
Better proxy handling.
This commit is contained in:
parent
ec50ede6c2
commit
b6b64dff86
6 changed files with 298 additions and 160 deletions
|
|
@ -5,6 +5,7 @@ import { createRef, ref } from "lit/directives/ref.js";
|
|||
import { FileText } from "lucide";
|
||||
import { EXTRACT_DOCUMENT_DESCRIPTION } from "../prompts/prompts.js";
|
||||
import { loadAttachment } from "../utils/attachment-utils.js";
|
||||
import { isCorsError } from "../utils/proxy-utils.js";
|
||||
import { registerToolRenderer, renderCollapsibleHeader, renderHeader } from "./renderer-registry.js";
|
||||
import type { ToolRenderer, ToolRenderResult } from "./types.js";
|
||||
|
||||
|
|
@ -34,13 +35,13 @@ export interface ExtractDocumentResult {
|
|||
export function createExtractDocumentTool(): AgentTool<typeof extractDocumentSchema, ExtractDocumentResult> & {
|
||||
corsProxyUrl?: string;
|
||||
} {
|
||||
return {
|
||||
const tool = {
|
||||
label: "Extract Document",
|
||||
name: "extract_document",
|
||||
corsProxyUrl: undefined, // Can be set by consumer (e.g., from user settings)
|
||||
corsProxyUrl: undefined as string | undefined, // Can be set by consumer (e.g., from user settings)
|
||||
description: EXTRACT_DOCUMENT_DESCRIPTION,
|
||||
parameters: extractDocumentSchema,
|
||||
execute: async function (_toolCallId: string, args: ExtractDocumentParams, signal?: AbortSignal) {
|
||||
execute: async (_toolCallId: string, args: ExtractDocumentParams, signal?: AbortSignal) => {
|
||||
if (signal?.aborted) {
|
||||
throw new Error("Extract document aborted");
|
||||
}
|
||||
|
|
@ -57,17 +58,11 @@ export function createExtractDocumentTool(): AgentTool<typeof extractDocumentSch
|
|||
throw new Error(`Invalid URL: ${url}`);
|
||||
}
|
||||
|
||||
// Determine fetch URL (with or without CORS proxy)
|
||||
let fetchUrl = url;
|
||||
if (this.corsProxyUrl) {
|
||||
fetchUrl = this.corsProxyUrl + encodeURIComponent(url);
|
||||
}
|
||||
|
||||
// Size limit: 50MB
|
||||
const MAX_SIZE = 50 * 1024 * 1024;
|
||||
|
||||
try {
|
||||
// Attempt to fetch the document
|
||||
// Helper function to fetch and process document
|
||||
const fetchAndProcess = async (fetchUrl: string) => {
|
||||
const response = await fetch(fetchUrl, { signal });
|
||||
|
||||
if (!response.ok) {
|
||||
|
|
@ -98,52 +93,31 @@ export function createExtractDocumentTool(): AgentTool<typeof extractDocumentSch
|
|||
);
|
||||
}
|
||||
|
||||
// Extract filename from URL
|
||||
const urlParts = url.split("/");
|
||||
let fileName = urlParts[urlParts.length - 1]?.split("?")[0] || "document";
|
||||
if (url.startsWith("https://arxiv.org/")) {
|
||||
fileName = fileName + ".pdf";
|
||||
}
|
||||
return arrayBuffer;
|
||||
};
|
||||
|
||||
// Use loadAttachment to process the document
|
||||
const attachment = await loadAttachment(arrayBuffer, fileName);
|
||||
// Try without proxy first, fallback to proxy on CORS error
|
||||
let arrayBuffer: ArrayBuffer;
|
||||
|
||||
if (!attachment.extractedText) {
|
||||
const mimeType = response.headers.get("content-type") || "unknown";
|
||||
throw new Error(
|
||||
`Document format not supported. Supported formats:\n` +
|
||||
`- PDF (.pdf)\n` +
|
||||
`- Word (.docx)\n` +
|
||||
`- Excel (.xlsx, .xls)\n` +
|
||||
`- PowerPoint (.pptx)\n\n` +
|
||||
`Detected: ${mimeType}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Determine format from attachment
|
||||
let format = "unknown";
|
||||
if (attachment.mimeType.includes("pdf")) {
|
||||
format = "pdf";
|
||||
} else if (attachment.mimeType.includes("wordprocessingml")) {
|
||||
format = "docx";
|
||||
} else if (attachment.mimeType.includes("spreadsheetml") || attachment.mimeType.includes("ms-excel")) {
|
||||
format = "xlsx";
|
||||
} else if (attachment.mimeType.includes("presentationml")) {
|
||||
format = "pptx";
|
||||
}
|
||||
|
||||
return {
|
||||
output: attachment.extractedText,
|
||||
details: {
|
||||
extractedText: attachment.extractedText,
|
||||
format,
|
||||
fileName: attachment.fileName,
|
||||
size: attachment.size,
|
||||
},
|
||||
};
|
||||
} catch (error: any) {
|
||||
// Handle CORS errors specifically
|
||||
if (error.name === "TypeError" && error.message.includes("Failed to fetch")) {
|
||||
try {
|
||||
// Attempt direct fetch first
|
||||
arrayBuffer = await fetchAndProcess(url);
|
||||
} catch (directError: any) {
|
||||
// If CORS error and proxy is available, retry with proxy
|
||||
if (isCorsError(directError) && tool.corsProxyUrl) {
|
||||
try {
|
||||
const proxiedUrl = tool.corsProxyUrl + encodeURIComponent(url);
|
||||
arrayBuffer = await fetchAndProcess(proxiedUrl);
|
||||
} catch (proxyError: any) {
|
||||
// Proxy fetch also failed - throw helpful message
|
||||
throw new Error(
|
||||
`TELL USER: Unable to fetch the document due to CORS restrictions.\n\n` +
|
||||
`Tried with proxy but it also failed: ${proxyError.message}\n\n` +
|
||||
`INSTRUCT USER: Please download the file manually and attach it to your message using the attachment button (paperclip icon) in the message input area. I can then extract the text from the attached file.`,
|
||||
);
|
||||
}
|
||||
} else if (isCorsError(directError) && !tool.corsProxyUrl) {
|
||||
// CORS error but no proxy configured
|
||||
throw new Error(
|
||||
`TELL USER: Unable to fetch the document due to CORS restrictions (the server blocks requests from browser extensions).\n\n` +
|
||||
`To fix this, you need to configure a CORS proxy in Sitegeist settings:\n` +
|
||||
|
|
@ -151,15 +125,58 @@ export function createExtractDocumentTool(): AgentTool<typeof extractDocumentSch
|
|||
`2. Find "CORS Proxy URL" setting\n` +
|
||||
`3. Enter a proxy URL like: https://corsproxy.io/?\n` +
|
||||
`4. Save and try again\n\n` +
|
||||
`Would you like me to explain what a CORS proxy is and how to set one up?`,
|
||||
`Alternatively, download the file manually and attach it to your message using the attachment button (paperclip icon).`,
|
||||
);
|
||||
} else {
|
||||
// Not a CORS error - re-throw
|
||||
throw directError;
|
||||
}
|
||||
|
||||
// Re-throw other errors
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Extract filename from URL
|
||||
const urlParts = url.split("/");
|
||||
let fileName = urlParts[urlParts.length - 1]?.split("?")[0] || "document";
|
||||
if (url.startsWith("https://arxiv.org/")) {
|
||||
fileName = fileName + ".pdf";
|
||||
}
|
||||
|
||||
// Use loadAttachment to process the document
|
||||
const attachment = await loadAttachment(arrayBuffer, fileName);
|
||||
|
||||
if (!attachment.extractedText) {
|
||||
throw new Error(
|
||||
`Document format not supported. Supported formats:\n` +
|
||||
`- PDF (.pdf)\n` +
|
||||
`- Word (.docx)\n` +
|
||||
`- Excel (.xlsx, .xls)\n` +
|
||||
`- PowerPoint (.pptx)`,
|
||||
);
|
||||
}
|
||||
|
||||
// Determine format from attachment
|
||||
let format = "unknown";
|
||||
if (attachment.mimeType.includes("pdf")) {
|
||||
format = "pdf";
|
||||
} else if (attachment.mimeType.includes("wordprocessingml")) {
|
||||
format = "docx";
|
||||
} else if (attachment.mimeType.includes("spreadsheetml") || attachment.mimeType.includes("ms-excel")) {
|
||||
format = "xlsx";
|
||||
} else if (attachment.mimeType.includes("presentationml")) {
|
||||
format = "pptx";
|
||||
}
|
||||
|
||||
return {
|
||||
output: attachment.extractedText,
|
||||
details: {
|
||||
extractedText: attachment.extractedText,
|
||||
format,
|
||||
fileName: attachment.fileName,
|
||||
size: attachment.size,
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
return tool;
|
||||
}
|
||||
|
||||
// Export a default instance
|
||||
|
|
@ -214,7 +231,7 @@ export const extractDocumentRenderer: ToolRenderer<ExtractDocumentParams, Extrac
|
|||
}
|
||||
${
|
||||
result.isError && output
|
||||
? html`<console-block .content=${output} .variant="error"></console-block>`
|
||||
? html`<console-block .content=${output} .variant=${"error"}></console-block>`
|
||||
: ""
|
||||
}
|
||||
</div>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue