mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-21 23:04:41 +00:00
refactor: finish companion rename migration
Complete the remaining pi-to-companion rename across companion-os, web, vm-orchestrator, docker, and archived fixtures. Verification: - semantic rg sweeps for Pi/piConfig/getPi/.pi runtime references - npm run check in apps/companion-os (fails in this worktree: biome not found) Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
parent
e8fe3d54af
commit
536241053c
303 changed files with 3603 additions and 3602 deletions
423
packages/companion-channels/src/adapters/slack.ts
Normal file
423
packages/companion-channels/src/adapters/slack.ts
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
/**
|
||||
* companion-channels — Built-in Slack adapter (bidirectional).
|
||||
*
|
||||
* Outgoing: Slack Web API chat.postMessage.
|
||||
* Incoming: Socket Mode (WebSocket) for events + slash commands.
|
||||
*
|
||||
* Supports:
|
||||
* - Text messages (channels, groups, DMs, multi-party DMs)
|
||||
* - @mentions (app_mention events)
|
||||
* - Slash commands (/aivena by default)
|
||||
* - Typing indicators: not supported (sendTyping is a no-op — Slack exposes no typing API for bots)
|
||||
* - Thread replies (when replying in threads)
|
||||
* - Message splitting for long messages (>3000 chars)
|
||||
* - Channel allowlisting (optional)
|
||||
*
|
||||
* Requires:
|
||||
* - App-level token (xapp-...) for Socket Mode — in settings under companion-channels.slack.appToken
|
||||
* - Bot token (xoxb-...) for Web API — in settings under companion-channels.slack.botToken
|
||||
* - Socket Mode enabled in app settings
|
||||
*
|
||||
* Config in ~/.companion/agent/settings.json:
|
||||
* {
|
||||
* "companion-channels": {
|
||||
* "adapters": {
|
||||
* "slack": {
|
||||
* "type": "slack",
|
||||
* "allowedChannelIds": ["C0123456789"],
|
||||
* "respondToMentionsOnly": true,
|
||||
* "slashCommand": "/aivena"
|
||||
* }
|
||||
* },
|
||||
* "slack": {
|
||||
* "appToken": "xapp-1-...",
|
||||
* "botToken": "xoxb-..."
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
||||
import { LogLevel, SocketModeClient } from "@slack/socket-mode";
import { WebClient } from "@slack/web-api";
import { getChannelSetting } from "../config.js";
import type {
  AdapterConfig,
  ChannelAdapter,
  ChannelMessage,
  OnIncomingMessage,
} from "../types.js";
|
||||
|
||||
/** Longest text sent in one Slack message; kept below the 4000-character API limit to leave margin. */
const MAX_LENGTH = 3_000;
|
||||
|
||||
// ── Slack event types (subset) ──────────────────────────────────
|
||||
|
||||
/**
 * Subset of the Slack `message` event payload that this adapter reads.
 * Required identifiers come first, optional fields after.
 */
interface SlackMessageEvent {
  type: string;
  /** Channel the message was posted in. */
  channel: string;
  /** Message timestamp (forwarded as `timestamp` in the metadata). */
  ts: string;

  /** Present on edits, deletions, bot messages, etc.; such events are ignored by the handler. */
  subtype?: string;
  user?: string;
  text?: string;
  /** Set for threaded messages; combined with `channel` to form the sender key. */
  thread_ts?: string;
  /** Compared against "channel" / "group" by the handler; DMs use other values. */
  channel_type?: string;
  /** Set when the author is a bot; such messages are ignored. */
  bot_id?: string;
}
|
||||
|
||||
/** Subset of the Slack `app_mention` event payload that this adapter reads. */
interface SlackMentionEvent {
  type: string;
  /** Channel in which the bot was mentioned. */
  channel: string;
  /** ID of the user who wrote the mention. */
  user: string;
  /** Message text, still containing the `<@BOT_ID>` mention token. */
  text: string;
  /** Message timestamp. */
  ts: string;
  /** Set when the mention was written inside a thread. */
  thread_ts?: string;
}
|
||||
|
||||
/** Body of a Slack slash-command invocation as delivered over Socket Mode (fields used by this adapter). */
interface SlackCommandPayload {
  /** The command that was typed, e.g. "/aivena". */
  command: string;
  /** Everything the user typed after the command. */
  text: string;

  // Who invoked it
  user_id: string;
  user_name: string;

  // Where it was invoked
  channel_id: string;
  channel_name: string;

  trigger_id: string;
}
|
||||
|
||||
// ── Factory ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Optional logging callback accepted by the Slack adapter.
 *
 * @param event Short event name (the adapter emits "slack-handler-error").
 * @param data  Structured payload describing the event.
 * @param level Optional severity label (the adapter passes "ERROR").
 */
export type SlackAdapterLogger = (
  event: string,
  data: Record<string, unknown>,
  level?: string,
) => void;
|
||||
|
||||
export function createSlackAdapter(
|
||||
config: AdapterConfig,
|
||||
cwd?: string,
|
||||
log?: SlackAdapterLogger,
|
||||
): ChannelAdapter {
|
||||
// Tokens live in settings under companion-channels.slack (not in the adapter config block)
|
||||
const appToken =
|
||||
(cwd ? (getChannelSetting(cwd, "slack.appToken") as string) : null) ??
|
||||
(config.appToken as string);
|
||||
const botToken =
|
||||
(cwd ? (getChannelSetting(cwd, "slack.botToken") as string) : null) ??
|
||||
(config.botToken as string);
|
||||
|
||||
const allowedChannelIds = config.allowedChannelIds as string[] | undefined;
|
||||
const respondToMentionsOnly = config.respondToMentionsOnly === true;
|
||||
const slashCommand = (config.slashCommand as string) ?? "/aivena";
|
||||
|
||||
if (!appToken)
|
||||
throw new Error(
|
||||
"Slack adapter requires appToken (xapp-...) in settings under companion-channels.slack.appToken",
|
||||
);
|
||||
if (!botToken)
|
||||
throw new Error(
|
||||
"Slack adapter requires botToken (xoxb-...) in settings under companion-channels.slack.botToken",
|
||||
);
|
||||
|
||||
let socketClient: SocketModeClient | null = null;
|
||||
const webClient = new WebClient(botToken);
|
||||
let botUserId: string | null = null;
|
||||
|
||||
// ── Helpers ─────────────────────────────────────────────
|
||||
|
||||
/**
 * Channel allowlist check.
 * With no allowlist configured (missing or empty) every channel is accepted;
 * otherwise the channel ID must appear in `allowedChannelIds`.
 */
function isAllowed(channelId: string): boolean {
  return allowedChannelIds?.length
    ? allowedChannelIds.includes(channelId)
    : true;
}
|
||||
|
||||
/**
 * Remove every `<@BOT_ID>` token (and the whitespace following it) from `text`
 * and trim the result. Returns `text` untouched while the bot's user ID is unknown.
 */
function stripBotMention(text: string): string {
  return botUserId
    ? text.replace(new RegExp(`<@${botUserId}>\\s*`, "g"), "").trim()
    : text;
}
|
||||
|
||||
/**
 * Map the common Slack event fields onto the adapter's camelCase metadata keys.
 *
 * @param event Any event carrying (a subset of) channel / user / ts / thread_ts / channel_type.
 * @param extra Additional entries merged last, so they override the mapped fields on key clash.
 * @returns A fresh metadata record.
 */
function buildMetadata(
  event: {
    channel?: string;
    user?: string;
    ts?: string;
    thread_ts?: string;
    channel_type?: string;
  },
  extra?: Record<string, unknown>,
): Record<string, unknown> {
  const { channel, user, ts, thread_ts, channel_type } = event;
  const mapped: Record<string, unknown> = {
    channelId: channel,
    userId: user,
    timestamp: ts,
    threadTs: thread_ts,
    channelType: channel_type,
  };
  // Object.assign ignores an undefined source, matching a spread of `undefined`
  return Object.assign(mapped, extra);
}
|
||||
|
||||
// ── Sending ─────────────────────────────────────────────
|
||||
|
||||
/**
 * Post a single message through the Slack Web API (`chat.postMessage`).
 *
 * @param channelId Destination channel.
 * @param text      Message text (callers are responsible for length limits).
 * @param threadTs  When given, the message is posted as a reply in that thread.
 */
async function sendSlack(
  channelId: string,
  text: string,
  threadTs?: string,
): Promise<void> {
  await webClient.chat.postMessage({
    // Link/media previews are disabled to keep responses clean
    unfurl_links: false,
    unfurl_media: false,
    channel: channelId,
    thread_ts: threadTs,
    text,
  });
}
|
||||
|
||||
// ── Adapter ─────────────────────────────────────────────
|
||||
|
||||
return {
|
||||
direction: "bidirectional" as const,
|
||||
|
||||
/**
 * Typing indicator hook — intentionally a no-op.
 * Slack offers bots no typing API in channels or DMs, so there is nothing to send.
 */
async sendTyping(_recipient: string): Promise<void> {
  /* nothing to do */
},
|
||||
|
||||
/**
 * Send an outgoing message to `message.recipient`, replying in-thread when
 * `message.metadata.threadTs` is set.
 *
 * A `*[source]*` header line is prepended when `message.source` is present.
 * Text longer than MAX_LENGTH is sent as several messages, cut at the last
 * newline within the limit unless that newline lies in the first half of the
 * window, in which case the cut is made exactly at MAX_LENGTH.
 */
async send(message: ChannelMessage): Promise<void> {
  const { recipient, source, text, metadata } = message;
  const threadTs = metadata?.threadTs as string | undefined;
  let pending = (source ? `*[${source}]*\n` : "") + text;

  // Fast path: the whole text fits into one Slack message
  if (pending.length <= MAX_LENGTH) {
    await sendSlack(recipient, pending, threadTs);
    return;
  }

  // Peel off one chunk at a time while the remainder is still too long
  while (pending.length > MAX_LENGTH) {
    const newlineAt = pending.lastIndexOf("\n", MAX_LENGTH);
    const cut = newlineAt < MAX_LENGTH / 2 ? MAX_LENGTH : newlineAt;
    await sendSlack(recipient, pending.slice(0, cut), threadTs);
    // Drop the newline we split on so the next chunk doesn't start with it
    pending = pending.slice(cut).replace(/^\n/, "");
  }

  if (pending.length > 0) {
    await sendSlack(recipient, pending, threadTs);
  }
},
|
||||
|
||||
/**
 * Connect to Slack over Socket Mode and forward incoming traffic to `onMessage`.
 *
 * Registers handlers for `message`, `app_mention`, `slash_commands` and
 * `interactive` envelopes, then opens the socket. Calling `start()` while a
 * client already exists is a no-op.
 *
 * If the connection attempt fails, the client is discarded before the error is
 * rethrown, so a later `start()` can retry instead of silently returning.
 *
 * @param onMessage Callback invoked once per accepted incoming message.
 * @throws Whatever `SocketModeClient.start()` rejects with.
 */
async start(onMessage: OnIncomingMessage): Promise<void> {
  if (socketClient) return;

  // Resolve bot user ID (for stripping self-mentions)
  try {
    const authResult = await webClient.auth.test();
    botUserId = (authResult.user_id as string) ?? null;
  } catch {
    // Non-fatal — mention stripping just won't work
  }

  // auth.test() yielded to the event loop: an overlapping start() call may
  // have created the client meanwhile, and must not be duplicated.
  if (socketClient) return;

  const client = new SocketModeClient({
    appToken,
    // Suppress noisy internal logging but keep errors. LogLevel values are
    // lowercase strings, so the enum member is used rather than a literal.
    logLevel: LogLevel.ERROR,
  });
  socketClient = client;

  /** Report a failure inside an event handler without letting it escape. */
  const reportHandlerError = (handler: string, err: unknown): void => {
    log?.("slack-handler-error", { handler, error: String(err) }, "ERROR");
  };

  /** True for public channels and private groups (not DMs / multi-party DMs). */
  const isChannelOrGroup = (channelType: string | undefined): boolean =>
    channelType === "channel" || channelType === "group";

  /** Conversation key: `channel:threadTs` inside a thread, else the channel. */
  const senderKey = (channel: string, threadTs: string | undefined): string =>
    threadTs ? `${channel}:${threadTs}` : channel;

  // ── Message events ──────────────────────────────
  // Socket Mode wraps events in envelopes. The client emits
  // typed events: 'message', 'app_mention', 'slash_commands', etc.
  // Each handler receives { event, body, ack, ... }
  client.on(
    "message",
    async ({
      event,
      ack,
    }: {
      event: SlackMessageEvent;
      ack: () => Promise<void>;
    }) => {
      try {
        await ack();

        // Ignore bot messages (including our own)
        if (event.bot_id || event.subtype === "bot_message") return;
        // Ignore message_changed, message_deleted, etc.
        if (event.subtype) return;
        if (!event.text) return;
        if (!isAllowed(event.channel)) return;

        const inChannelOrGroup = isChannelOrGroup(event.channel_type);

        // Messages that @mention the bot in channels/groups are handled by
        // the app_mention listener; skipping them here avoids duplicate
        // responses. DMs (im) and multi-party DMs (mpim) don't fire
        // app_mention, so they must NOT be skipped.
        if (
          botUserId &&
          inChannelOrGroup &&
          event.text.includes(`<@${botUserId}>`)
        )
          return;

        // In channels/groups, optionally only respond to @mentions
        if (respondToMentionsOnly && inChannelOrGroup) return;

        onMessage({
          adapter: "slack",
          sender: senderKey(event.channel, event.thread_ts),
          text: stripBotMention(event.text),
          metadata: buildMetadata(event, {
            eventType: "message",
          }),
        });
      } catch (err) {
        reportHandlerError("message", err);
      }
    },
  );

  // ── App mention events ──────────────────────────
  client.on(
    "app_mention",
    async ({
      event,
      ack,
    }: {
      event: SlackMentionEvent;
      ack: () => Promise<void>;
    }) => {
      try {
        await ack();

        if (!isAllowed(event.channel)) return;

        onMessage({
          adapter: "slack",
          sender: senderKey(event.channel, event.thread_ts),
          text: stripBotMention(event.text),
          metadata: buildMetadata(event, {
            eventType: "app_mention",
          }),
        });
      } catch (err) {
        reportHandlerError("app_mention", err);
      }
    },
  );

  // ── Slash commands ───────────────────────────────
  client.on(
    "slash_commands",
    async ({
      body,
      ack,
    }: {
      body: SlackCommandPayload;
      ack: (response?: Record<string, unknown>) => Promise<void>;
    }) => {
      try {
        // Some other command routed to this app: acknowledge and ignore
        if (body.command !== slashCommand) {
          await ack();
          return;
        }

        if (!body.text?.trim()) {
          await ack({ text: `Usage: ${slashCommand} [your message]` });
          return;
        }

        if (!isAllowed(body.channel_id)) {
          await ack({
            text: "⛔ This command is not available in this channel.",
          });
          return;
        }

        // Acknowledge immediately (Slack requires <3s response)
        await ack({ text: "🤔 Thinking..." });

        onMessage({
          adapter: "slack",
          sender: body.channel_id,
          text: body.text.trim(),
          metadata: {
            channelId: body.channel_id,
            channelName: body.channel_name,
            userId: body.user_id,
            userName: body.user_name,
            eventType: "slash_command",
            command: body.command,
          },
        });
      } catch (err) {
        reportHandlerError("slash_commands", err);
      }
    },
  );

  // ── Interactive payloads (future: button clicks, modals) ──
  client.on(
    "interactive",
    async ({ ack }: { ack: () => Promise<void> }) => {
      try {
        await ack();
        // TODO: handle interactive payloads (block actions, modals)
      } catch (err) {
        reportHandlerError("interactive", err);
      }
    },
  );

  try {
    await client.start();
  } catch (err) {
    // Connection failed: forget the client so the adapter stays restartable
    if (socketClient === client) socketClient = null;
    throw err;
  }
},
|
||||
|
||||
/** Disconnect the Socket Mode client, if one exists, and forget it. No-op otherwise. */
async stop(): Promise<void> {
  if (!socketClient) return;
  await socketClient.disconnect();
  socketClient = null;
},
|
||||
};
|
||||
}
|
||||
783
packages/companion-channels/src/adapters/telegram.ts
Normal file
783
packages/companion-channels/src/adapters/telegram.ts
Normal file
|
|
@ -0,0 +1,783 @@
|
|||
/**
|
||||
* companion-channels — Built-in Telegram adapter (bidirectional).
|
||||
*
|
||||
* Outgoing: Telegram Bot API sendMessage.
|
||||
* Incoming: Long-polling via getUpdates.
|
||||
*
|
||||
* Supports:
|
||||
* - Text messages
|
||||
* - Photos (downloaded → temp file → passed as image attachment)
|
||||
* - Documents (text files downloaded → content included in message)
|
||||
* - Voice messages (downloaded → transcribed → passed as text)
|
||||
* - Audio files (music/recordings → transcribed → passed as text)
|
||||
* - Audio documents (files with audio MIME → routed through transcription)
|
||||
* - File size validation (1MB for docs/photos, 10MB for voice/audio)
|
||||
* - MIME type filtering (text-like files only for documents)
|
||||
*
|
||||
* Config (in settings.json under companion-channels.adapters.telegram):
|
||||
* {
|
||||
* "type": "telegram",
|
||||
* "botToken": "your-telegram-bot-token",
|
||||
* "parseMode": "Markdown",
|
||||
* "polling": true,
|
||||
* "pollingTimeout": 30,
|
||||
* "allowedChatIds": ["123456789", "-100987654321"]
|
||||
* }
|
||||
*/
|
||||
|
||||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import type {
|
||||
AdapterConfig,
|
||||
ChannelAdapter,
|
||||
ChannelMessage,
|
||||
IncomingAttachment,
|
||||
IncomingMessage,
|
||||
OnIncomingMessage,
|
||||
TranscriptionConfig,
|
||||
} from "../types.js";
|
||||
import {
|
||||
createTranscriptionProvider,
|
||||
type TranscriptionProvider,
|
||||
} from "./transcription.js";
|
||||
|
||||
/** Message length constant for Telegram text, in characters. */
const MAX_LENGTH = 4096;
/** Size cap for photos and documents: 1MB. */
const MAX_FILE_SIZE = 1024 * 1024;
/** Size cap for voice/audio, which are typically larger: 10MB. */
const MAX_AUDIO_SIZE = 10 * 1024 * 1024;
|
||||
|
||||
/** MIME types handled as text documents (their content is inlined into the prompt). */
const TEXT_MIME_TYPES = new Set([
  // text/*
  "text/plain", "text/markdown", "text/csv", "text/html",
  "text/xml", "text/css", "text/javascript",
  // application/* that is really text
  "application/json", "application/xml",
  "application/javascript", "application/typescript",
  "application/x-yaml", "application/x-toml", "application/x-sh",
]);
|
||||
|
||||
/**
 * Lower-case file extensions handled as text even when the MIME type is
 * generic (application/octet-stream). Looked up by isTextDocument().
 */
const TEXT_EXTENSIONS = new Set([
  // Prose & data
  ".md", ".markdown", ".txt", ".csv", ".json", ".jsonl",
  ".yaml", ".yml", ".toml", ".xml",
  // Web
  ".html", ".htm", ".css", ".js", ".ts", ".tsx", ".jsx",
  // Other source code
  ".py", ".rs", ".go", ".rb", ".php", ".java", ".kt", ".c", ".cpp", ".h",
  // Shell
  ".sh", ".bash", ".zsh", ".fish",
  // Query languages
  ".sql", ".graphql", ".gql",
  // Configuration & logs
  ".env", ".ini", ".cfg", ".conf", ".properties", ".log",
  // Dotfiles
  ".gitignore", ".dockerignore", ".editorconfig",
]);
|
||||
|
||||
/** True when `mime` is present and belongs to the `image/` family. */
function isImageMime(mime: string | undefined): boolean {
  return mime?.startsWith("image/") ?? false;
}
|
||||
|
||||
/** MIME prefixes whose whole family is treated as transcribable audio. */
const AUDIO_MIME_PREFIXES = ["audio/"];

/** Exact MIME types treated as transcribable audio. */
const AUDIO_MIME_TYPES = new Set([
  "audio/mpeg", "audio/mp4", "audio/ogg", "audio/wav", "audio/webm",
  "audio/x-m4a", "audio/flac", "audio/aac", "audio/mp3",
  "video/ogg", // .ogg containers can be audio-only
]);

/**
 * True when `mime` names audio that can be sent to transcription: either an
 * exact entry of AUDIO_MIME_TYPES or anything starting with one of
 * AUDIO_MIME_PREFIXES. A missing or empty MIME type is never audio.
 */
function isAudioMime(mime: string | undefined): boolean {
  if (!mime) return false;
  if (AUDIO_MIME_TYPES.has(mime)) return true;
  for (const prefix of AUDIO_MIME_PREFIXES) {
    if (mime.startsWith(prefix)) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Decide whether a document should be handled as inlinable text.
 *
 * A document qualifies when its MIME type is listed in TEXT_MIME_TYPES or,
 * failing that, when its filename maps to an entry of TEXT_EXTENSIONS.
 *
 * @param mimeType MIME type reported for the document, if any.
 * @param filename Original filename, if any.
 * @returns `true` for text-like documents, `false` otherwise (including when
 *          neither argument is provided).
 */
function isTextDocument(
  mimeType: string | undefined,
  filename: string | undefined,
): boolean {
  if (mimeType && TEXT_MIME_TYPES.has(mimeType)) return true;
  if (!filename) return false;

  const lowered = filename.toLowerCase();
  // path.extname() returns "" for bare dotfiles such as ".gitignore" or
  // ".env", so fall back to the whole basename; otherwise the dotfile entries
  // in TEXT_EXTENSIONS could never match.
  const key = path.extname(lowered) || path.basename(lowered);
  return TEXT_EXTENSIONS.has(key);
}
|
||||
|
||||
export function createTelegramAdapter(config: AdapterConfig): ChannelAdapter {
|
||||
const botToken = config.botToken as string;
|
||||
const parseMode = config.parseMode as string | undefined;
|
||||
const pollingEnabled = config.polling === true;
|
||||
const pollingTimeout = (config.pollingTimeout as number) ?? 30;
|
||||
const allowedChatIds = config.allowedChatIds as string[] | undefined;
|
||||
|
||||
if (!botToken) {
|
||||
throw new Error("Telegram adapter requires botToken");
|
||||
}
|
||||
|
||||
// ── Transcription setup ─────────────────────────────────
|
||||
const transcriptionConfig = config.transcription as
|
||||
| TranscriptionConfig
|
||||
| undefined;
|
||||
let transcriber: TranscriptionProvider | null = null;
|
||||
let transcriberError: string | null = null;
|
||||
if (transcriptionConfig?.enabled) {
|
||||
try {
|
||||
transcriber = createTranscriptionProvider(transcriptionConfig);
|
||||
} catch (err: any) {
|
||||
transcriberError = err.message ?? "Unknown transcription config error";
|
||||
console.error(
|
||||
`[companion-channels] Transcription config error: ${transcriberError}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const apiBase = `https://api.telegram.org/bot${botToken}`;
|
||||
let offset = 0;
|
||||
let running = false;
|
||||
let abortController: AbortController | null = null;
|
||||
|
||||
// Track temp files for cleanup
|
||||
const tempFiles: string[] = [];
|
||||
|
||||
// ── Telegram API helpers ────────────────────────────────
|
||||
|
||||
/**
 * POST one text message to a chat via the Bot API `sendMessage` method.
 * `parse_mode` is included only when a parse mode is configured.
 *
 * @throws Error carrying the HTTP status and response body when the API
 *         answers with a non-2xx status.
 */
async function sendTelegram(chatId: string, text: string): Promise<void> {
  const payload: Record<string, unknown> = parseMode
    ? { chat_id: chatId, text, parse_mode: parseMode }
    : { chat_id: chatId, text };

  const response = await fetch(`${apiBase}/sendMessage`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (response.ok) return;

  // The body may itself be unreadable; fall back to a placeholder
  const detail = await response.text().catch(() => "unknown error");
  throw new Error(`Telegram API error ${response.status}: ${detail}`);
}
|
||||
|
||||
/**
 * Fire a Bot API `sendChatAction` (defaults to "typing") for a chat.
 * Purely best-effort: network failures are swallowed and the HTTP status is
 * not inspected.
 */
async function sendChatAction(
  chatId: string,
  action = "typing",
): Promise<void> {
  const request = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ chat_id: chatId, action }),
  };
  try {
    await fetch(`${apiBase}/sendChatAction`, request);
  } catch {
    // Best-effort
  }
}
|
||||
|
||||
/**
 * Download a Telegram file by `file_id` into a temp file.
 *
 * Looks the file up via `getFile`, refuses anything larger than `maxSize`
 * (checked against the reported size before downloading and against the
 * actual byte count afterwards), writes the bytes to
 * `<os.tmpdir()>/companion-channels/` and records the path in `tempFiles`
 * so it can be removed during cleanup.
 *
 * @param fileId        Telegram file identifier.
 * @param suggestedName Optional filename, used only as a fallback source for
 *                      the extension of the temp file.
 * @param maxSize       Maximum accepted size in bytes (default MAX_FILE_SIZE).
 * @returns `{ localPath, size }` on success, or `null` on any failure
 *          (HTTP error, missing file path, oversize file, I/O error).
 */
async function downloadFile(
  fileId: string,
  suggestedName?: string,
  maxSize = MAX_FILE_SIZE,
): Promise<{ localPath: string; size: number } | null> {
  try {
    // Get file info (the ID is escaped so it cannot alter the query string)
    const infoRes = await fetch(
      `${apiBase}/getFile?file_id=${encodeURIComponent(fileId)}`,
    );
    if (!infoRes.ok) return null;

    const info = (await infoRes.json()) as {
      ok: boolean;
      result?: { file_id: string; file_size?: number; file_path?: string };
    };
    const remotePath = info.result?.file_path;
    if (!info.ok || !remotePath) return null;

    // Size check before downloading (a missing size is treated as 0)
    const reportedSize = info.result?.file_size ?? 0;
    if (reportedSize > maxSize) return null;

    // Download
    const fileRes = await fetch(
      `https://api.telegram.org/file/bot${botToken}/${remotePath}`,
    );
    if (!fileRes.ok) return null;

    const buffer = Buffer.from(await fileRes.arrayBuffer());

    // Double-check size after download
    if (buffer.length > maxSize) return null;

    // Write to a uniquely named temp file, keeping the original extension
    const ext =
      path.extname(remotePath) || path.extname(suggestedName || "") || "";
    const tmpDir = path.join(os.tmpdir(), "companion-channels");
    fs.mkdirSync(tmpDir, { recursive: true });
    const localPath = path.join(
      tmpDir,
      `${Date.now()}-${Math.random().toString(36).slice(2)}${ext}`,
    );
    fs.writeFileSync(localPath, buffer);
    tempFiles.push(localPath);

    return { localPath, size: buffer.length };
  } catch {
    // Callers treat any failure as "could not download"
    return null;
  }
}
|
||||
|
||||
// ── Message building helpers ────────────────────────────
|
||||
|
||||
/**
 * Collect the metadata shared by every incoming Telegram message:
 * message ID, chat type/title, sender identity (when `from` is present)
 * and the message date.
 */
function buildBaseMetadata(msg: TelegramMessage): Record<string, unknown> {
  const { message_id, chat, from, date } = msg;
  return {
    messageId: message_id,
    chatType: chat.type,
    chatTitle: chat.title,
    userId: from?.id,
    username: from?.username,
    firstName: from?.first_name,
    date,
  };
}
|
||||
|
||||
// ── Incoming (long polling) ─────────────────────────────
|
||||
|
||||
/**
 * Long-polling loop: repeatedly calls `getUpdates` while `running` is set and
 * hands each accepted message to `onMessage`.
 *
 * - `offset` is advanced past every update seen, whether or not it is used.
 * - Updates from chats outside `allowedChatIds` (when configured) are skipped.
 * - A non-2xx response or an unexpected error is retried after 5 seconds.
 * - An aborted fetch (`AbortError`) ends the loop.
 */
async function poll(onMessage: OnIncomingMessage): Promise<void> {
  type Update = { update_id: number; message?: TelegramMessage };

  while (running) {
    try {
      // A fresh controller per request so the in-flight fetch can be aborted
      const controller = new AbortController();
      abortController = controller;

      const res = await fetch(
        `${apiBase}/getUpdates?offset=${offset}&timeout=${pollingTimeout}&allowed_updates=["message"]`,
        { signal: controller.signal },
      );

      if (res.ok) {
        const payload = (await res.json()) as { ok: boolean; result: Update[] };
        const updates = payload.ok ? (payload.result ?? []) : [];

        for (const { update_id, message } of updates) {
          offset = update_id + 1;
          if (!message) continue;

          const chatId = String(message.chat.id);
          const permitted = !allowedChatIds || allowedChatIds.includes(chatId);
          if (!permitted) continue;

          const incoming = await processMessage(message, chatId);
          if (incoming) onMessage(incoming);
        }
      } else {
        // API refused the request: back off before asking again
        await sleep(5000);
      }
    } catch (err: any) {
      if (err.name === "AbortError") break;
      if (running) await sleep(5000);
    }
  }
}
|
||||
|
||||
/**
 * Convert one Telegram message into an IncomingMessage, or `null` to ignore it.
 *
 * Branches are tried in this order: photo, document (image / text / audio),
 * voice, audio, plain text. Any other message type (sticker, video, …)
 * yields `null`. Oversized or unsupported content is answered with a warning
 * text whose metadata carries `rejected: true` rather than being dropped.
 *
 * Size limits: MAX_FILE_SIZE (1MB) for photos and non-audio documents,
 * MAX_AUDIO_SIZE (10MB) for voice, audio, and audio documents.
 *
 * @param msg    The Telegram message to convert.
 * @param chatId Chat ID as a string; becomes the `sender` of the result.
 * @returns The normalized message, or `null` when the message is not handled.
 */
async function processMessage(
  msg: TelegramMessage,
  chatId: string,
): Promise<IncomingMessage | null> {
  const metadata = buildBaseMetadata(msg);
  const caption = msg.caption || "";

  /** Build a result for this chat; `extra` is merged over the base metadata. */
  const reply = (
    text: string,
    extra?: Record<string, unknown>,
    attachments?: IncomingAttachment[],
  ): IncomingMessage => ({
    adapter: "telegram",
    sender: chatId,
    text,
    ...(attachments ? { attachments } : {}),
    metadata: extra ? { ...metadata, ...extra } : metadata,
  });

  // ── Photo ──────────────────────────────────────────
  if (msg.photo && msg.photo.length > 0) {
    // Pick the largest photo (last in array)
    const largest = msg.photo[msg.photo.length - 1];

    if (largest.file_size && largest.file_size > MAX_FILE_SIZE) {
      return reply("⚠️ Photo too large (max 1MB).", { rejected: true });
    }

    const downloaded = await downloadFile(largest.file_id, "photo.jpg");
    if (!downloaded) {
      return reply(caption || "📷 (photo — failed to download)");
    }

    const attachment: IncomingAttachment = {
      type: "image",
      path: downloaded.localPath,
      filename: "photo.jpg",
      mimeType: "image/jpeg",
      size: downloaded.size,
    };
    return reply(caption || "Describe this image.", { hasPhoto: true }, [
      attachment,
    ]);
  }

  // ── Document ───────────────────────────────────────
  if (msg.document) {
    const doc = msg.document;
    const mimeType = doc.mime_type;
    const filename = doc.file_name;

    // Classify once, with the same precedence as the branches below
    const isImage = isImageMime(mimeType);
    const isText = !isImage && isTextDocument(mimeType, filename);
    const isAudio = !isImage && !isText && isAudioMime(mimeType);

    // The 1MB cap applies to everything except audio documents, which get
    // the 10MB audio limit in their own branch. Applying it to them as well
    // would reject audio between 1MB and 10MB before that branch is reached.
    if (!isAudio && doc.file_size && doc.file_size > MAX_FILE_SIZE) {
      return reply(
        `⚠️ File too large: ${filename || "document"} (${formatSize(doc.file_size)}, max 1MB).`,
        { rejected: true },
      );
    }

    // Image documents (e.g. uncompressed photos sent as files)
    if (isImage) {
      const downloaded = await downloadFile(doc.file_id, filename);
      if (!downloaded) {
        return reply(
          caption || `📎 ${filename || "image"} (failed to download)`,
        );
      }

      const attachment: IncomingAttachment = {
        type: "image",
        path: downloaded.localPath,
        filename: filename || "image",
        mimeType: mimeType || "image/jpeg",
        size: downloaded.size,
      };
      return reply(
        caption || "Describe this image.",
        { hasDocument: true, documentType: "image" },
        [attachment],
      );
    }

    // Text documents — downloaded and passed along as a document attachment
    if (isText) {
      const downloaded = await downloadFile(doc.file_id, filename);
      if (!downloaded) {
        return reply(
          caption || `📎 ${filename || "document"} (failed to download)`,
        );
      }

      const attachment: IncomingAttachment = {
        type: "document",
        path: downloaded.localPath,
        filename: filename || "document",
        mimeType: mimeType || "text/plain",
        size: downloaded.size,
      };
      return reply(
        caption || `Here is the file ${filename || "document"}.`,
        { hasDocument: true, documentType: "text" },
        [attachment],
      );
    }

    // Audio documents — route through transcription
    if (isAudio) {
      if (!transcriber) {
        return reply(
          transcriberError
            ? `⚠️ Audio transcription misconfigured: ${transcriberError}`
            : `⚠️ Audio files are not supported. Please type your message.`,
          { rejected: true, hasAudio: true },
        );
      }

      if (doc.file_size && doc.file_size > MAX_AUDIO_SIZE) {
        return reply(
          `⚠️ Audio file too large: ${filename || "audio"} (${formatSize(doc.file_size)}, max 10MB).`,
          { rejected: true, hasAudio: true },
        );
      }

      const downloaded = await downloadFile(
        doc.file_id,
        filename,
        MAX_AUDIO_SIZE,
      );
      if (!downloaded) {
        return reply(
          caption || `🎵 ${filename || "audio"} (failed to download)`,
          { hasAudio: true },
        );
      }

      const result = await transcriber.transcribe(downloaded.localPath);
      if (!result.ok || !result.text) {
        return reply(
          `🎵 ${filename || "audio"} (transcription failed${result.error ? `: ${result.error}` : ""})`,
          { hasAudio: true },
        );
      }

      const label = filename ? `Audio: ${filename}` : "Audio file";
      return reply(`🎵 [${label}]: ${result.text}`, {
        hasAudio: true,
        audioTitle: filename,
      });
    }

    // Unsupported file type
    return reply(
      `⚠️ Unsupported file type: ${filename || "document"} (${mimeType || "unknown"}). I can handle text files, images, and audio.`,
      { rejected: true },
    );
  }

  // ── Voice message ──────────────────────────────────
  if (msg.voice) {
    const voice = msg.voice;

    if (!transcriber) {
      return reply(
        transcriberError
          ? `⚠️ Voice transcription misconfigured: ${transcriberError}`
          : "⚠️ Voice messages are not supported. Please type your message.",
        { rejected: true, hasVoice: true },
      );
    }

    if (voice.file_size && voice.file_size > MAX_AUDIO_SIZE) {
      return reply(
        `⚠️ Voice message too large (${formatSize(voice.file_size)}, max 10MB).`,
        { rejected: true, hasVoice: true },
      );
    }

    const downloaded = await downloadFile(
      voice.file_id,
      "voice.ogg",
      MAX_AUDIO_SIZE,
    );
    if (!downloaded) {
      return reply("🎤 (voice message — failed to download)", {
        hasVoice: true,
      });
    }

    const voiceMeta = { hasVoice: true, voiceDuration: voice.duration };
    const result = await transcriber.transcribe(downloaded.localPath);
    if (!result.ok || !result.text) {
      return reply(
        `🎤 (voice message — transcription failed${result.error ? `: ${result.error}` : ""})`,
        voiceMeta,
      );
    }

    return reply(`🎤 [Voice message]: ${result.text}`, voiceMeta);
  }

  // ── Audio file (sent as music) ─────────────────────
  if (msg.audio) {
    const audio = msg.audio;

    if (!transcriber) {
      return reply(
        transcriberError
          ? `⚠️ Audio transcription misconfigured: ${transcriberError}`
          : "⚠️ Audio files are not supported. Please type your message.",
        { rejected: true, hasAudio: true },
      );
    }

    if (audio.file_size && audio.file_size > MAX_AUDIO_SIZE) {
      return reply(
        `⚠️ Audio too large (${formatSize(audio.file_size)}, max 10MB).`,
        { rejected: true, hasAudio: true },
      );
    }

    const audioName = audio.title || audio.performer || "audio";
    const downloaded = await downloadFile(
      audio.file_id,
      `${audioName}.mp3`,
      MAX_AUDIO_SIZE,
    );
    if (!downloaded) {
      return reply(caption || `🎵 ${audioName} (failed to download)`, {
        hasAudio: true,
      });
    }

    const audioMeta = {
      hasAudio: true,
      audioTitle: audio.title,
      audioDuration: audio.duration,
    };
    const result = await transcriber.transcribe(downloaded.localPath);
    if (!result.ok || !result.text) {
      return reply(
        `🎵 ${audioName} (transcription failed${result.error ? `: ${result.error}` : ""})`,
        audioMeta,
      );
    }

    const label = audio.title
      ? `Audio: ${audio.title}${audio.performer ? ` by ${audio.performer}` : ""}`
      : "Audio";
    return reply(`🎵 [${label}]: ${result.text}`, audioMeta);
  }

  // ── Text ───────────────────────────────────────────
  if (msg.text) {
    return reply(msg.text);
  }

  // Unsupported message type (sticker, video, etc.) — ignore
  return null;
}
|
||||
|
||||
// ── Cleanup ─────────────────────────────────────────────
|
||||
|
||||
/**
 * Best-effort removal of every path recorded in `tempFiles`.
 * The list is emptied in place; failures to delete a file are ignored.
 */
function cleanupTempFiles(): void {
  // splice empties the shared array and hands back what it contained
  const pending = tempFiles.splice(0, tempFiles.length);
  pending.forEach((tempPath) => {
    try {
      fs.unlinkSync(tempPath);
    } catch {
      /* ignore */
    }
  });
}
|
||||
|
||||
// ── Adapter ─────────────────────────────────────────────
|
||||
|
||||
return {
|
||||
direction: "bidirectional" as const,
|
||||
|
||||
/** Sends the "typing" chat action to `recipient`. */
async sendTyping(recipient: string): Promise<void> {
  const action = "typing";
  await sendChatAction(recipient, action);
},
|
||||
|
||||
/**
 * Delivers `message.text` to `message.recipient`, prefixed with `[source]`
 * on its own line when `message.source` is set.
 *
 * Text longer than `MAX_LENGTH` goes out as several messages. Each chunk is
 * cut at the last newline inside the limit, unless that newline falls in the
 * first half of the chunk, in which case the chunk is cut hard at the limit.
 * A hard cut never separates a UTF-16 surrogate pair, so characters such as
 * emoji are not torn in two across messages.
 */
async send(message: ChannelMessage): Promise<void> {
  const prefix = message.source ? `[${message.source}]\n` : "";
  const full = prefix + message.text;

  // Short (or empty) messages go out unchanged in a single call.
  if (full.length <= MAX_LENGTH) {
    await sendTelegram(message.recipient, full);
    return;
  }

  let remaining = full;
  while (remaining.length > MAX_LENGTH) {
    let splitAt = remaining.lastIndexOf("\n", MAX_LENGTH);
    if (splitAt < MAX_LENGTH / 2) {
      // No usable newline: fall back to a hard cut at the limit.
      splitAt = MAX_LENGTH;
      const lastUnit = remaining.charCodeAt(splitAt - 1);
      const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
      // Keep the pair together by moving it to the next chunk.
      if (endsOnHighSurrogate && splitAt > 1) splitAt -= 1;
    }
    await sendTelegram(message.recipient, remaining.slice(0, splitAt));
    // Drop the newline we split on so the next chunk does not start with it.
    remaining = remaining.slice(splitAt).replace(/^\n/, "");
  }

  if (remaining.length > 0) {
    await sendTelegram(message.recipient, remaining);
  }
},
|
||||
|
||||
/**
 * Begins polling for incoming messages and forwards them to `onMessage`.
 * Does nothing when polling is disabled or a poll loop is already running.
 */
async start(onMessage: OnIncomingMessage): Promise<void> {
  if (!pollingEnabled || running) {
    return;
  }
  running = true;
  poll(onMessage);
},
|
||||
|
||||
/**
 * Clears the running flag, aborts the current abort controller (if any)
 * and deletes the temp files recorded so far.
 */
async stop(): Promise<void> {
  running = false;
  if (abortController) {
    abortController.abort();
  }
  abortController = null;
  cleanupTempFiles();
},
|
||||
};
|
||||
}
|
||||
|
||||
// ── Telegram API types (subset) ─────────────────────────────────
|
||||
|
||||
/**
 * Partial typing of a Telegram message (a subset of the API's fields).
 * `message_id`, `chat` and `date` are always present; everything else,
 * including all media payloads, is optional.
 */
interface TelegramMessage {
  message_id: number;
  from?: {
    id: number;
    username?: string;
    first_name?: string;
  };
  chat: {
    id: number;
    type: string;
    title?: string;
  };
  date: number;
  text?: string;
  caption?: string;
  /** List of photo entries, each with its own dimensions and file id. */
  photo?: {
    file_id: string;
    file_unique_id: string;
    width: number;
    height: number;
    file_size?: number;
  }[];
  document?: {
    file_id: string;
    file_unique_id: string;
    file_name?: string;
    mime_type?: string;
    file_size?: number;
  };
  voice?: {
    file_id: string;
    file_unique_id: string;
    duration: number;
    mime_type?: string;
    file_size?: number;
  };
  audio?: {
    file_id: string;
    file_unique_id: string;
    duration: number;
    performer?: string;
    title?: string;
    mime_type?: string;
    file_size?: number;
  };
}
|
||||
|
||||
/** Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Formats a byte count for display: whole bytes below 1 KiB ("512B"),
 * otherwise one decimal place in KB or MB ("1.5KB", "10.0MB").
 *
 * @param bytes Size in bytes.
 * @returns The size with a `B`, `KB` or `MB` suffix.
 */
function formatSize(bytes: number): string {
  const KIB = 1024;
  const MIB = 1_048_576;

  if (bytes < KIB) return `${bytes}B`;

  if (bytes < MIB) {
    const kb = (bytes / KIB).toFixed(1);
    // toFixed rounds, so sizes a few bytes short of 1 MiB would otherwise
    // print as "1024.0KB"; let those fall through to the MB branch.
    if (kb !== "1024.0") return `${kb}KB`;
  }

  return `${(bytes / MIB).toFixed(1)}MB`;
}
|
||||
BIN
packages/companion-channels/src/adapters/transcribe-apple
Executable file
BIN
packages/companion-channels/src/adapters/transcribe-apple
Executable file
Binary file not shown.
101
packages/companion-channels/src/adapters/transcribe-apple.swift
Normal file
101
packages/companion-channels/src/adapters/transcribe-apple.swift
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
/// transcribe-apple — macOS speech-to-text via SFSpeechRecognizer.
|
||||
///
|
||||
/// Usage: transcribe-apple <audio-file> [language-code]
|
||||
/// Prints transcribed text to stdout. Exits 1 on error (message to stderr).
|
||||
|
||||
import Foundation
|
||||
import Speech
|
||||
|
||||
// Positional arguments: <audio-file> is required, [language-code] is optional.
let argv = CommandLine.arguments
if argv.count < 2 {
    FileHandle.standardError.write("Usage: transcribe-apple <audio-file> [language-code]\n".data(using: .utf8)!)
    exit(1)
}

let filePath = argv[1]
// Without an explicit language code the script uses "en-US".
let languageCode = argv.count > 2 ? argv[2] : "en-US"
|
||||
|
||||
/// Expands a bare language code into a full locale identifier
/// (e.g. "en" → "en-US", "no" → "nb-NO"). Codes that are not in the table
/// are passed to `Locale(identifier:)` unchanged.
func normalizeLocale(_ code: String) -> Locale {
    let regionalDefaults: [String: String] = [
        "en": "en-US",
        "no": "nb-NO",
        "nb": "nb-NO",
        "nn": "nn-NO",
        "sv": "sv-SE",
        "da": "da-DK",
        "de": "de-DE",
        "fr": "fr-FR",
        "es": "es-ES",
        "it": "it-IT",
        "pt": "pt-BR",
        "ja": "ja-JP",
        "ko": "ko-KR",
        "zh": "zh-CN",
        "ru": "ru-RU",
        "ar": "ar-SA",
        "hi": "hi-IN",
        "pl": "pl-PL",
        "nl": "nl-NL",
        "fi": "fi-FI",
    ]

    if let identifier = regionalDefaults[code] {
        return Locale(identifier: identifier)
    }
    return Locale(identifier: code)
}
|
||||
|
||||
/// Writes `message` (newline-terminated) to stderr and exits with status 1.
func fail(_ message: String) -> Never {
    FileHandle.standardError.write("\(message)\n".data(using: .utf8)!)
    exit(1)
}

let locale = normalizeLocale(languageCode)
let fileURL = URL(fileURLWithPath: filePath)

guard FileManager.default.fileExists(atPath: filePath) else {
    fail("File not found: \(filePath)")
}

guard let recognizer = SFSpeechRecognizer(locale: locale) else {
    fail("Speech recognizer not available for locale: \(locale.identifier)")
}

guard recognizer.isAvailable else {
    fail("Speech recognizer not available (offline model may need download)")
}

// Recognition handlers run on `recognizer.queue`, which defaults to the main
// queue. This script blocks the main thread on semaphores below, so handlers
// must be delivered on a background queue or they could never run.
recognizer.queue = OperationQueue()

// Request authorization (needed even for on-device recognition)
let semaphore = DispatchSemaphore(value: 0)
var authStatus: SFSpeechRecognizerAuthorizationStatus = .notDetermined

SFSpeechRecognizer.requestAuthorization { status in
    authStatus = status
    semaphore.signal()
}
semaphore.wait()

guard authStatus == .authorized else {
    fail("Speech recognition not authorized (status: \(authStatus.rawValue)). Grant access in System Settings > Privacy > Speech Recognition.")
}

// Perform recognition: on-device only, and only the final result is reported.
let request = SFSpeechURLRecognitionRequest(url: fileURL)
request.requiresOnDeviceRecognition = true
request.shouldReportPartialResults = false

let resultSemaphore = DispatchSemaphore(value: 0)
var transcribedText: String?
var recognitionError: Error?

recognizer.recognitionTask(with: request) { result, error in
    if let error = error {
        recognitionError = error
        resultSemaphore.signal()
        return
    }
    if let result = result, result.isFinal {
        transcribedText = result.bestTranscription.formattedString
        resultSemaphore.signal()
    }
}

// Wait up to 60 seconds for either the final result or an error.
if resultSemaphore.wait(timeout: .now() + 60) == .timedOut {
    fail("Transcription timed out after 60 seconds")
}

if let error = recognitionError {
    fail("Recognition error: \(error.localizedDescription)")
}

guard let text = transcribedText, !text.isEmpty else {
    fail("No speech detected in audio")
}

print(text)
|
||||
299
packages/companion-channels/src/adapters/transcription.ts
Normal file
299
packages/companion-channels/src/adapters/transcription.ts
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
/**
|
||||
* companion-channels — Pluggable audio transcription.
|
||||
*
|
||||
* Supports three providers:
|
||||
* - "apple" — macOS SFSpeechRecognizer (free, offline, no API key)
|
||||
* - "openai" — Whisper API
|
||||
* - "elevenlabs" — Scribe API
|
||||
*
|
||||
* Usage:
|
||||
* const provider = createTranscriptionProvider(config);
|
||||
* const result = await provider.transcribe("/path/to/audio.ogg", "en");
|
||||
*/
|
||||
|
||||
import { execFile } from "node:child_process";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { TranscriptionConfig } from "../types.js";
|
||||
|
||||
// ── Public interface ────────────────────────────────────────────
|
||||
|
||||
/** Outcome of a transcription attempt (also used for internal setup steps). */
export interface TranscriptionResult {
  /** `true` when the step succeeded. */
  ok: boolean;
  /** Transcribed text; set by providers on a successful transcription. */
  text?: string;
  /** Human-readable failure reason; set when `ok` is `false`. */
  error?: string;
}
|
||||
|
||||
/** Common contract implemented by every transcription backend. */
export interface TranscriptionProvider {
  /**
   * Transcribes the audio file at `filePath`.
   *
   * @param filePath Path of the audio file on local disk.
   * @param language Optional language code; the built-in providers fall back
   *   to the language from their config when it is omitted.
   */
  transcribe(
    filePath: string,
    language?: string,
  ): Promise<TranscriptionResult>;
}
|
||||
|
||||
/**
 * Instantiates the transcription backend selected by `config.provider`.
 *
 * @throws Error when `config.provider` is not "apple", "openai" or "elevenlabs".
 */
export function createTranscriptionProvider(
  config: TranscriptionConfig,
): TranscriptionProvider {
  const { provider } = config;

  if (provider === "apple") return new AppleProvider(config);
  if (provider === "openai") return new OpenAIProvider(config);
  if (provider === "elevenlabs") return new ElevenLabsProvider(config);

  throw new Error(`Unknown transcription provider: ${provider}`);
}
|
||||
|
||||
// ── Helpers ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Expands an `env:VAR_NAME` reference to the value of that environment
 * variable; any other non-empty string is returned as-is.
 *
 * @returns `undefined` for a missing/empty input or an unset/empty variable.
 */
function resolveEnvValue(value: string | undefined): string | undefined {
  const ENV_PREFIX = "env:";

  if (!value) return undefined;
  if (!value.startsWith(ENV_PREFIX)) return value;

  const fromEnv = process.env[value.slice(ENV_PREFIX.length)];
  return fromEnv ? fromEnv : undefined;
}
|
||||
|
||||
/**
 * Checks that `filePath` can be handed to a transcription provider.
 *
 * @returns `null` when the file is acceptable, otherwise a failed
 *   `TranscriptionResult` describing the problem (missing, not a regular
 *   file, larger than 25MB, or empty). Never throws for a bad path.
 */
function validateFile(filePath: string): TranscriptionResult | null {
  // 25MB limit (Whisper max; Telegram max is 20MB)
  const MAX_BYTES = 25 * 1024 * 1024;

  // A single stat call avoids the exists-then-stat race: if the file is
  // removed in between, a separate statSync would throw instead of reporting.
  let stat: fs.Stats;
  try {
    stat = fs.statSync(filePath);
  } catch {
    return { ok: false, error: `File not found: ${filePath}` };
  }

  // Directories and other non-files would only fail later, when read.
  if (!stat.isFile()) {
    return { ok: false, error: `Not a regular file: ${filePath}` };
  }

  if (stat.size > MAX_BYTES) {
    return {
      ok: false,
      error: `File too large: ${(stat.size / 1024 / 1024).toFixed(1)}MB (max 25MB)`,
    };
  }
  if (stat.size === 0) {
    return { ok: false, error: "File is empty" };
  }
  return null;
}
|
||||
|
||||
// ── Apple Provider ──────────────────────────────────────────────
|
||||
|
||||
/** Path of the compiled helper binary, located next to this module. */
const SWIFT_HELPER_BIN = path.join(import.meta.dirname, "transcribe-apple");

/** Path of the helper's Swift source: same location and name, plus `.swift`. */
const SWIFT_HELPER_SRC = `${SWIFT_HELPER_BIN}.swift`;
|
||||
|
||||
/**
 * Transcription via the `transcribe-apple` Swift helper binary.
 * Only usable on macOS; the helper is built with `swiftc` on first use when
 * the binary does not exist yet.
 */
class AppleProvider implements TranscriptionProvider {
  private language: string | undefined;
  /** Shared by all callers so the helper is compiled at most once per instance. */
  private compilePromise: Promise<TranscriptionResult> | null = null;

  constructor(config: TranscriptionConfig) {
    this.language = config.language;
  }

  /**
   * Runs the helper on `filePath` and returns its trimmed stdout as the text.
   * `language` overrides the configured language for this call.
   */
  async transcribe(
    filePath: string,
    language?: string,
  ): Promise<TranscriptionResult> {
    if (process.platform !== "darwin") {
      return {
        ok: false,
        error: "Apple transcription is only available on macOS",
      };
    }

    const invalid = validateFile(filePath);
    if (invalid) return invalid;

    // Concurrent first calls all await the same compile promise.
    if (this.compilePromise === null) {
      this.compilePromise = this.compileHelper();
    }
    const compiled = await this.compilePromise;
    if (!compiled.ok) return compiled;

    const effectiveLanguage = language || this.language;
    const helperArgs = effectiveLanguage
      ? [filePath, effectiveLanguage]
      : [filePath];

    return new Promise<TranscriptionResult>((resolve) => {
      execFile(
        SWIFT_HELPER_BIN,
        helperArgs,
        { timeout: 60_000 },
        (err, stdout, stderr) => {
          if (err) {
            // Prefer the helper's own stderr message over the generic exec error.
            resolve({ ok: false, error: stderr?.trim() || err.message });
            return;
          }
          const text = stdout.trim();
          resolve(
            text
              ? { ok: true, text }
              : { ok: false, error: "Transcription returned empty result" },
          );
        },
      );
    });
  }

  /**
   * Ensures the helper binary exists, compiling it from the Swift source if
   * needed. Resolves with `{ ok: false, error }` rather than rejecting.
   */
  private async compileHelper(): Promise<TranscriptionResult> {
    // An existing binary is reused as-is.
    if (fs.existsSync(SWIFT_HELPER_BIN)) {
      return { ok: true };
    }

    if (!fs.existsSync(SWIFT_HELPER_SRC)) {
      return {
        ok: false,
        error: `Swift helper source not found: ${SWIFT_HELPER_SRC}`,
      };
    }

    const swiftcArgs = ["-O", "-o", SWIFT_HELPER_BIN, SWIFT_HELPER_SRC];
    return new Promise<TranscriptionResult>((resolve) => {
      execFile("swiftc", swiftcArgs, { timeout: 30_000 }, (err, _stdout, stderr) => {
        if (!err) {
          resolve({ ok: true });
          return;
        }
        resolve({
          ok: false,
          error: `Failed to compile Swift helper: ${stderr?.trim() || err.message}`,
        });
      });
    });
  }
}
|
||||
|
||||
// ── OpenAI Provider ─────────────────────────────────────────────
|
||||
|
||||
/**
 * Transcription via the OpenAI audio transcriptions endpoint.
 * The API key may be given literally or as an `env:VAR_NAME` reference.
 */
class OpenAIProvider implements TranscriptionProvider {
  /** Upper bound for one upload + transcription round trip. */
  private static readonly REQUEST_TIMEOUT_MS = 300_000;

  private readonly apiKey: string;
  private readonly model: string;
  private readonly language: string | undefined;

  /** @throws Error when no API key can be resolved from `config.apiKey`. */
  constructor(config: TranscriptionConfig) {
    const key = resolveEnvValue(config.apiKey);
    if (!key) throw new Error("OpenAI transcription requires apiKey");
    this.apiKey = key;
    this.model = config.model || "whisper-1";
    this.language = config.language;
  }

  /**
   * Uploads the file as multipart form data and returns the transcribed text.
   * Network, HTTP and parsing failures are reported as `{ ok: false, error }`.
   * `language` overrides the configured language for this call.
   */
  async transcribe(
    filePath: string,
    language?: string,
  ): Promise<TranscriptionResult> {
    const fileErr = validateFile(filePath);
    if (fileErr) return fileErr;

    const lang = language || this.language;

    try {
      // Async read: the file can be up to 25MB and must not block the event loop.
      const fileBuffer = await fs.promises.readFile(filePath);

      const form = new FormData();
      form.append("file", new Blob([fileBuffer]), path.basename(filePath));
      form.append("model", this.model);
      if (lang) form.append("language", lang);

      const response = await fetch(
        "https://api.openai.com/v1/audio/transcriptions",
        {
          method: "POST",
          headers: { Authorization: `Bearer ${this.apiKey}` },
          body: form,
          // Without a deadline a stalled connection would hang the caller forever.
          signal: AbortSignal.timeout(OpenAIProvider.REQUEST_TIMEOUT_MS),
        },
      );

      if (!response.ok) {
        const body = await response.text();
        return {
          ok: false,
          // Truncated so a large error page does not flood the chat/logs.
          error: `OpenAI API error (${response.status}): ${body.slice(0, 200)}`,
        };
      }

      const data = (await response.json()) as { text?: string };
      if (!data.text) {
        return { ok: false, error: "OpenAI returned empty transcription" };
      }
      return { ok: true, text: data.text };
    } catch (err: unknown) {
      // Anything can be thrown; only Error instances carry a `.message`.
      const reason = err instanceof Error ? err.message : String(err);
      return {
        ok: false,
        error: `OpenAI transcription failed: ${reason}`,
      };
    }
  }
}
|
||||
|
||||
// ── ElevenLabs Provider ─────────────────────────────────────────
|
||||
|
||||
/**
 * Transcription via the ElevenLabs speech-to-text endpoint.
 * The API key may be given literally or as an `env:VAR_NAME` reference.
 */
class ElevenLabsProvider implements TranscriptionProvider {
  /** Upper bound for one upload + transcription round trip. */
  private static readonly REQUEST_TIMEOUT_MS = 300_000;

  private readonly apiKey: string;
  private readonly model: string;
  private readonly language: string | undefined;

  /** @throws Error when no API key can be resolved from `config.apiKey`. */
  constructor(config: TranscriptionConfig) {
    const key = resolveEnvValue(config.apiKey);
    if (!key) throw new Error("ElevenLabs transcription requires apiKey");
    this.apiKey = key;
    this.model = config.model || "scribe_v1";
    this.language = config.language;
  }

  /**
   * Uploads the file as multipart form data and returns the transcribed text.
   * Network, HTTP and parsing failures are reported as `{ ok: false, error }`.
   * `language` overrides the configured language for this call.
   */
  async transcribe(
    filePath: string,
    language?: string,
  ): Promise<TranscriptionResult> {
    const fileErr = validateFile(filePath);
    if (fileErr) return fileErr;

    const lang = language || this.language;

    try {
      // Async read: the file can be up to 25MB and must not block the event loop.
      const fileBuffer = await fs.promises.readFile(filePath);

      const form = new FormData();
      form.append("file", new Blob([fileBuffer]), path.basename(filePath));
      form.append("model_id", this.model);
      if (lang) form.append("language_code", lang);

      const response = await fetch(
        "https://api.elevenlabs.io/v1/speech-to-text",
        {
          method: "POST",
          headers: { "xi-api-key": this.apiKey },
          body: form,
          // Without a deadline a stalled connection would hang the caller forever.
          signal: AbortSignal.timeout(ElevenLabsProvider.REQUEST_TIMEOUT_MS),
        },
      );

      if (!response.ok) {
        const body = await response.text();
        return {
          ok: false,
          // Truncated so a large error page does not flood the chat/logs.
          error: `ElevenLabs API error (${response.status}): ${body.slice(0, 200)}`,
        };
      }

      const data = (await response.json()) as { text?: string };
      if (!data.text) {
        return { ok: false, error: "ElevenLabs returned empty transcription" };
      }
      return { ok: true, text: data.text };
    } catch (err: unknown) {
      // Anything can be thrown; only Error instances carry a `.message`.
      const reason = err instanceof Error ? err.message : String(err);
      return {
        ok: false,
        error: `ElevenLabs transcription failed: ${reason}`,
      };
    }
  }
}
|
||||
45
packages/companion-channels/src/adapters/webhook.ts
Normal file
45
packages/companion-channels/src/adapters/webhook.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* companion-channels — Built-in webhook adapter.
|
||||
*
|
||||
* POSTs message as JSON. The recipient field is the webhook URL.
|
||||
*
|
||||
* Config:
|
||||
* {
|
||||
* "type": "webhook",
|
||||
* "method": "POST",
|
||||
* "headers": { "Authorization": "Bearer ..." }
|
||||
* }
|
||||
*/
|
||||
|
||||
import type {
|
||||
AdapterConfig,
|
||||
ChannelAdapter,
|
||||
ChannelMessage,
|
||||
} from "../types.js";
|
||||
|
||||
/**
 * Builds the outgoing-only webhook adapter.
 *
 * Each message is serialized to JSON (`text`, `source`, `metadata`,
 * `timestamp`) and sent to the URL held in `message.recipient`, using
 * `config.method` (default "POST") and any extra `config.headers`.
 */
export function createWebhookAdapter(config: AdapterConfig): ChannelAdapter {
  const method = (config.method as string) ?? "POST";
  const extraHeaders = (config.headers as Record<string, string>) ?? {};

  /** @throws Error with the status code and response body on a non-2xx reply. */
  async function send(message: ChannelMessage): Promise<void> {
    const payload = JSON.stringify({
      text: message.text,
      source: message.source,
      metadata: message.metadata,
      timestamp: new Date().toISOString(),
    });

    const res = await fetch(message.recipient, {
      method,
      // Configured headers are spread last, so they can override Content-Type.
      headers: { "Content-Type": "application/json", ...extraHeaders },
      body: payload,
    });
    if (res.ok) return;

    const err = await res.text().catch(() => "unknown error");
    throw new Error(`Webhook error ${res.status}: ${err}`);
  }

  return {
    direction: "outgoing" as const,
    send,
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue