#!/usr/bin/env npx tsx
/**
 * Extracts session transcripts for a given cwd, splits into context-sized files,
 * optionally spawns subagents to analyze patterns.
 *
 * Usage: npx tsx scripts/session-transcripts.ts [--analyze] [--output <dir>] [cwd]
 *   --analyze       Spawn pi subagents to analyze each transcript file
 *   --output <dir>  Output directory for transcript files (defaults to ./session-transcripts)
 *   cwd             Working directory to extract sessions for (defaults to current)
 */

import { readFileSync, readdirSync, writeFileSync, existsSync, mkdirSync } from "fs";
import { spawn } from "child_process";
import { createInterface } from "readline";
import { homedir } from "os";
import { join, resolve } from "path";
import { parseSessionEntries, type SessionMessageEntry } from "../packages/coding-agent/src/core/session-manager.js";
import chalk from "chalk";

const MAX_CHARS_PER_FILE = 100_000; // ~20k tokens, leaving room for prompt + analysis + output

// Max width for a single line of streamed subagent text in the console.
const MAX_DISPLAY_WIDTH = 100;

/**
 * Maps a working directory to the session-store directory name used by pi:
 * slashes become dashes and the result is wrapped in `--`.
 * e.g. /home/me/proj -> --home-me-proj--
 */
function cwdToSessionDir(cwd: string): string {
    const normalized = resolve(cwd).replace(/\//g, "-");
    return `--${normalized.slice(1)}--`; // Remove leading slash, wrap with --
}

/**
 * Flattens message content to plain text. Content may be a raw string or an
 * array of typed parts; only `text` parts are kept, joined by newlines.
 */
function extractTextContent(content: string | Array<{ type: string; text?: string }>): string {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";

    return content
        .filter((c) => c.type === "text" && c.text)
        .map((c) => c.text!)
        .join("\n");
}

/**
 * Parses one session .jsonl file and returns its user/assistant messages as
 * `[ROLE]\n<text>` strings. Tool/system entries and empty messages are skipped.
 */
function parseSession(filePath: string): string[] {
    const content = readFileSync(filePath, "utf8");
    const entries = parseSessionEntries(content);
    const messages: string[] = [];

    for (const entry of entries) {
        if (entry.type !== "message") continue;
        const msgEntry = entry as SessionMessageEntry;
        const { role, content } = msgEntry.message;

        if (role !== "user" && role !== "assistant") continue;

        const text = extractTextContent(content as string | Array<{ type: string; text?: string }>);
        if (!text.trim()) continue;

        messages.push(`[${role.toUpperCase()}]\n${text}`);
    }

    return messages;
}

/** Collapses text to a single line and truncates it with an ellipsis. */
function truncateLine(text: string, maxWidth: number): string {
    const singleLine = text.replace(/\n/g, " ").replace(/\s+/g, " ").trim();
    if (singleLine.length <= maxWidth) return singleLine;
    return singleLine.slice(0, maxWidth - 3) + "...";
}

/** Shape of the JSON events streamed by `pi --mode json` that we care about. */
interface JsonEvent {
    type: string;
    assistantMessageEvent?: { type: string; delta?: string };
    toolName?: string;
    args?: {
        path?: string;
        offset?: number;
        limit?: number;
        content?: string;
    };
}

/**
 * Spawns a `pi` subagent with read/write tools and streams its progress to the
 * console: accumulated assistant text is flushed (truncated to one line) before
 * each tool call, and tool calls are printed with their salient args.
 *
 * Resolves with success=true when the process exits 0; spawn failures and
 * non-zero exits resolve (never reject) with success=false.
 */
function runSubagent(prompt: string, cwd: string): Promise<{ success: boolean }> {
    return new Promise((resolve) => {
        const child = spawn("pi", ["--mode", "json", "--tools", "read,write", "-p", prompt], {
            cwd,
            stdio: ["ignore", "pipe", "pipe"],
        });

        let textBuffer = "";

        const rl = createInterface({ input: child.stdout });

        rl.on("line", (line) => {
            try {
                const event: JsonEvent = JSON.parse(line);

                if (event.type === "message_update" && event.assistantMessageEvent) {
                    const msgEvent = event.assistantMessageEvent;
                    if (msgEvent.type === "text_delta" && msgEvent.delta) {
                        textBuffer += msgEvent.delta;
                    }
                } else if (event.type === "tool_execution_start" && event.toolName) {
                    // Print accumulated text before tool starts
                    if (textBuffer.trim()) {
                        console.log(chalk.dim("  " + truncateLine(textBuffer, MAX_DISPLAY_WIDTH)));
                        textBuffer = "";
                    }
                    // Format tool call with args. Explicit undefined checks so a
                    // legitimate offset/limit of 0 is still displayed.
                    let argsStr = "";
                    if (event.args) {
                        if (event.toolName === "read") {
                            argsStr = event.args.path || "";
                            if (event.args.offset !== undefined) argsStr += ` offset=${event.args.offset}`;
                            if (event.args.limit !== undefined) argsStr += ` limit=${event.args.limit}`;
                        } else if (event.toolName === "write") {
                            argsStr = event.args.path || "";
                        }
                    }
                    console.log(chalk.cyan(`  [${event.toolName}] ${argsStr}`));
                } else if (event.type === "turn_end") {
                    // Print any remaining text at turn end
                    if (textBuffer.trim()) {
                        console.log(chalk.dim("  " + truncateLine(textBuffer, MAX_DISPLAY_WIDTH)));
                    }
                    textBuffer = "";
                }
            } catch {
                // Ignore malformed JSON
            }
        });

        child.stderr.on("data", (data) => {
            process.stderr.write(chalk.red(data.toString()));
        });

        child.on("close", (code) => {
            resolve({ success: code === 0 });
        });

        child.on("error", (err) => {
            console.error(chalk.red(`  Failed to spawn pi: ${err.message}`));
            resolve({ success: false });
        });
    });
}

async function main() {
    const args = process.argv.slice(2);
    const analyzeFlag = args.includes("--analyze");

    // Parse --output
    const outputIdx = args.indexOf("--output");
    let outputDir = resolve("./session-transcripts");
    if (outputIdx !== -1 && args[outputIdx + 1]) {
        outputDir = resolve(args[outputIdx + 1]);
    }

    // Find cwd (positional arg that's not a flag or flag value).
    // Note: indexOf may add -1 to the set when --analyze is absent; harmless,
    // since positional indices are always >= 0.
    const flagIndices = new Set<number>();
    flagIndices.add(args.indexOf("--analyze"));
    if (outputIdx !== -1) {
        flagIndices.add(outputIdx);
        flagIndices.add(outputIdx + 1);
    }
    const cwdArg = args.find((a, i) => !flagIndices.has(i) && !a.startsWith("--"));
    const cwd = resolve(cwdArg || process.cwd());

    mkdirSync(outputDir, { recursive: true });
    const sessionsBase = join(homedir(), ".pi/agent/sessions");
    const sessionDirName = cwdToSessionDir(cwd);
    const sessionDir = join(sessionsBase, sessionDirName);

    if (!existsSync(sessionDir)) {
        console.error(`No sessions found for ${cwd}`);
        console.error(`Expected: ${sessionDir}`);
        process.exit(1);
    }

    const sessionFiles = readdirSync(sessionDir)
        .filter((f) => f.endsWith(".jsonl"))
        .sort();

    console.log(`Found ${sessionFiles.length} session files in ${sessionDir}`);

    // Collect all transcripts
    const allTranscripts: string[] = [];
    for (const file of sessionFiles) {
        const filePath = join(sessionDir, file);
        const messages = parseSession(filePath);
        if (messages.length > 0) {
            allTranscripts.push(`=== SESSION: ${file} ===\n${messages.join("\n---\n")}\n=== END SESSION ===`);
        }
    }

    if (allTranscripts.length === 0) {
        console.error("No transcripts found");
        process.exit(1);
    }

    // Split into files respecting MAX_CHARS_PER_FILE
    const outputFiles: string[] = [];
    let currentContent = "";
    let fileIndex = 0;

    // Writes one chunk file, records it, logs the actual filename written
    // (fixes the previous `$(unknown)` placeholder in the log), and advances
    // the file index.
    const writeChunk = (content: string, oversized = false): void => {
        const filename = `session-transcripts-${String(fileIndex).padStart(3, "0")}.txt`;
        writeFileSync(join(outputDir, filename), content);
        outputFiles.push(filename);
        const msg = `Wrote ${filename} (${content.length} chars)${oversized ? " - oversized" : ""}`;
        console.log(oversized ? chalk.yellow(msg) : msg);
        fileIndex++;
    };

    for (const transcript of allTranscripts) {
        // If adding this transcript would exceed limit, write current and start new
        if (currentContent.length > 0 && currentContent.length + transcript.length + 2 > MAX_CHARS_PER_FILE) {
            writeChunk(currentContent);
            currentContent = "";
        }

        // If this single transcript exceeds limit, write it to its own file
        if (transcript.length > MAX_CHARS_PER_FILE) {
            // Write any pending content first
            if (currentContent.length > 0) {
                writeChunk(currentContent);
                currentContent = "";
            }
            // Write the large transcript to its own file
            writeChunk(transcript, true);
            continue;
        }

        currentContent += (currentContent ? "\n\n" : "") + transcript;
    }

    // Write remaining content
    if (currentContent.length > 0) {
        writeChunk(currentContent);
    }

    console.log(`\nCreated ${outputFiles.length} transcript file(s) in ${outputDir}`);

    if (!analyzeFlag) {
        console.log("\nRun with --analyze to spawn pi subagents for pattern analysis.");
        return;
    }

    // Find AGENTS.md files to compare against
    const globalAgentsMd = join(homedir(), ".pi/agent/AGENTS.md");
    const localAgentsMd = join(cwd, "AGENTS.md");
    const agentsMdFiles = [globalAgentsMd, localAgentsMd].filter(existsSync);
    const agentsMdSection =
        agentsMdFiles.length > 0
            ? `STEP 1: Read the existing AGENTS.md file(s) to see what's already encoded:\n${agentsMdFiles.join("\n")}\n\nSTEP 2: `
            : "";

    // Spawn subagents to analyze each file
    const analysisPrompt = `You are analyzing session transcripts to identify recurring user instructions that could be automated.

${agentsMdSection}READING THE TRANSCRIPT:
The transcript file is large. Read it in chunks of 1000 lines using offset/limit parameters:
1. First: read with limit=1000 (lines 1-1000)
2. Then: read with offset=1001, limit=1000 (lines 1001-2000)
3. Continue incrementing offset by 1000 until you reach the end
4. Only after reading the ENTIRE file, perform the analysis and write the summary

ANALYSIS TASK:
Look for patterns where the user repeatedly gives similar instructions. These could become:
- AGENTS.md entries: coding style rules, behavior guidelines, project conventions
- Skills: multi-step workflows with external tools (search, browser, APIs)
- Prompt templates: reusable prompts for common tasks

Compare each pattern against the existing AGENTS.md content to determine if it's NEW or EXISTING.

OUTPUT FORMAT (strict):
Write a file with exactly this structure. Use --- as separator between patterns.

PATTERN: <short descriptive name>
STATUS: NEW | EXISTING
TYPE: agents-md | skill | prompt-template
FREQUENCY: <number of occurrences>
EVIDENCE:
- "<exact quote from transcript>"
- "<exact quote from transcript>"
- "<exact quote from transcript>"
DRAFT: <ready-to-use content>

---

Rules:
- Only include patterns that appear 2+ times
- STATUS is NEW if not in AGENTS.md, EXISTING if already covered
- EVIDENCE must contain exact quotes from the transcripts
- DRAFT must be ready-to-use content
- If no patterns found, write "NO PATTERNS FOUND"
- Do not include any other text outside this format`;

    console.log("\nSpawning subagents for analysis...");
    for (const file of outputFiles) {
        const summaryFile = file.replace(".txt", ".summary.txt");
        const filePath = join(outputDir, file);
        const summaryPath = join(outputDir, summaryFile);

        const fileContent = readFileSync(filePath, "utf8");
        const fileSize = fileContent.length;

        console.log(`Analyzing ${file} (${fileSize} chars)...`);

        const lineCount = fileContent.split("\n").length;
        const fullPrompt = `${analysisPrompt}\n\nThe file ${filePath} has ${lineCount} lines. Read it in full using chunked reads, then write your analysis to ${summaryPath}`;

        const result = await runSubagent(fullPrompt, outputDir);

        if (result.success && existsSync(summaryPath)) {
            console.log(chalk.green(`  -> ${summaryFile}`));
        } else if (result.success) {
            console.error(chalk.yellow(`  Agent finished but did not write ${summaryFile}`));
        } else {
            console.error(chalk.red(`  Failed to analyze ${file}`));
        }
    }

    // Collect all created summary files
    const summaryFiles = readdirSync(outputDir)
        .filter((f) => f.endsWith(".summary.txt"))
        .sort();

    console.log(`\n=== Individual Analysis Complete ===`);
    console.log(`Created ${summaryFiles.length} summary files`);

    if (summaryFiles.length === 0) {
        console.log(chalk.yellow("No summary files created. Nothing to aggregate."));
        return;
    }

    // Final aggregation step
    console.log("\nAggregating findings into final summary...");

    const summaryPaths = summaryFiles.map((f) => join(outputDir, f)).join("\n");
    const finalSummaryPath = join(outputDir, "FINAL-SUMMARY.txt");

    const aggregationPrompt = `You are aggregating pattern analysis results from multiple summary files.

STEP 1: Read the existing AGENTS.md file(s) to understand what patterns are already encoded:
${agentsMdFiles.length > 0 ? agentsMdFiles.join("\n") : "(no AGENTS.md files found)"}

STEP 2: Read ALL of the following summary files:
${summaryPaths}

STEP 3: Create a consolidated final summary that:
1. Merges duplicate patterns (same pattern found in multiple files)
2. Ranks patterns by total frequency across all files
3. Groups by status (NEW first, then EXISTING) and type
4. Provides the best/most complete DRAFT for each unique pattern
5. Verify STATUS against AGENTS.md content (pattern may be marked NEW in summaries but actually exists)

OUTPUT FORMAT (strict):
Write the final summary with this structure:

# NEW PATTERNS (not yet in AGENTS.md)

## AGENTS.MD: <pattern name>
Total Frequency: <n>
Evidence:
- "<quote>"
Draft:
<draft content>

## SKILL: <pattern name>
...

## PROMPT-TEMPLATE: <pattern name>
...

---

# EXISTING PATTERNS (already in AGENTS.md, for reference)

## <pattern name>
Total Frequency: <n>
Already covered by: <existing entry>

---

# SUMMARY
- New patterns to add: <count>
- Already covered: <count>
- Top 3 new patterns by frequency: <list>

Write the final summary to ${finalSummaryPath}`;

    const aggregateResult = await runSubagent(aggregationPrompt, outputDir);

    if (aggregateResult.success && existsSync(finalSummaryPath)) {
        console.log(chalk.green(`\n=== Final Summary Created ===`));
        console.log(chalk.green(`  ${finalSummaryPath}`));
    } else if (aggregateResult.success) {
        console.error(chalk.yellow(`Agent finished but did not write final summary`));
    } else {
        console.error(chalk.red(`Failed to create final summary`));
    }
}

main().catch(console.error);