#!/usr/bin/env python3
"""Merge staged Claude and Codex agent history into the target home directory.

Usage: merge-agent-history-remote.py <stage_root>

``stage_root`` is expected to contain ``.claude`` and/or ``.codex`` trees
copied from another machine. Absolute paths rooted at ``SOURCE_HOME`` are
rewritten to ``TARGET_HOME`` while merging.

Environment:
    AGENT_MERGE_SOURCE_HOME  home directory on the machine the data came from
                             (default ``/Users/rathi``).
    AGENT_MERGE_TARGET_HOME  home directory to merge into (default: the
                             current user's home).
"""
import json
import os
import re
import shutil
import sys
from pathlib import Path


SOURCE_HOME = os.environ.get("AGENT_MERGE_SOURCE_HOME", "/Users/rathi")
TARGET_HOME = os.environ.get("AGENT_MERGE_TARGET_HOME", str(Path.home()))

# Keys whose string values are treated as filesystem paths and re-rooted.
_PATH_KEYS = frozenset({"cwd", "project", "projectPath", "originalPath", "rollout_path"})

# Records are written with ensure_ascii=False, so never depend on the locale.
_ENCODING = "utf-8"

# Value types that can be used directly as part of a set key.
_SCALAR_TYPES = (str, int, float, bool, type(None))


def translate_path(value):
    """Re-root ``value`` from SOURCE_HOME to TARGET_HOME.

    Only strings equal to SOURCE_HOME or lying below it (``SOURCE_HOME/...``)
    are rewritten; every other value, including non-strings, is returned
    unchanged.
    """
    if isinstance(value, str) and (value == SOURCE_HOME or value.startswith(f"{SOURCE_HOME}/")):
        return f"{TARGET_HOME}{value[len(SOURCE_HOME):]}"
    return value


def _translate_text(text):
    """Replace SOURCE_HOME with TARGET_HOME inside free-form text.

    The match must not be followed by a word character or a hyphen, so a
    sibling such as ``/Users/rathi2`` is left alone.
    """
    pattern = re.escape(SOURCE_HOME) + r"(?![\w-])"
    # A callable replacement keeps backslashes in TARGET_HOME literal.
    return re.sub(pattern, lambda _match: TARGET_HOME, text)


def ensure_parent(path):
    """Create the parent directory of ``path`` if it does not exist yet."""
    path.parent.mkdir(parents=True, exist_ok=True)


def read_jsonl(path):
    """Return the non-blank lines of ``path`` without their newline.

    A missing file yields an empty list. Lines are split on real newlines
    only: ``str.splitlines()`` would also split on U+2028/U+2029/U+0085,
    which may appear unescaped inside JSON strings and would cut a record
    in two.
    """
    if not path.exists():
        return []
    with path.open(encoding=_ENCODING) as handle:
        return [line.rstrip("\n") for line in handle if line.strip()]


def _load_record(raw_line):
    """Parse one JSONL line; return the dict, or None if it is not a JSON object."""
    try:
        record = json.loads(raw_line)
    except json.JSONDecodeError:
        return None
    return record if isinstance(record, dict) else None


def _iter_records(path):
    """Yield every JSON object found in the JSONL file ``path``."""
    for raw_line in read_jsonl(path):
        record = _load_record(raw_line)
        if record is not None:
            yield record


def write_text(path, text):
    """Write ``text`` to ``path`` as UTF-8, creating parent directories."""
    ensure_parent(path)
    path.write_text(text, encoding=_ENCODING)


def write_json(path, value):
    """Write ``value`` to ``path`` as indented JSON followed by a newline."""
    write_text(path, json.dumps(value, indent=2) + "\n")


def append_jsonl(path, lines):
    """Append ``lines`` to ``path``, one per line. Does nothing for no lines."""
    if not lines:
        return
    ensure_parent(path)

    # If the existing file does not end with a newline, the first appended
    # record would be glued onto the last existing one.
    needs_newline = False
    if path.exists() and path.stat().st_size > 0:
        with path.open("rb") as handle:
            handle.seek(-1, os.SEEK_END)
            needs_newline = handle.read(1) != b"\n"

    with path.open("a", encoding=_ENCODING) as handle:
        if needs_newline:
            handle.write("\n")
        handle.writelines(f"{line}\n" for line in lines)


def _encode_project_dir(path):
    """Encode an absolute path as a project directory name.

    NOTE(review): assumes project directories are named by replacing every
    non-alphanumeric character of the path with "-" (this yields
    "-Users-rathi" for "/Users/rathi") -- confirm against the tool that
    creates them.
    """
    return re.sub(r"[^A-Za-z0-9]", "-", path)


def translate_project_dir_name(name):
    """Map a project directory name from SOURCE_HOME to TARGET_HOME.

    Names that do not start with the encoded SOURCE_HOME are returned
    unchanged.
    """
    source_prefix = _encode_project_dir(SOURCE_HOME)
    target_prefix = _encode_project_dir(TARGET_HOME)
    if name == source_prefix:
        return target_prefix
    if name.startswith(f"{source_prefix}-"):
        return f"{target_prefix}{name[len(source_prefix):]}"
    return name


def translate_selected_fields(value, key=None):
    """Recursively re-root path-valued fields inside a JSON-like value.

    Strings are translated only when they sit under one of ``_PATH_KEYS``
    (directly, or as an item of a list stored under such a key). A new
    structure is returned; the input is not modified.
    """
    if isinstance(value, dict):
        return {
            child_key: translate_selected_fields(child_value, child_key)
            for child_key, child_value in value.items()
        }
    if isinstance(value, list):
        return [translate_selected_fields(item, key) for item in value]
    if isinstance(value, str) and key in _PATH_KEYS:
        return translate_path(value)
    return value


def extract_claude_prompt(message):
    """Return the text of a Claude message dict as one stripped string.

    ``content`` may be a string or a list of dicts carrying ``text`` or
    ``content``; anything else yields an empty string.
    """
    if not isinstance(message, dict):
        return ""
    content = message.get("content")
    if isinstance(content, str):
        return content.strip()
    if isinstance(content, list):
        parts = []
        for item in content:
            if isinstance(item, dict):
                text = item.get("text") or item.get("content")
                if isinstance(text, str):
                    parts.append(text.strip())
        return " ".join(part for part in parts if part).strip()
    return ""


def build_claude_entry_from_file(path, project_path):
    """Build a sessions-index entry by scanning the session file ``path``.

    Lines that are not JSON objects are ignored. ``project_path`` is stored
    as the entry's ``projectPath``.
    """
    first_prompt = ""
    created = ""
    modified = ""
    git_branch = ""
    is_sidechain = False
    message_count = 0

    for record in _iter_records(path):
        timestamp = record.get("timestamp")
        if timestamp:
            if not created:
                created = timestamp
            modified = timestamp
        if record.get("gitBranch") and not git_branch:
            git_branch = record["gitBranch"]
        if record.get("isSidechain") is True:
            is_sidechain = True
        record_type = record.get("type")
        if record_type in {"user", "assistant"}:
            message_count += 1
        if record_type == "user" and not first_prompt:
            first_prompt = extract_claude_prompt(record.get("message"))

    return {
        "sessionId": path.stem,
        "fullPath": str(path),
        "fileMtime": int(path.stat().st_mtime * 1000),
        "firstPrompt": first_prompt,
        "messageCount": message_count,
        "created": created,
        "modified": modified,
        "gitBranch": git_branch,
        "projectPath": project_path,
        "isSidechain": is_sidechain,
    }


def _record_key(record, fields):
    """Return a hashable identity for ``record`` built from ``fields``."""
    return tuple(
        value if isinstance(value, _SCALAR_TYPES) else json.dumps(value, sort_keys=True)
        for value in (record.get(field) for field in fields)
    )


def _merge_history(source, target, key_fields, transform=None):
    """Append to ``target`` every record of ``source`` it does not contain yet.

    Records are compared on ``key_fields``; ``transform`` (if given) is
    applied to each source record before comparing and writing.
    """
    seen = {_record_key(record, key_fields) for record in _iter_records(target)}
    additions = []
    for record in _iter_records(source):
        if transform is not None:
            record = transform(record)
        key = _record_key(record, key_fields)
        if key in seen:
            continue
        seen.add(key)
        additions.append(json.dumps(record, ensure_ascii=False))
    append_jsonl(target, additions)


def merge_claude_history(stage_root, target_root):
    """Merge the staged Claude ``history.jsonl`` into the target one."""
    _merge_history(
        stage_root / "history.jsonl",
        target_root / "history.jsonl",
        ("timestamp", "sessionId", "display", "project"),
        transform=translate_selected_fields,
    )


def _copy_file(source, destination):
    """Copy ``source`` to ``destination`` byte-for-byte, creating parents."""
    ensure_parent(destination)
    shutil.copy2(source, destination)


def merge_claude_transcripts(stage_root, target_root):
    """Copy every staged transcript file into the target ``transcripts`` tree."""
    source_dir = stage_root / "transcripts"
    target_dir = target_root / "transcripts"
    if not source_dir.exists():
        return
    target_dir.mkdir(parents=True, exist_ok=True)
    for source in source_dir.rglob("*"):
        if source.is_file():
            _copy_file(source, target_dir / source.relative_to(source_dir))


def _copy_jsonl(source, destination, transform):
    """Copy a JSONL file, passing each parsable line through ``transform``.

    Blank lines and lines that are not valid JSON are written unchanged.
    A file that is not valid UTF-8 is copied byte-for-byte instead.
    """
    ensure_parent(destination)
    try:
        with source.open(encoding=_ENCODING) as reader, destination.open("w", encoding=_ENCODING) as writer:
            for raw_line in reader:
                if not raw_line.strip():
                    writer.write(raw_line)
                    continue
                try:
                    record = json.loads(raw_line)
                except json.JSONDecodeError:
                    writer.write(raw_line)
                    continue
                writer.write(json.dumps(transform(record), ensure_ascii=False))
                writer.write("\n")
    except UnicodeDecodeError:
        # Not text after all (for example a stray binary file): keep it as is.
        shutil.copy2(source, destination)


def copy_transformed_claude_jsonl(source, destination):
    """Copy a Claude session file, re-rooting its path-valued fields."""
    _copy_jsonl(source, destination, translate_selected_fields)


def _load_index(path):
    """Read a sessions-index file; return {} if missing, malformed or not an object."""
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding=_ENCODING))
    except json.JSONDecodeError:
        return {}
    return data if isinstance(data, dict) else {}


def _first_cwd(path):
    """Return the first string ``cwd`` recorded in session file ``path``, or ""."""
    for record in _iter_records(path):
        if isinstance(record.get("cwd"), str):
            return record["cwd"]
    return ""


def _copy_claude_project_files(source_project, target_project):
    """Copy one staged project directory, translating its ``.jsonl`` files.

    ``sessions-index.json`` files are skipped; the index is rebuilt separately.
    """
    for source in source_project.rglob("*"):
        if not source.is_file():
            continue
        relative = source.relative_to(source_project)
        if relative.name == "sessions-index.json":
            continue
        destination = target_project / relative
        if source.suffix == ".jsonl":
            copy_transformed_claude_jsonl(source, destination)
        else:
            _copy_file(source, destination)


def _rebuild_sessions_index(source_project, target_project):
    """Rewrite ``sessions-index.json`` for ``target_project``.

    Metadata comes from the existing target index and the staged index (the
    staged one wins per file name); sessions present on disk but in neither
    index are described by scanning the session file.
    """
    target_index = target_project / "sessions-index.json"
    existing_index = _load_index(target_index)
    source_index = _load_index(source_project / "sessions-index.json")

    metadata_by_filename = {}
    for index_data in (existing_index, source_index):
        listed = index_data.get("entries")
        if not isinstance(listed, list):
            continue
        for entry in listed:
            if not isinstance(entry, dict):
                continue
            full_path = entry.get("fullPath")
            filename = Path(full_path).name if isinstance(full_path, str) else ""
            if filename:
                metadata_by_filename[filename] = translate_selected_fields(entry)

    original_path = translate_path(
        source_index.get("originalPath") or existing_index.get("originalPath") or ""
    )

    entries = []
    for candidate in sorted(target_project.glob("*.jsonl")):
        entry = metadata_by_filename.get(candidate.name)
        if entry is None:
            entry = build_claude_entry_from_file(candidate, original_path or _first_cwd(candidate))
        else:
            # fullPath and fileMtime always describe the file as it now exists on the target.
            entry = {
                **entry,
                "fullPath": str(candidate),
                "fileMtime": int(candidate.stat().st_mtime * 1000),
            }
        entries.append(entry)
        if not original_path and entry.get("projectPath"):
            original_path = entry["projectPath"]

    write_json(
        target_index,
        {
            "version": 1,
            "entries": entries,
            "originalPath": original_path,
        },
    )


def merge_claude_projects(stage_root, target_root):
    """Merge every staged Claude project directory into the target ``projects`` tree."""
    source_projects = stage_root / "projects"
    target_projects = target_root / "projects"
    if not source_projects.exists():
        return
    target_projects.mkdir(parents=True, exist_ok=True)

    for source_project in source_projects.iterdir():
        if not source_project.is_dir():
            continue
        target_project = target_projects / translate_project_dir_name(source_project.name)
        target_project.mkdir(parents=True, exist_ok=True)
        _copy_claude_project_files(source_project, target_project)
        _rebuild_sessions_index(source_project, target_project)


def merge_codex_history(stage_root, target_root):
    """Merge the staged Codex ``history.jsonl`` into the target one."""
    _merge_history(
        stage_root / "history.jsonl",
        target_root / "history.jsonl",
        ("session_id", "ts", "text"),
    )


def transform_codex_record(record):
    """Re-root paths inside one Codex session record.

    Path-valued fields are translated everywhere in the record (this already
    covers ``payload.cwd`` of ``session_meta`` records, so it is not
    translated a second time). For ``response_item`` message payloads the
    text of every ``input_text`` item is rewritten as well. Values that are
    not JSON objects are returned after field translation only.
    """
    record = translate_selected_fields(record)
    if not isinstance(record, dict) or record.get("type") != "response_item":
        return record

    payload = record.get("payload")
    if not isinstance(payload, dict) or payload.get("type") != "message":
        return record

    content = payload.get("content")
    if not isinstance(content, list):
        return record

    for item in content:
        if isinstance(item, dict) and item.get("type") == "input_text" and isinstance(item.get("text"), str):
            # NOTE(review): this was guarded by `"" in text and "" in text`,
            # which is always true; the tag literals it tested for appear to
            # have been lost. Behaviour is kept (every input_text is rewritten).
            item["text"] = _translate_text(item["text"])

    return record


def merge_codex_sessions(stage_root, target_root):
    """Copy every staged Codex session file, translating the records it holds."""
    source_dir = stage_root / "sessions"
    target_dir = target_root / "sessions"
    if not source_dir.exists():
        return
    target_dir.mkdir(parents=True, exist_ok=True)

    for source in source_dir.rglob("*"):
        if source.is_file():
            _copy_jsonl(source, target_dir / source.relative_to(source_dir), transform_codex_record)


def merge_codex_session_index(stage_root, target_root):
    """Merge the staged ``session_index.jsonl`` into the target one.

    Records are keyed by ``id`` (the staged record wins) and written back
    ordered by ``updated_at`` then ``id``. Nothing is written when no index
    was staged.
    """
    source = stage_root / "session_index.jsonl"
    target = target_root / "session_index.jsonl"
    if not source.exists():
        return

    merged = {}
    for current in (target, source):
        for record in _iter_records(current):
            identifier = record.get("id")
            if identifier and isinstance(identifier, (str, int)):
                merged[identifier] = record

    # str() keeps the sort well-defined even if the fields have mixed types.
    ordered = sorted(
        merged.values(),
        key=lambda item: (str(item.get("updated_at") or ""), str(item.get("id") or "")),
    )
    write_text(target, "".join(f"{json.dumps(item, ensure_ascii=False)}\n" for item in ordered))


def copy_translated_text_tree(source_dir, target_dir):
    """Copy a tree of text files, rewriting SOURCE_HOME to TARGET_HOME in each.

    Files that are not valid UTF-8 are copied byte-for-byte.
    """
    if not source_dir.exists():
        return
    for source in source_dir.rglob("*"):
        if not source.is_file():
            continue
        destination = target_dir / source.relative_to(source_dir)
        try:
            text = source.read_text(encoding=_ENCODING)
        except UnicodeDecodeError:
            _copy_file(source, destination)
            continue
        write_text(destination, _translate_text(text))


def split_markdown_sections(text, prefix):
    """Split ``text`` into a header and the sections starting with ``prefix``.

    Returns ``(header, sections)``. ``header`` is everything before the first
    line starting with ``prefix`` (followed by a blank line when non-empty);
    each section is right-stripped and ends with a single newline.
    """
    header_lines = []
    sections = []
    current = None

    for line in text.splitlines():
        if line.startswith(prefix):
            if current is not None:
                sections.append("\n".join(current).rstrip() + "\n")
            current = [line]
        elif current is None:
            header_lines.append(line)
        else:
            current.append(line)

    if current is not None:
        sections.append("\n".join(current).rstrip() + "\n")

    header = "\n".join(header_lines).rstrip()
    if header:
        header += "\n\n"
    return header, sections


def section_identity(section):
    """Return the stripped first line of ``section``, used to detect duplicates."""
    return section.splitlines()[0].strip()


def merge_markdown_sections(target, source, prefix):
    """Merge the ``prefix``-delimited sections of ``source`` into ``target``.

    Staged sections whose first line is not already present in the target are
    placed before the existing ones; the target's header is kept when it has
    one. Sections are separated by a blank line.
    """
    if not source.exists():
        return

    source_text = _translate_text(source.read_text(encoding=_ENCODING))
    source_header, source_sections = split_markdown_sections(source_text, prefix)

    if target.exists():
        target_header, target_sections = split_markdown_sections(target.read_text(encoding=_ENCODING), prefix)
    else:
        target_header, target_sections = "", []

    header = target_header or source_header
    existing_ids = {section_identity(section) for section in target_sections}
    new_sections = [section for section in source_sections if section_identity(section) not in existing_ids]
    merged_sections = new_sections + target_sections

    body = "\n\n".join(section.rstrip() for section in merged_sections if section)
    write_text(target, (header + body).rstrip() + "\n")


def merge_unique_lines(target, source):
    """Append to ``target`` every line of ``source`` that it does not contain yet."""
    if not source.exists():
        return

    source_lines = _translate_text(source.read_text(encoding=_ENCODING)).splitlines()
    target_lines = target.read_text(encoding=_ENCODING).splitlines() if target.exists() else []
    existing = set(target_lines)
    merged = list(target_lines)
    for line in source_lines:
        if line not in existing:
            merged.append(line)
            existing.add(line)
    write_text(target, "\n".join(merged).rstrip() + "\n")


def merge_codex_memories(stage_root, target_root):
    """Merge the staged Codex ``memories`` directory into the target one."""
    source_dir = stage_root / "memories"
    target_dir = target_root / "memories"
    if not source_dir.exists():
        return
    target_dir.mkdir(parents=True, exist_ok=True)

    copy_translated_text_tree(source_dir / "rollout_summaries", target_dir / "rollout_summaries")
    merge_markdown_sections(target_dir / "raw_memories.md", source_dir / "raw_memories.md", "## Thread ")
    merge_markdown_sections(target_dir / "MEMORY.md", source_dir / "MEMORY.md", "# Task Group:")
    merge_unique_lines(target_dir / "memory_summary.md", source_dir / "memory_summary.md")


def main():
    """Merge the stage directory named on the command line into TARGET_HOME."""
    if len(sys.argv) != 2:
        raise SystemExit("usage: merge-agent-history-remote.py <stage_root>")

    stage_root = Path(sys.argv[1]).expanduser()
    if not stage_root.is_dir():
        raise SystemExit(f"stage root is not a directory: {stage_root}")
    home = Path(TARGET_HOME)

    merge_claude_history(stage_root / ".claude", home / ".claude")
    merge_claude_transcripts(stage_root / ".claude", home / ".claude")
    merge_claude_projects(stage_root / ".claude", home / ".claude")

    merge_codex_history(stage_root / ".codex", home / ".codex")
    merge_codex_session_index(stage_root / ".codex", home / ".codex")
    merge_codex_sessions(stage_root / ".codex", home / ".codex")
    merge_codex_memories(stage_root / ".codex", home / ".codex")


if __name__ == "__main__":
    main()
\ + --header 'Type to search netty default Claude and Codex state' \ --bind "start:reload:$search_script {q} || true" \ --bind "change:reload:sleep 0.1; $search_script {q} || true" diff --git a/scripts/search-agent-history.sh b/scripts/search-agent-history.sh index a3336ba..2191696 100755 --- a/scripts/search-agent-history.sh +++ b/scripts/search-agent-history.sh @@ -2,11 +2,9 @@ set -euo pipefail remote="${AGENT_HISTORY_REMOTE:-netty}" -remote_root="${AGENT_HISTORY_REMOTE_ROOT:-/home/rathi/.local/share/agent-history/raw}" initial_query="${1:-}" script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -remote_root_q="$(printf '%q' "$remote_root")" initial_query_q="$(printf '%q' "$initial_query")" -ssh -t "$remote" "AGENT_HISTORY_ROOT=${remote_root_q} INITIAL_QUERY=${initial_query_q} bash -s" < "${script_dir}/search-agent-history-remote.sh" +ssh -t "$remote" "INITIAL_QUERY=${initial_query_q} bash -s" < "${script_dir}/search-agent-history-remote.sh" diff --git a/scripts/sync-agent-history.sh b/scripts/sync-agent-history.sh index 8d7b471..55bdd62 100755 --- a/scripts/sync-agent-history.sh +++ b/scripts/sync-agent-history.sh @@ -2,7 +2,7 @@ set -euo pipefail remote="${AGENT_HISTORY_REMOTE:-netty}" -remote_root="${AGENT_HISTORY_REMOTE_ROOT:-/home/rathi/.local/share/agent-history/raw/darwin}" +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" local_rsync="$(command -v rsync || true)" remote_rsync="$(ssh "$remote" 'command -v rsync || true')" @@ -17,14 +17,20 @@ if [[ -z "$remote_rsync" ]]; then exit 1 fi -remote_root_q="$(printf '%q' "$remote_root")" +remote_stage_root="$( + ssh "$remote" 'mkdir -p /home/rathi/.local/share/agent-history && mktemp -d /home/rathi/.local/share/agent-history/incoming.XXXXXX' +)" +remote_stage_root="$(printf '%s' "$remote_stage_root" | tr -d '\r\n')" +remote_stage_root_q="$(printf '%q' "$remote_stage_root")" ssh "$remote" "mkdir -p \ - ${remote_root_q}/claude \ - ${remote_root_q}/claude/transcripts \ - 
${remote_root_q}/claude/projects \ - ${remote_root_q}/codex \ - ${remote_root_q}/codex/sessions" + ${remote_stage_root_q}/.claude \ + ${remote_stage_root_q}/.claude/transcripts \ + ${remote_stage_root_q}/.claude/projects \ + ${remote_stage_root_q}/.codex \ + ${remote_stage_root_q}/.codex/sessions \ + ${remote_stage_root_q}/.codex/memories \ + ${remote_stage_root_q}/.codex/memories/rollout_summaries" sync_path() { local src="$1" @@ -39,11 +45,20 @@ sync_path() { "$local_rsync" -az --rsync-path="$remote_rsync" "$src" "$remote:$dest" } -sync_path "$HOME/.claude/history.jsonl" "${remote_root}/claude/" -sync_path "$HOME/.claude/transcripts/" "${remote_root}/claude/transcripts/" -sync_path "$HOME/.claude/projects/" "${remote_root}/claude/projects/" -sync_path "$HOME/.codex/history.jsonl" "${remote_root}/codex/" -sync_path "$HOME/.codex/session_index.jsonl" "${remote_root}/codex/" -sync_path "$HOME/.codex/sessions/" "${remote_root}/codex/sessions/" +sync_path "$HOME/.claude/history.jsonl" "${remote_stage_root}/.claude/" +sync_path "$HOME/.claude/transcripts/" "${remote_stage_root}/.claude/transcripts/" +sync_path "$HOME/.claude/projects/" "${remote_stage_root}/.claude/projects/" +sync_path "$HOME/.codex/history.jsonl" "${remote_stage_root}/.codex/" +sync_path "$HOME/.codex/session_index.jsonl" "${remote_stage_root}/.codex/" +sync_path "$HOME/.codex/sessions/" "${remote_stage_root}/.codex/sessions/" +sync_path "$HOME/.codex/memories/MEMORY.md" "${remote_stage_root}/.codex/memories/" +sync_path "$HOME/.codex/memories/raw_memories.md" "${remote_stage_root}/.codex/memories/" +sync_path "$HOME/.codex/memories/memory_summary.md" "${remote_stage_root}/.codex/memories/" +sync_path "$HOME/.codex/memories/rollout_summaries/" "${remote_stage_root}/.codex/memories/rollout_summaries/" + +printf 'Merging staged history into %s default harness locations...\n' "$remote" +ssh "$remote" "python3 - ${remote_stage_root_q}" < "${script_dir}/merge-agent-history-remote.py" + +ssh "$remote" 
"case ${remote_stage_root_q} in /home/rathi/.local/share/agent-history/incoming.*) rm -rf ${remote_stage_root_q} ;; *) exit 1 ;; esac" printf 'Agent history sync complete.\n'