#!/usr/bin/env python3 import json import os import shutil import sys from pathlib import Path SOURCE_HOME = os.environ.get("AGENT_MERGE_SOURCE_HOME", "/Users/rathi") TARGET_HOME = os.environ.get("AGENT_MERGE_TARGET_HOME", str(Path.home())) def translate_path(value): if isinstance(value, str) and (value == SOURCE_HOME or value.startswith(f"{SOURCE_HOME}/")): return f"{TARGET_HOME}{value[len(SOURCE_HOME):]}" return value def ensure_parent(path): path.parent.mkdir(parents=True, exist_ok=True) def read_jsonl(path): if not path.exists(): return [] return [line.rstrip("\n") for line in path.read_text().splitlines() if line.strip()] def write_text(path, text): ensure_parent(path) path.write_text(text) def write_json(path, value): write_text(path, json.dumps(value, indent=2) + "\n") def append_jsonl(path, lines): if not lines: return ensure_parent(path) with path.open("a") as handle: for line in lines: handle.write(line) handle.write("\n") def translate_project_dir_name(name): if name == "-Users-rathi": return "-home-rathi" if name.startswith("-Users-rathi-"): return f"-home-rathi-{name[len('-Users-rathi-'):]}" return name def translate_selected_fields(value, key=None): if isinstance(value, dict): return {child_key: translate_selected_fields(child_value, child_key) for child_key, child_value in value.items()} if isinstance(value, list): return [translate_selected_fields(item, key) for item in value] if isinstance(value, str) and key in {"cwd", "project", "projectPath", "originalPath", "rollout_path"}: return translate_path(value) return value def extract_claude_prompt(message): if not isinstance(message, dict): return "" content = message.get("content") if isinstance(content, str): return content.strip() if isinstance(content, list): parts = [] for item in content: if isinstance(item, dict): text = item.get("text") or item.get("content") if isinstance(text, str): parts.append(text.strip()) return " ".join(part for part in parts if part).strip() return "" def build_claude_entry_from_file(path, project_path): first_prompt = "" created = "" modified = "" git_branch = "" is_sidechain = False message_count = 0 for raw_line in path.read_text().splitlines(): if not raw_line.strip(): continue try: record = json.loads(raw_line) except json.JSONDecodeError: continue timestamp = record.get("timestamp") if timestamp and not created: created = timestamp if timestamp: modified = timestamp if record.get("gitBranch") and not git_branch: git_branch = record["gitBranch"] if record.get("isSidechain") is True: is_sidechain = True if record.get("type") in {"user", "assistant"}: message_count += 1 if record.get("type") == "user" and not first_prompt: first_prompt = extract_claude_prompt(record.get("message")) return { "sessionId": path.stem, "fullPath": str(path), "fileMtime": int(path.stat().st_mtime * 1000), "firstPrompt": first_prompt, "messageCount": message_count, "created": created, "modified": modified, "gitBranch": git_branch, "projectPath": project_path, "isSidechain": is_sidechain, } def merge_claude_history(stage_root, target_root): source = stage_root / "history.jsonl" target = target_root / "history.jsonl" existing_keys = set() for raw_line in read_jsonl(target): try: record = json.loads(raw_line) except json.JSONDecodeError: continue existing_keys.add((record.get("timestamp"), record.get("sessionId"), record.get("display"), record.get("project"))) additions = [] for raw_line in read_jsonl(source): try: record = translate_selected_fields(json.loads(raw_line)) except json.JSONDecodeError: continue key = (record.get("timestamp"), record.get("sessionId"), record.get("display"), record.get("project")) if key in existing_keys: continue existing_keys.add(key) additions.append(json.dumps(record, ensure_ascii=False)) append_jsonl(target, additions) def merge_claude_transcripts(stage_root, target_root): source_dir = stage_root / "transcripts" target_dir = target_root / "transcripts" if not source_dir.exists(): return target_dir.mkdir(parents=True, exist_ok=True) for source in source_dir.rglob("*"): if not source.is_file(): continue destination = target_dir / source.relative_to(source_dir) ensure_parent(destination) shutil.copy2(source, destination) def copy_transformed_claude_jsonl(source, destination): ensure_parent(destination) with source.open() as reader, destination.open("w") as writer: for raw_line in reader: if not raw_line.strip(): writer.write(raw_line) continue try: record = translate_selected_fields(json.loads(raw_line)) except json.JSONDecodeError: writer.write(raw_line) continue writer.write(json.dumps(record, ensure_ascii=False)) writer.write("\n") def merge_claude_projects(stage_root, target_root): source_projects = stage_root / "projects" target_projects = target_root / "projects" if not source_projects.exists(): return target_projects.mkdir(parents=True, exist_ok=True) for source_project in source_projects.iterdir(): if not source_project.is_dir(): continue target_project = target_projects / translate_project_dir_name(source_project.name) target_project.mkdir(parents=True, exist_ok=True) for source in source_project.rglob("*"): if not source.is_file(): continue relative = source.relative_to(source_project) if relative.name == "sessions-index.json": continue destination = target_project / relative if source.suffix == ".jsonl": copy_transformed_claude_jsonl(source, destination) else: ensure_parent(destination) shutil.copy2(source, destination) target_index = target_project / "sessions-index.json" existing_index = {} if target_index.exists(): try: existing_index = json.loads(target_index.read_text()) except json.JSONDecodeError: existing_index = {} source_index = {} stage_index_path = source_project / "sessions-index.json" if stage_index_path.exists(): try: source_index = json.loads(stage_index_path.read_text()) except json.JSONDecodeError: source_index = {} metadata_by_filename = {} for index_data in [existing_index, source_index]: for entry in index_data.get("entries", []): filename = Path(entry.get("fullPath", "")).name if not filename: continue entry = translate_selected_fields(entry) entry["fullPath"] = str(target_project / filename) candidate = target_project / filename if candidate.exists(): entry["fileMtime"] = int(candidate.stat().st_mtime * 1000) metadata_by_filename[filename] = entry original_path = translate_path(source_index.get("originalPath") or existing_index.get("originalPath") or "") entries = [] for candidate in sorted(target_project.glob("*.jsonl")): entry = metadata_by_filename.get(candidate.name) if entry is None: project_path = original_path if not project_path: for raw_line in candidate.read_text().splitlines(): if not raw_line.strip(): continue try: record = json.loads(raw_line) except json.JSONDecodeError: continue if isinstance(record.get("cwd"), str): project_path = record["cwd"] break entry = build_claude_entry_from_file(candidate, project_path) else: entry = {**entry, "fullPath": str(candidate), "fileMtime": int(candidate.stat().st_mtime * 1000)} entries.append(entry) if not original_path and entry.get("projectPath"): original_path = entry["projectPath"] write_json( target_index, { "version": 1, "entries": entries, "originalPath": original_path, }, ) def merge_codex_history(stage_root, target_root): source = stage_root / "history.jsonl" target = target_root / "history.jsonl" existing_keys = set() for raw_line in read_jsonl(target): try: record = json.loads(raw_line) except json.JSONDecodeError: continue existing_keys.add((record.get("session_id"), record.get("ts"), record.get("text"))) additions = [] for raw_line in read_jsonl(source): try: record = json.loads(raw_line) except json.JSONDecodeError: continue key = (record.get("session_id"), record.get("ts"), record.get("text")) if key in existing_keys: continue existing_keys.add(key) additions.append(json.dumps(record, ensure_ascii=False)) append_jsonl(target, additions) def transform_codex_record(record): record = translate_selected_fields(record) if record.get("type") == "session_meta": payload = record.get("payload") if isinstance(payload, dict) and isinstance(payload.get("cwd"), str): payload["cwd"] = translate_path(payload["cwd"]) if record.get("type") == "response_item": payload = record.get("payload") if isinstance(payload, dict) and payload.get("type") == "message": for item in payload.get("content", []): if isinstance(item, dict) and item.get("type") == "input_text" and isinstance(item.get("text"), str): if "" in item["text"] and "" in item["text"]: item["text"] = item["text"].replace(SOURCE_HOME, TARGET_HOME) return record def merge_codex_sessions(stage_root, target_root): source_dir = stage_root / "sessions" target_dir = target_root / "sessions" if not source_dir.exists(): return target_dir.mkdir(parents=True, exist_ok=True) for source in source_dir.rglob("*"): if not source.is_file(): continue destination = target_dir / source.relative_to(source_dir) ensure_parent(destination) with source.open() as reader, destination.open("w") as writer: for raw_line in reader: if not raw_line.strip(): writer.write(raw_line) continue try: record = transform_codex_record(json.loads(raw_line)) except json.JSONDecodeError: writer.write(raw_line) continue writer.write(json.dumps(record, ensure_ascii=False)) writer.write("\n") def merge_codex_session_index(stage_root, target_root): source = stage_root / "session_index.jsonl" target = target_root / "session_index.jsonl" merged = {} for current in [target, source]: for raw_line in read_jsonl(current): try: record = json.loads(raw_line) except json.JSONDecodeError: continue identifier = record.get("id") if identifier: merged[identifier] = record ordered = sorted(merged.values(), key=lambda item: (item.get("updated_at") or "", item.get("id") or "")) write_text(target, "".join(f"{json.dumps(item, ensure_ascii=False)}\n" for item in ordered)) def copy_translated_text_tree(source_dir, target_dir): if not source_dir.exists(): return for source in source_dir.rglob("*"): if not source.is_file(): continue destination = target_dir / source.relative_to(source_dir) ensure_parent(destination) write_text(destination, source.read_text().replace(SOURCE_HOME, TARGET_HOME)) def split_markdown_sections(text, prefix): header_lines = [] sections = [] current = None for line in text.splitlines(): if line.startswith(prefix): if current is not None: sections.append("\n".join(current).rstrip() + "\n") current = [line] elif current is None: header_lines.append(line) else: current.append(line) if current is not None: sections.append("\n".join(current).rstrip() + "\n") header = "\n".join(header_lines).rstrip() if header: header += "\n\n" return header, sections def section_identity(section): return section.splitlines()[0].strip() def merge_markdown_sections(target, source, prefix): if not source.exists(): return source_text = source.read_text().replace(SOURCE_HOME, TARGET_HOME) source_header, source_sections = split_markdown_sections(source_text, prefix) if target.exists(): target_text = target.read_text() target_header, target_sections = split_markdown_sections(target_text, prefix) else: target_header, target_sections = "", [] header = target_header or source_header existing_ids = {section_identity(section) for section in target_sections} merged_sections = [section for section in source_sections if section_identity(section) not in existing_ids] + target_sections write_text(target, header + "\n".join(section.rstrip() for section in merged_sections if section).rstrip() + "\n") def merge_unique_lines(target, source): if not source.exists(): return source_lines = source.read_text().replace(SOURCE_HOME, TARGET_HOME).splitlines() target_lines = target.read_text().splitlines() if target.exists() else [] existing = set(target_lines) merged = list(target_lines) for line in source_lines: if line not in existing: merged.append(line) existing.add(line) write_text(target, "\n".join(merged).rstrip() + "\n") def merge_codex_memories(stage_root, target_root): source_dir = stage_root / "memories" target_dir = target_root / "memories" if not source_dir.exists(): return target_dir.mkdir(parents=True, exist_ok=True) copy_translated_text_tree(source_dir / "rollout_summaries", target_dir / "rollout_summaries") merge_markdown_sections(target_dir / "raw_memories.md", source_dir / "raw_memories.md", "## Thread ") merge_markdown_sections(target_dir / "MEMORY.md", source_dir / "MEMORY.md", "# Task Group:") merge_unique_lines(target_dir / "memory_summary.md", source_dir / "memory_summary.md") def main(): if len(sys.argv) != 2: raise SystemExit("usage: merge-agent-history-remote.py ") stage_root = Path(sys.argv[1]).expanduser() home = Path(TARGET_HOME) merge_claude_history(stage_root / ".claude", home / ".claude") merge_claude_transcripts(stage_root / ".claude", home / ".claude") merge_claude_projects(stage_root / ".claude", home / ".claude") merge_codex_history(stage_root / ".codex", home / ".codex") merge_codex_session_index(stage_root / ".codex", home / ".codex") merge_codex_sessions(stage_root / ".codex", home / ".codex") merge_codex_memories(stage_root / ".codex", home / ".codex") if __name__ == "__main__": main()