diff --git a/README.md b/README.md index 8a06d71..900320c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Research agents hallucinate paper details and forget what they learn in a sessio ## Quick Start ```bash -npm install -g @alpha-hub/cli +npm install -g @companion-ai/alpha-hub alpha login # sign in with alphaXiv alpha search "attention mechanism" # search papers alpha get 1706.03762 # fetch paper report @@ -47,6 +47,7 @@ alpha ask 1706.03762 "What datasets were used for evaluation?" | `alpha search ` | Search papers (semantic, keyword, or agentic) | | `alpha get ` | Fetch paper report + local annotation | | `alpha ask ` | Ask a question about a paper | +| `alpha code [path]` | Read files from a paper repository | | `alpha annotate ` | Attach a note to a paper | | `alpha annotate --clear` | Remove a note | | `alpha annotate --list` | List all notes | @@ -75,11 +76,13 @@ Alpha Hub is designed for a loop where agents get better over time. ### Semantic Search -Three search modes — semantic (embedding similarity), keyword (exact terms), and agentic (multi-turn retrieval) — so agents find the right papers regardless of how they phrase the query. +Three search modes — semantic (embedding similarity), keyword (exact terms), and agentic (multi-turn retrieval) — so agents find the right papers regardless of how they phrase the query. `--mode all` runs all three in parallel for maximum recall. ```bash alpha search "methods for reducing hallucination in LLMs" # semantic alpha search "LoRA" --mode keyword # keyword +alpha search "retrieval-augmented generation for QA" --mode agentic +alpha search "alignment of vision language models" --mode all ``` ### Paper Q&A @@ -94,6 +97,15 @@ alpha ask 2106.09685 "What is the rank used for the low-rank matrices?" Local notes that agents attach to papers — they persist across sessions and appear automatically on future fetches. See the annotation as a gap the agent discovered and recorded so it doesn't repeat the same mistake. +### Repository Reading + +Read files directly from a paper's GitHub repository when the implementation matters. + +```bash +alpha code https://github.com/openai/gpt-2 / +alpha code https://github.com/openai/gpt-2 src/model.py +``` + ## License [MIT](LICENSE) diff --git a/cli/LICENSE b/cli/LICENSE new file mode 100644 index 0000000..850f51c --- /dev/null +++ b/cli/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Companion AI + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 0000000..d721d61 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,30 @@ +# Alpha Hub + +Unofficial alphaXiv-powered CLI and library for research agents. + +## Install + +```bash +npm install -g @companion-ai/alpha-hub +``` + +## Quick Start + +```bash +alpha login +alpha search "attention mechanism" +alpha get 1706.03762 +alpha ask 1706.03762 "What datasets were used for evaluation?" +alpha code https://github.com/openai/gpt-2 / +``` + +## Package Exports + +This package exposes: + +- `alpha` CLI +- `alpha-mcp` CLI +- library helpers from `@companion-ai/alpha-hub/lib` + +Repository: +https://github.com/getcompanion-ai/alpha-hub diff --git a/cli/package.json b/cli/package.json index ad0717b..b30748f 100644 --- a/cli/package.json +++ b/cli/package.json @@ -1,13 +1,38 @@ { - "name": "@alpha-hub/cli", - "version": "0.1.0", - "description": "CLI for Alpha Hub - search papers and share community annotations", + "name": "@companion-ai/alpha-hub", + "version": "0.1.2", + "description": "Unofficial alphaXiv-powered CLI and library for research agents", "type": "module", + "repository": { + "type": "git", + "url": "git+https://github.com/getcompanion-ai/alpha-hub.git", + "directory": "cli" + }, + "homepage": "https://github.com/getcompanion-ai/alpha-hub#readme", + "bugs": { + "url": "https://github.com/getcompanion-ai/alpha-hub/issues" + }, + "exports": { + ".": "./src/index.js", + "./lib": { + "types": "./src/lib/index.d.ts", + "default": "./src/lib/index.js" + }, + "./lib/auth": { + "types": "./src/lib/auth.d.ts", + "default": "./src/lib/auth.js" + }, + "./lib/alphaxiv": "./src/lib/alphaxiv.js", + "./lib/annotations": "./src/lib/annotations.js", + "./lib/papers": "./src/lib/papers.js" + }, "bin": { "alpha": "./bin/alpha", "alpha-mcp": "./bin/alpha-mcp" }, "files": [ + "README.md", + "LICENSE", "bin/", "src/" ], diff --git a/cli/src/commands/code.js b/cli/src/commands/code.js new file mode 100644 index 0000000..f0e0f55 --- /dev/null +++ b/cli/src/commands/code.js @@ -0,0 +1,24 @@ +import { readGithubRepo, disconnect } from '../lib/alphaxiv.js'; +import { output, error } from '../lib/output.js'; + +function formatResult(data) { + const text = typeof data === 'string' ? data : JSON.stringify(data, null, 2); + console.log(text); +} + +export function registerCodeCommand(program) { + program + .command('code [path]') + .description("Read files from a paper's GitHub repository") + .action(async (githubUrl, path, cmdOpts) => { + const opts = { ...program.opts(), ...cmdOpts }; + try { + const result = await readGithubRepo(githubUrl, path || '/'); + output(result, formatResult, opts); + } catch (err) { + error(err.message, opts); + } finally { + await disconnect(); + } + }); +} diff --git a/cli/src/commands/search.js b/cli/src/commands/search.js index c3e1e17..0b236bf 100644 --- a/cli/src/commands/search.js +++ b/cli/src/commands/search.js @@ -1,5 +1,5 @@ import chalk from 'chalk'; -import { searchByEmbedding, searchByKeyword, disconnect } from '../lib/alphaxiv.js'; +import { searchByEmbedding, searchByKeyword, agenticSearch, disconnect } from '../lib/alphaxiv.js'; import { output, error } from '../lib/output.js'; function formatResults(data) { @@ -10,20 +10,29 @@ function formatResults(data) { export function registerSearchCommand(program) { program .command('search ') - .description('Search papers via alphaXiv (semantic + keyword)') - .option('-m, --mode ', 'Search mode: semantic, keyword, both', 'semantic') + .description('Search papers via alphaXiv (semantic, keyword, both, agentic, or all)') + .option('-m, --mode ', 'Search mode: semantic, keyword, both, agentic, all', 'semantic') .action(async (query, cmdOpts) => { const opts = { ...program.opts(), ...cmdOpts }; try { let results; if (opts.mode === 'keyword') { results = await searchByKeyword(query); + } else if (opts.mode === 'agentic') { + results = await agenticSearch(query); } else if (opts.mode === 'both') { const [semantic, keyword] = await Promise.all([ searchByEmbedding(query), searchByKeyword(query), ]); results = { semantic, keyword }; + } else if (opts.mode === 'all') { + const [semantic, keyword, agentic] = await Promise.all([ + searchByEmbedding(query), + searchByKeyword(query), + agenticSearch(query), + ]); + results = { semantic, keyword, agentic }; } else { results = await searchByEmbedding(query); } diff --git a/cli/src/index.js b/cli/src/index.js index 889a45d..cfb4c3c 100644 --- a/cli/src/index.js +++ b/cli/src/index.js @@ -7,6 +7,7 @@ import { registerSearchCommand } from './commands/search.js'; import { registerGetCommand } from './commands/get.js'; import { registerAskCommand } from './commands/ask.js'; import { registerAnnotateCommand } from './commands/annotate.js'; +import { registerCodeCommand } from './commands/code.js'; import { registerLoginCommand, registerLogoutCommand } from './commands/login.js'; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -21,9 +22,12 @@ ${chalk.bold.underline('Usage')} ${chalk.dim('$')} alpha search "transformer attention mechanisms" ${chalk.dim('# semantic search')} ${chalk.dim('$')} alpha search "LoRA" --mode keyword ${chalk.dim('# keyword search')} + ${chalk.dim('$')} alpha search "hallucination in LLMs" --mode agentic ${chalk.dim('# agentic retrieval')} + ${chalk.dim('$')} alpha search "RAG for QA" --mode all ${chalk.dim('# semantic + keyword + agentic')} ${chalk.dim('$')} alpha get 1706.03762 ${chalk.dim('# paper content + annotation')} ${chalk.dim('$')} alpha get https://arxiv.org/abs/2106.09685 ${chalk.dim('# by URL')} ${chalk.dim('$')} alpha ask 1706.03762 "How does attention work?" ${chalk.dim('# ask about a paper')} + ${chalk.dim('$')} alpha code https://github.com/openai/gpt-2 / ${chalk.dim('# inspect repo structure')} ${chalk.dim('$')} alpha annotate 1706.03762 "key insight" ${chalk.dim('# save a note')} ${chalk.dim('$')} alpha annotate --list ${chalk.dim('# see all notes')} @@ -31,9 +35,10 @@ ${chalk.bold.underline('Commands')} ${chalk.bold('login')} Log in to alphaXiv (opens browser) ${chalk.bold('logout')} Log out - ${chalk.bold('search')} Search papers (semantic, keyword, or agentic) + ${chalk.bold('search')} Search papers (semantic, keyword, both, agentic, or all) ${chalk.bold('get')} Paper content + local annotation ${chalk.bold('ask')} Ask a question about a paper + ${chalk.bold('code')} [path] Read files from a paper repository ${chalk.bold('annotate')} [paper-id] [note] Save a note — appears on future fetches ${chalk.bold('annotate')} --clear Remove a note ${chalk.bold('annotate')} --list List all notes @@ -41,7 +46,7 @@ ${chalk.bold.underline('Commands')} ${chalk.bold.underline('Flags')} --json JSON output (for agents and piping) - -m, --mode Search mode: semantic, keyword, both (default: semantic) + -m, --mode Search mode: semantic, keyword, both, agentic, all (default: semantic) --full-text Get raw text instead of AI report (for get) `); } @@ -62,6 +67,7 @@ registerLogoutCommand(program); registerSearchCommand(program); registerGetCommand(program); registerAskCommand(program); +registerCodeCommand(program); registerAnnotateCommand(program); program.parse(); diff --git a/cli/src/lib/alphaxiv.js b/cli/src/lib/alphaxiv.js index 5306b7d..bc582eb 100644 --- a/cli/src/lib/alphaxiv.js +++ b/cli/src/lib/alphaxiv.js @@ -83,6 +83,16 @@ export async function agenticSearch(query) { return await callTool('agentic_paper_retrieval', { query }); } +export async function searchAll(query) { + const [semantic, keyword, agentic] = await Promise.all([ + searchByEmbedding(query), + searchByKeyword(query), + agenticSearch(query), + ]); + + return { semantic, keyword, agentic }; +} + export async function getPaperContent(url, { fullText = false } = {}) { const args = { url }; if (fullText) args.fullText = true; @@ -90,7 +100,15 @@ export async function getPaperContent(url, { fullText = false } = {}) { } export async function answerPdfQuery(url, query) { - return await callTool('answer_pdf_queries', { url, query }); + try { + return await callTool('answer_pdf_queries', { urls: [url], queries: [query] }); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + if (message.includes('Input validation error') || message.includes('Invalid arguments')) { + return await callTool('answer_pdf_queries', { url, query }); + } + throw err; + } } export async function readGithubRepo(githubUrl, path = '/') { diff --git a/cli/src/lib/auth.d.ts b/cli/src/lib/auth.d.ts new file mode 100644 index 0000000..f0699ae --- /dev/null +++ b/cli/src/lib/auth.d.ts @@ -0,0 +1,11 @@ +export declare function getAccessToken(): string | null; +export declare function getUserId(): string | null; +export declare function getUserName(): string | null; +export declare function refreshAccessToken(): Promise; +export declare function login(): Promise<{ + tokens: unknown; + userInfo: unknown; +}>; +export declare function getValidToken(): Promise; +export declare function isLoggedIn(): boolean; +export declare function logout(): void; diff --git a/cli/src/lib/index.d.ts b/cli/src/lib/index.d.ts new file mode 100644 index 0000000..f442720 --- /dev/null +++ b/cli/src/lib/index.d.ts @@ -0,0 +1,74 @@ +export declare function disconnect(): Promise; +export declare function getUserName(): string | null; +export declare function isLoggedIn(): boolean; +export declare function login(): Promise<{ + tokens: unknown; + userInfo: unknown; +}>; +export declare function logout(): void; +export declare function normalizePaperId(input: string): string; + +export declare function searchAll(query: string): Promise; +export declare function searchByEmbedding(query: string): Promise; +export declare function searchByKeyword(query: string): Promise; +export declare function agenticSearch(query: string): Promise; + +export declare function searchPapers( + query: string, + mode?: "semantic" | "keyword" | "both" | "agentic" | "all" | string, +): Promise; + +export declare function getPaper( + identifier: string, + options?: { fullText?: boolean }, +): Promise<{ + paperId: string; + url: string; + content: unknown; + annotation: unknown; +}>; + +export declare function askPaper( + identifier: string, + question: string, +): Promise<{ + paperId: string; + url: string; + question: string; + answer: unknown; +}>; + +export declare function annotatePaper( + identifier: string, + note: string, +): Promise<{ + status: "saved"; + annotation: unknown; +}>; + +export declare function clearPaperAnnotation( + identifier: string, +): Promise<{ + status: "cleared" | "not_found"; + paperId: string; +}>; + +export declare function getPaperAnnotation( + identifier: string, +): Promise<{ + status: "found" | "no_annotation"; + annotation?: unknown; + paperId?: string; +}>; + +export declare function listPaperAnnotations(): Promise<{ + total: number; + annotations: unknown[]; +}>; + +export declare function readPaperCode(githubUrl: string, path?: string): Promise; + +export declare function readAnnotation(id: string): unknown; +export declare function writeAnnotation(id: string, note: string): unknown; +export declare function clearAnnotation(id: string): boolean; +export declare function listAnnotations(): unknown[]; diff --git a/cli/src/lib/index.js b/cli/src/lib/index.js new file mode 100644 index 0000000..edeea30 --- /dev/null +++ b/cli/src/lib/index.js @@ -0,0 +1,176 @@ +import { + agenticSearch, + answerPdfQuery, + disconnect, + getPaperContent, + readGithubRepo, + searchAll, + searchByEmbedding, + searchByKeyword, +} from './alphaxiv.js'; +import { + clearAnnotation, + listAnnotations, + readAnnotation, + writeAnnotation, +} from './annotations.js'; +import { getUserName, isLoggedIn, login, logout } from './auth.js'; +import { normalizePaperId, toArxivUrl } from './papers.js'; + +export { + disconnect, + getUserName, + isLoggedIn, + login, + logout, + normalizePaperId, + searchAll, + searchByEmbedding, + searchByKeyword, + agenticSearch, + readAnnotation, + writeAnnotation, + clearAnnotation, + listAnnotations, + readGithubRepo, +}; + +function parseMetricNumber(fragment, label) { + const match = fragment.match(new RegExp(`(\\d+)\\s+${label}`, 'i')); + return match ? Number(match[1]) : null; +} + +function parsePublishedAt(fragment) { + const match = fragment.match(/Published on ([^,]+)(?:,|$)/i); + return match ? match[1].trim() : null; +} + +function parsePaperListText(text) { + if (typeof text !== 'string') { + return { raw: text, results: [] }; + } + + const blocks = text + .split(/\n(?=\d+\.\s+\*\*)/g) + .map((block) => block.trim()) + .filter(Boolean); + + const results = blocks.map((block, index) => { + const lines = block.split('\n').map((line) => line.trim()).filter(Boolean); + const header = lines[0] || ''; + const headerMatch = header.match(/^\d+\.\s+\*\*(.+?)\*\*\s+\((.+)\)$/); + + const fieldValue = (prefix) => { + const line = lines.find((entry) => entry.startsWith(prefix)); + return line ? line.slice(prefix.length).trim() : null; + }; + + const arxivId = fieldValue('- arXiv Id:'); + + return { + rank: index + 1, + title: headerMatch ? headerMatch[1].trim() : header, + visits: headerMatch ? parseMetricNumber(headerMatch[2], 'Visits') : null, + likes: headerMatch ? parseMetricNumber(headerMatch[2], 'Likes') : null, + publishedAt: headerMatch ? parsePublishedAt(headerMatch[2]) : null, + organizations: fieldValue('- Organizations:'), + authors: fieldValue('- Authors:'), + abstract: fieldValue('- Abstract:'), + arxivId, + arxivUrl: arxivId ? `https://arxiv.org/abs/${arxivId}` : null, + alphaXivUrl: arxivId ? `https://www.alphaxiv.org/overview/${arxivId}` : null, + raw: block, + }; + }); + + return { raw: text, results }; +} + +function normalizeSearchPayload(query, mode, payload) { + if (mode === 'all' || mode === 'both') { + const normalized = {}; + for (const [key, value] of Object.entries(payload)) { + normalized[key] = parsePaperListText(value); + } + return { + query, + mode, + ...normalized, + }; + } + + const parsed = parsePaperListText(payload); + return { + query, + mode, + ...parsed, + }; +} + +export async function searchPapers(query, mode = 'semantic') { + if (mode === 'keyword') return normalizeSearchPayload(query, mode, await searchByKeyword(query)); + if (mode === 'agentic') return normalizeSearchPayload(query, mode, await agenticSearch(query)); + if (mode === 'both') { + const [semantic, keyword] = await Promise.all([ + searchByEmbedding(query), + searchByKeyword(query), + ]); + return normalizeSearchPayload(query, mode, { semantic, keyword }); + } + if (mode === 'all') return normalizeSearchPayload(query, mode, await searchAll(query)); + return normalizeSearchPayload(query, mode, await searchByEmbedding(query)); +} + +export async function getPaper(identifier, options = {}) { + const paperId = normalizePaperId(identifier); + const url = toArxivUrl(identifier); + const content = await getPaperContent(url, { fullText: Boolean(options.fullText) }); + const annotation = readAnnotation(paperId); + return { + paperId, + url, + alphaXivUrl: `https://www.alphaxiv.org/overview/${paperId}`, + content, + annotation, + }; +} + +export async function askPaper(identifier, question) { + const paperId = normalizePaperId(identifier); + const url = toArxivUrl(identifier); + const answer = await answerPdfQuery(url, question); + return { + paperId, + url, + alphaXivUrl: `https://www.alphaxiv.org/overview/${paperId}`, + question, + answer, + }; +} + +export async function annotatePaper(identifier, note) { + const paperId = normalizePaperId(identifier); + const annotation = writeAnnotation(paperId, note); + return { status: 'saved', annotation }; +} + +export async function clearPaperAnnotation(identifier) { + const paperId = normalizePaperId(identifier); + const cleared = clearAnnotation(paperId); + return { status: cleared ? 'cleared' : 'not_found', paperId }; +} + +export async function getPaperAnnotation(identifier) { + const paperId = normalizePaperId(identifier); + const annotation = readAnnotation(paperId); + return annotation ? { status: 'found', annotation } : { status: 'no_annotation', paperId }; +} + +export async function listPaperAnnotations() { + const annotations = listAnnotations(); + return { total: annotations.length, annotations }; +} + +export async function readPaperCode(githubUrl, path = '/') { + return readGithubRepo(githubUrl, path); +}