Improve alphaXiv client outputs and exports

This commit is contained in:
Advait Paliwal 2026-03-20 12:03:27 -07:00
parent 9a708a1ab9
commit 270eaa1dc5
11 changed files with 417 additions and 11 deletions

View file

@ -8,7 +8,7 @@ Research agents hallucinate paper details and forget what they learn in a sessio
## Quick Start
```bash
npm install -g @alpha-hub/cli
npm install -g @companion-ai/alpha-hub
alpha login # sign in with alphaXiv
alpha search "attention mechanism" # search papers
alpha get 1706.03762 # fetch paper report
@ -47,6 +47,7 @@ alpha ask 1706.03762 "What datasets were used for evaluation?"
| `alpha search <query>` | Search papers (semantic, keyword, both, agentic, or all) |
| `alpha get <id\|url>` | Fetch paper report + local annotation |
| `alpha ask <id\|url> <question>` | Ask a question about a paper |
| `alpha code <github-url> [path]` | Read files from a paper repository |
| `alpha annotate <id> <note>` | Attach a note to a paper |
| `alpha annotate <id> --clear` | Remove a note |
| `alpha annotate --list` | List all notes |
@ -75,11 +76,13 @@ Alpha Hub is designed for a loop where agents get better over time.
### Semantic Search
Three search modes — semantic (embedding similarity), keyword (exact terms), and agentic (multi-turn retrieval) — so agents find the right papers regardless of how they phrase the query.
Three search modes — semantic (embedding similarity), keyword (exact terms), and agentic (multi-turn retrieval) — so agents find the right papers regardless of how they phrase the query. `--mode all` runs all three in parallel for maximum recall.
```bash
alpha search "methods for reducing hallucination in LLMs" # semantic
alpha search "LoRA" --mode keyword # keyword
alpha search "retrieval-augmented generation for QA" --mode agentic
alpha search "alignment of vision language models" --mode all
```
### Paper Q&A
@ -94,6 +97,15 @@ alpha ask 2106.09685 "What is the rank used for the low-rank matrices?"
Local notes that agents attach to papers — they persist across sessions and appear automatically on future fetches. Think of each annotation as a gap the agent discovered and recorded so that it doesn't repeat the same mistake.
### Repository Reading
Read files directly from a paper's GitHub repository when the implementation matters.
```bash
alpha code https://github.com/openai/gpt-2 /
alpha code https://github.com/openai/gpt-2 src/model.py
```
## License
[MIT](LICENSE)

21
cli/LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2026 Companion AI
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

30
cli/README.md Normal file
View file

@ -0,0 +1,30 @@
# Alpha Hub
Unofficial alphaXiv-powered CLI and library for research agents.
## Install
```bash
npm install -g @companion-ai/alpha-hub
```
## Quick Start
```bash
alpha login
alpha search "attention mechanism"
alpha get 1706.03762
alpha ask 1706.03762 "What datasets were used for evaluation?"
alpha code https://github.com/openai/gpt-2 /
```
## Package Exports
This package exposes:
- `alpha` CLI
- `alpha-mcp` CLI
- library helpers from `@companion-ai/alpha-hub/lib`
Repository:
https://github.com/getcompanion-ai/alpha-hub

View file

@ -1,13 +1,38 @@
{
"name": "@alpha-hub/cli",
"version": "0.1.0",
"description": "CLI for Alpha Hub - search papers and share community annotations",
"name": "@companion-ai/alpha-hub",
"version": "0.1.2",
"description": "Unofficial alphaXiv-powered CLI and library for research agents",
"type": "module",
"repository": {
"type": "git",
"url": "git+https://github.com/getcompanion-ai/alpha-hub.git",
"directory": "cli"
},
"homepage": "https://github.com/getcompanion-ai/alpha-hub#readme",
"bugs": {
"url": "https://github.com/getcompanion-ai/alpha-hub/issues"
},
"exports": {
".": "./src/index.js",
"./lib": {
"types": "./src/lib/index.d.ts",
"default": "./src/lib/index.js"
},
"./lib/auth": {
"types": "./src/lib/auth.d.ts",
"default": "./src/lib/auth.js"
},
"./lib/alphaxiv": "./src/lib/alphaxiv.js",
"./lib/annotations": "./src/lib/annotations.js",
"./lib/papers": "./src/lib/papers.js"
},
"bin": {
"alpha": "./bin/alpha",
"alpha-mcp": "./bin/alpha-mcp"
},
"files": [
"README.md",
"LICENSE",
"bin/",
"src/"
],

24
cli/src/commands/code.js Normal file
View file

@ -0,0 +1,24 @@
import { readGithubRepo, disconnect } from '../lib/alphaxiv.js';
import { output, error } from '../lib/output.js';
/**
 * Print a repository-read result to stdout.
 *
 * Strings are written verbatim; any other value is serialized as JSON with a
 * two-space indent before being printed.
 *
 * @param {unknown} data - Value returned by the repository read.
 * @returns {void}
 */
function formatResult(data) {
  if (typeof data === 'string') {
    console.log(data);
    return;
  }
  console.log(JSON.stringify(data, null, 2));
}
/**
 * Register the `code <github-url> [path]` subcommand on the given program.
 *
 * When invoked, the command reads the requested path from the repository
 * (defaulting to `/` when no path is given), hands the result to `output`
 * together with `formatResult`, reports failures through `error`, and always
 * disconnects afterwards.
 *
 * @param {object} program - Commander-style program exposing `command()` and `opts()`.
 * @returns {void}
 */
export function registerCodeCommand(program) {
  const runCode = async (githubUrl, path, cmdOpts) => {
    // Command-level options override the program-level ones.
    const opts = { ...program.opts(), ...cmdOpts };
    try {
      const repoPath = path || '/';
      const result = await readGithubRepo(githubUrl, repoPath);
      output(result, formatResult, opts);
    } catch (err) {
      error(err.message, opts);
    } finally {
      await disconnect();
    }
  };

  program
    .command('code <github-url> [path]')
    .description("Read files from a paper's GitHub repository")
    .action(runCode);
}

View file

@ -1,5 +1,5 @@
import chalk from 'chalk';
import { searchByEmbedding, searchByKeyword, disconnect } from '../lib/alphaxiv.js';
import { searchByEmbedding, searchByKeyword, agenticSearch, disconnect } from '../lib/alphaxiv.js';
import { output, error } from '../lib/output.js';
function formatResults(data) {
@ -10,20 +10,29 @@ function formatResults(data) {
export function registerSearchCommand(program) {
program
.command('search <query>')
.description('Search papers via alphaXiv (semantic + keyword)')
.option('-m, --mode <mode>', 'Search mode: semantic, keyword, both', 'semantic')
.description('Search papers via alphaXiv (semantic, keyword, both, agentic, or all)')
.option('-m, --mode <mode>', 'Search mode: semantic, keyword, both, agentic, all', 'semantic')
.action(async (query, cmdOpts) => {
const opts = { ...program.opts(), ...cmdOpts };
try {
let results;
if (opts.mode === 'keyword') {
results = await searchByKeyword(query);
} else if (opts.mode === 'agentic') {
results = await agenticSearch(query);
} else if (opts.mode === 'both') {
const [semantic, keyword] = await Promise.all([
searchByEmbedding(query),
searchByKeyword(query),
]);
results = { semantic, keyword };
} else if (opts.mode === 'all') {
const [semantic, keyword, agentic] = await Promise.all([
searchByEmbedding(query),
searchByKeyword(query),
agenticSearch(query),
]);
results = { semantic, keyword, agentic };
} else {
results = await searchByEmbedding(query);
}

View file

@ -7,6 +7,7 @@ import { registerSearchCommand } from './commands/search.js';
import { registerGetCommand } from './commands/get.js';
import { registerAskCommand } from './commands/ask.js';
import { registerAnnotateCommand } from './commands/annotate.js';
import { registerCodeCommand } from './commands/code.js';
import { registerLoginCommand, registerLogoutCommand } from './commands/login.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
@ -21,9 +22,12 @@ ${chalk.bold.underline('Usage')}
${chalk.dim('$')} alpha search "transformer attention mechanisms" ${chalk.dim('# semantic search')}
${chalk.dim('$')} alpha search "LoRA" --mode keyword ${chalk.dim('# keyword search')}
${chalk.dim('$')} alpha search "hallucination in LLMs" --mode agentic ${chalk.dim('# agentic retrieval')}
${chalk.dim('$')} alpha search "RAG for QA" --mode all ${chalk.dim('# semantic + keyword + agentic')}
${chalk.dim('$')} alpha get 1706.03762 ${chalk.dim('# paper content + annotation')}
${chalk.dim('$')} alpha get https://arxiv.org/abs/2106.09685 ${chalk.dim('# by URL')}
${chalk.dim('$')} alpha ask 1706.03762 "How does attention work?" ${chalk.dim('# ask about a paper')}
${chalk.dim('$')} alpha code https://github.com/openai/gpt-2 / ${chalk.dim('# inspect repo structure')}
${chalk.dim('$')} alpha annotate 1706.03762 "key insight" ${chalk.dim('# save a note')}
${chalk.dim('$')} alpha annotate --list ${chalk.dim('# see all notes')}
@ -31,9 +35,10 @@ ${chalk.bold.underline('Commands')}
${chalk.bold('login')} Log in to alphaXiv (opens browser)
${chalk.bold('logout')} Log out
${chalk.bold('search')} <query> Search papers (semantic, keyword, or agentic)
${chalk.bold('search')} <query> Search papers (semantic, keyword, both, agentic, or all)
${chalk.bold('get')} <url|arxiv-id> Paper content + local annotation
${chalk.bold('ask')} <url|arxiv-id> <question> Ask a question about a paper
${chalk.bold('code')} <github-url> [path] Read files from a paper repository
${chalk.bold('annotate')} [paper-id] [note] Save a note appears on future fetches
${chalk.bold('annotate')} <paper-id> --clear Remove a note
${chalk.bold('annotate')} --list List all notes
@ -41,7 +46,7 @@ ${chalk.bold.underline('Commands')}
${chalk.bold.underline('Flags')}
--json JSON output (for agents and piping)
-m, --mode <mode> Search mode: semantic, keyword, both (default: semantic)
-m, --mode <mode> Search mode: semantic, keyword, both, agentic, all (default: semantic)
--full-text Get raw text instead of AI report (for get)
`);
}
@ -62,6 +67,7 @@ registerLogoutCommand(program);
registerSearchCommand(program);
registerGetCommand(program);
registerAskCommand(program);
registerCodeCommand(program);
registerAnnotateCommand(program);
program.parse();

View file

@ -83,6 +83,16 @@ export async function agenticSearch(query) {
return await callTool('agentic_paper_retrieval', { query });
}
/**
 * Run the embedding, keyword, and agentic searches for the same query
 * concurrently and collect their results under one object.
 *
 * Uses `Promise.all`, so the returned promise rejects as soon as any one of
 * the three searches rejects.
 *
 * @param {string} query - Search query forwarded to each search function.
 * @returns {Promise<{semantic: unknown, keyword: unknown, agentic: unknown}>}
 */
export async function searchAll(query) {
  const pending = [
    searchByEmbedding(query),
    searchByKeyword(query),
    agenticSearch(query),
  ];
  const [semantic, keyword, agentic] = await Promise.all(pending);
  return { semantic, keyword, agentic };
}
export async function getPaperContent(url, { fullText = false } = {}) {
const args = { url };
if (fullText) args.fullText = true;
@ -90,7 +100,15 @@ export async function getPaperContent(url, { fullText = false } = {}) {
}
/**
 * Ask a question about a single paper through the `answer_pdf_queries` tool.
 *
 * The call is first made with the plural argument shape
 * (`{ urls: [url], queries: [query] }`). If that fails with a message
 * containing "Input validation error" or "Invalid arguments", it is retried
 * once with the singular shape (`{ url, query }`). Any other failure is
 * rethrown unchanged.
 *
 * @param {string} url - Paper URL.
 * @param {string} query - Question to ask.
 * @returns {Promise<unknown>} Whatever the tool call resolves to.
 */
export async function answerPdfQuery(url, query) {
  const pluralArgs = { urls: [url], queries: [query] };
  try {
    return await callTool('answer_pdf_queries', pluralArgs);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    const rejectedArgs = ['Input validation error', 'Invalid arguments'].some(
      (marker) => message.includes(marker),
    );
    if (!rejectedArgs) {
      throw err;
    }
    return await callTool('answer_pdf_queries', { url, query });
  }
}
export async function readGithubRepo(githubUrl, path = '/') {

11
cli/src/lib/auth.d.ts vendored Normal file
View file

@ -0,0 +1,11 @@
// Type declarations for the `./lib/auth` entry point.
// NOTE(review): the implementation (auth.js) is not visible here; the comments
// below describe only what the signatures state — confirm details against auth.js.

/** Returns the access token, or `null` (presumably when none is available — confirm). */
export declare function getAccessToken(): string | null;
/** Returns the user id, or `null`. */
export declare function getUserId(): string | null;
/** Returns the user name, or `null`. */
export declare function getUserName(): string | null;
/** Asynchronously refreshes the access token; resolves to a token string or `null`. */
export declare function refreshAccessToken(): Promise<string | null>;
/** Performs the login flow; resolves to untyped `tokens` and `userInfo` payloads. */
export declare function login(): Promise<{
tokens: unknown;
userInfo: unknown;
}>;
/** Resolves to a token string or `null` (presumably refreshing when needed — confirm). */
export declare function getValidToken(): Promise<string | null>;
/** Reports whether a login is currently present. */
export declare function isLoggedIn(): boolean;
/** Logs out; returns nothing. */
export declare function logout(): void;

74
cli/src/lib/index.d.ts vendored Normal file
View file

@ -0,0 +1,74 @@
// Type declarations for the `./lib` entry point (index.js).

/** Re-exported from alphaxiv.js; the CLI commands await it after each tool call. */
export declare function disconnect(): Promise<void>;
/** Re-exported from auth.js: the user name, or `null`. */
export declare function getUserName(): string | null;
/** Re-exported from auth.js. */
export declare function isLoggedIn(): boolean;
/** Re-exported from auth.js; resolves to untyped `tokens` and `userInfo` payloads. */
export declare function login(): Promise<{
tokens: unknown;
userInfo: unknown;
}>;
/** Re-exported from auth.js. */
export declare function logout(): void;
/** Re-exported from papers.js; index.js uses it to derive `paperId` from an id or URL. */
export declare function normalizePaperId(input: string): string;
/** Runs the embedding, keyword, and agentic searches in parallel; resolves to `{ semantic, keyword, agentic }`. */
export declare function searchAll(query: string): Promise<unknown>;
/** Embedding-based search; resolves to the raw tool result. */
export declare function searchByEmbedding(query: string): Promise<unknown>;
/** Keyword search; resolves to the raw tool result. */
export declare function searchByKeyword(query: string): Promise<unknown>;
/** Agentic retrieval; resolves to the raw tool result. */
export declare function agenticSearch(query: string): Promise<unknown>;
/**
 * Searches and returns a normalized payload containing `query`, `mode`, and the
 * parsed paper list(s). `mode` defaults to "semantic"; any value other than
 * "keyword", "agentic", "both", or "all" runs the embedding search.
 */
export declare function searchPapers(
query: string,
mode?: "semantic" | "keyword" | "both" | "agentic" | "all" | string,
): Promise<unknown>;
/**
 * Fetches a paper's content together with its local annotation.
 *
 * The implementation in index.js also returns `alphaXivUrl`
 * (`https://www.alphaxiv.org/overview/<paperId>`), so it is declared here to
 * keep the types in sync with the runtime result.
 */
export declare function getPaper(
  identifier: string,
  options?: { fullText?: boolean },
): Promise<{
  paperId: string;
  url: string;
  alphaXivUrl: string;
  content: unknown;
  annotation: unknown;
}>;
/**
 * Asks a question about a paper and returns the answer with the paper's ids.
 *
 * The implementation in index.js also returns `alphaXivUrl`
 * (`https://www.alphaxiv.org/overview/<paperId>`), so it is declared here to
 * keep the types in sync with the runtime result.
 */
export declare function askPaper(
  identifier: string,
  question: string,
): Promise<{
  paperId: string;
  url: string;
  alphaXivUrl: string;
  question: string;
  answer: unknown;
}>;
/** Normalizes the identifier, writes the note, and resolves with status "saved" plus the written annotation. */
export declare function annotatePaper(
identifier: string,
note: string,
): Promise<{
status: "saved";
annotation: unknown;
}>;
/** Clears the note for a paper; status is "cleared" when one was removed, otherwise "not_found". */
export declare function clearPaperAnnotation(
identifier: string,
): Promise<{
status: "cleared" | "not_found";
paperId: string;
}>;
/**
 * Looks up the note for a paper. A "found" result carries `annotation`;
 * a "no_annotation" result carries `paperId` instead.
 */
export declare function getPaperAnnotation(
identifier: string,
): Promise<{
status: "found" | "no_annotation";
annotation?: unknown;
paperId?: string;
}>;
/** Lists all notes; `total` is the length of `annotations`. */
export declare function listPaperAnnotations(): Promise<{
total: number;
annotations: unknown[];
}>;
/** Reads a path from a GitHub repository; `path` defaults to "/" in the implementation. */
export declare function readPaperCode(githubUrl: string, path?: string): Promise<unknown>;
/**
 * Lower-level repository read that `readPaperCode` delegates to. index.js
 * re-exports it, so it is declared here to keep the types in sync with the
 * runtime exports.
 */
export declare function readGithubRepo(githubUrl: string, path?: string): Promise<unknown>;
// Low-level annotation helpers re-exported from annotations.js.
// NOTE(review): annotations.js is not visible here; confirm storage details there.

/** Returns the annotation for `id`; index.js treats a falsy result as "no annotation". */
export declare function readAnnotation(id: string): unknown;
/** Writes `note` for `id` and returns the resulting annotation. */
export declare function writeAnnotation(id: string, note: string): unknown;
/** Removes the annotation for `id`; index.js maps `true` to "cleared" and `false` to "not_found". */
export declare function clearAnnotation(id: string): boolean;
/** Returns all annotations as an array. */
export declare function listAnnotations(): unknown[];

176
cli/src/lib/index.js Normal file
View file

@ -0,0 +1,176 @@
import {
agenticSearch,
answerPdfQuery,
disconnect,
getPaperContent,
readGithubRepo,
searchAll,
searchByEmbedding,
searchByKeyword,
} from './alphaxiv.js';
import {
clearAnnotation,
listAnnotations,
readAnnotation,
writeAnnotation,
} from './annotations.js';
import { getUserName, isLoggedIn, login, logout } from './auth.js';
import { normalizePaperId, toArxivUrl } from './papers.js';
export {
disconnect,
getUserName,
isLoggedIn,
login,
logout,
normalizePaperId,
searchAll,
searchByEmbedding,
searchByKeyword,
agenticSearch,
readAnnotation,
writeAnnotation,
clearAnnotation,
listAnnotations,
readGithubRepo,
};
/**
 * Extract the integer that precedes a metric label, e.g. `"120 Visits"` -> 120.
 *
 * Counts written with thousands separators ("1,234 Visits") are read as the
 * full number rather than only the digits after the last comma. The label is
 * escaped before being placed in the pattern, so regex metacharacters in it
 * are matched literally. Matching is case-insensitive.
 *
 * @param {string} fragment - Text to search, such as a result header's metadata.
 * @param {string} label - Metric name that follows the number (e.g. "Visits").
 * @returns {number|null} The parsed count, or `null` when the label is absent.
 */
function parseMetricNumber(fragment, label) {
  const safeLabel = String(label).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Try the comma-grouped form first so "1,234" is not cut down to "234".
  const pattern = new RegExp(`(\\d{1,3}(?:,\\d{3})+|\\d+)\\s+${safeLabel}`, 'i');
  const match = fragment.match(pattern);
  return match ? Number(match[1].replace(/,/g, '')) : null;
}
/**
 * Pull the publication date text out of a "Published on <date>" fragment.
 *
 * The date is everything after "Published on " up to the next comma or the
 * end of the string, with surrounding whitespace removed. Matching is
 * case-insensitive.
 *
 * @param {string} fragment - Text that may contain a "Published on ..." clause.
 * @returns {string|null} The trimmed date text, or `null` when the clause is absent.
 */
function parsePublishedAt(fragment) {
  const found = fragment.match(/Published on ([^,]+)(?:,|$)/i);
  if (!found) {
    return null;
  }
  const [, dateText] = found;
  return dateText.trim();
}
/**
 * Parse the numbered, markdown-style paper list returned by the search tools.
 *
 * Each entry is expected to start with a header such as
 * `1. **Title** (120 Visits, 4 Likes, Published on 2024-01-02)` followed by
 * `- Field: value` lines (Organizations, Authors, Abstract, arXiv Id).
 *
 * Only blocks that begin with a numbered bold header become results, so
 * introductory text or a non-list reply (e.g. "No papers found") no longer
 * yields a bogus entry. When a header has no parenthesised metadata, the
 * title is still taken from between the bold markers instead of keeping the
 * `1. **...**` markup.
 *
 * @param {unknown} text - Raw tool output; non-string values are passed through.
 * @returns {{raw: unknown, results: object[]}} The original input plus the
 *   parsed entries. `rank` is the 1-based position among the parsed entries.
 */
function parsePaperListText(text) {
  if (typeof text !== 'string') {
    return { raw: text, results: [] };
  }

  const entryStart = /^\d+\.\s+\*\*/;
  const blocks = text
    .split(/\n(?=\d+\.\s+\*\*)/g)
    .map((block) => block.trim())
    // Drop empty chunks and any preamble that precedes the first numbered entry.
    .filter((block) => entryStart.test(block));

  const results = blocks.map((block, index) => {
    const lines = block.split('\n').map((line) => line.trim()).filter(Boolean);
    const header = lines[0] || '';
    const headerMatch = header.match(/^\d+\.\s+\*\*(.+?)\*\*\s+\((.+)\)$/);
    // Fallback for headers that carry a bold title but no "(...)" metadata.
    const bareTitleMatch = headerMatch ? null : header.match(/^\d+\.\s+\*\*(.+?)\*\*/);

    const fieldValue = (prefix) => {
      const line = lines.find((entry) => entry.startsWith(prefix));
      return line ? line.slice(prefix.length).trim() : null;
    };

    let title = header;
    if (headerMatch) {
      title = headerMatch[1].trim();
    } else if (bareTitleMatch) {
      title = bareTitleMatch[1].trim();
    }

    const arxivId = fieldValue('- arXiv Id:');

    return {
      rank: index + 1,
      title,
      visits: headerMatch ? parseMetricNumber(headerMatch[2], 'Visits') : null,
      likes: headerMatch ? parseMetricNumber(headerMatch[2], 'Likes') : null,
      publishedAt: headerMatch ? parsePublishedAt(headerMatch[2]) : null,
      organizations: fieldValue('- Organizations:'),
      authors: fieldValue('- Authors:'),
      abstract: fieldValue('- Abstract:'),
      arxivId,
      arxivUrl: arxivId ? `https://arxiv.org/abs/${arxivId}` : null,
      alphaXivUrl: arxivId ? `https://www.alphaxiv.org/overview/${arxivId}` : null,
      raw: block,
    };
  });

  return { raw: text, results };
}
/**
 * Wrap a raw search payload in a uniform `{ query, mode, ... }` envelope.
 *
 * For the multi-search modes ("all" and "both") the payload is an object
 * keyed by search kind; each value is parsed separately and kept under its
 * key. For every other mode the payload is parsed as a single list and its
 * `raw` / `results` fields are spread into the envelope.
 *
 * @param {string} query - The query that produced the payload.
 * @param {string} mode - The search mode that was requested.
 * @param {unknown} payload - Raw tool output, or an object of raw outputs.
 * @returns {object} The normalized envelope.
 */
function normalizeSearchPayload(query, mode, payload) {
  const isMultiSearch = ['all', 'both'].includes(mode);
  if (!isMultiSearch) {
    return { query, mode, ...parsePaperListText(payload) };
  }

  const parsedByKind = Object.entries(payload).reduce((acc, [kind, listText]) => {
    acc[kind] = parsePaperListText(listText);
    return acc;
  }, {});

  return { query, mode, ...parsedByKind };
}
/**
 * Search for papers and return a normalized, parsed payload.
 *
 * Modes: "keyword", "agentic", "both" (embedding + keyword in parallel), and
 * "all" (embedding + keyword + agentic). Any other value, including the
 * default "semantic", runs the embedding search. The returned envelope
 * echoes back the `mode` exactly as it was passed in.
 *
 * @param {string} query - Search query.
 * @param {string} [mode='semantic'] - Search mode.
 * @returns {Promise<object>} Envelope produced by `normalizeSearchPayload`.
 */
export async function searchPapers(query, mode = 'semantic') {
  let payload;
  switch (mode) {
    case 'keyword':
      payload = await searchByKeyword(query);
      break;
    case 'agentic':
      payload = await agenticSearch(query);
      break;
    case 'both': {
      const [semantic, keyword] = await Promise.all([
        searchByEmbedding(query),
        searchByKeyword(query),
      ]);
      payload = { semantic, keyword };
      break;
    }
    case 'all':
      payload = await searchAll(query);
      break;
    default:
      payload = await searchByEmbedding(query);
  }
  return normalizeSearchPayload(query, mode, payload);
}
/**
 * Fetch a paper's content and pair it with the locally stored annotation.
 *
 * @param {string} identifier - Paper id or URL; normalized via `normalizePaperId` / `toArxivUrl`.
 * @param {{fullText?: boolean}} [options] - `fullText` is coerced to a boolean
 *   and forwarded to `getPaperContent`.
 * @returns {Promise<{paperId: string, url: string, alphaXivUrl: string, content: unknown, annotation: unknown}>}
 */
export async function getPaper(identifier, options = {}) {
  const paperId = normalizePaperId(identifier);
  const url = toArxivUrl(identifier);
  const wantsFullText = Boolean(options.fullText);
  const content = await getPaperContent(url, { fullText: wantsFullText });

  return {
    paperId,
    url,
    alphaXivUrl: `https://www.alphaxiv.org/overview/${paperId}`,
    content,
    // Read only after the content fetch has succeeded.
    annotation: readAnnotation(paperId),
  };
}
/**
 * Ask a question about a paper and return the answer alongside the paper's ids.
 *
 * @param {string} identifier - Paper id or URL; normalized via `normalizePaperId` / `toArxivUrl`.
 * @param {string} question - Question forwarded to `answerPdfQuery`.
 * @returns {Promise<{paperId: string, url: string, alphaXivUrl: string, question: string, answer: unknown}>}
 */
export async function askPaper(identifier, question) {
  const paperId = normalizePaperId(identifier);
  const url = toArxivUrl(identifier);
  const alphaXivUrl = `https://www.alphaxiv.org/overview/${paperId}`;
  const answer = await answerPdfQuery(url, question);
  return { paperId, url, alphaXivUrl, question, answer };
}
/**
 * Save a note for a paper.
 *
 * @param {string} identifier - Paper id or URL; normalized before writing.
 * @param {string} note - Note text passed to `writeAnnotation`.
 * @returns {Promise<{status: 'saved', annotation: unknown}>} The value returned by `writeAnnotation`, tagged as saved.
 */
export async function annotatePaper(identifier, note) {
  const annotation = writeAnnotation(normalizePaperId(identifier), note);
  return { status: 'saved', annotation };
}
/**
 * Remove the note stored for a paper.
 *
 * @param {string} identifier - Paper id or URL; normalized before clearing.
 * @returns {Promise<{status: 'cleared'|'not_found', paperId: string}>}
 *   "cleared" when `clearAnnotation` returns a truthy value, otherwise "not_found".
 */
export async function clearPaperAnnotation(identifier) {
  const paperId = normalizePaperId(identifier);
  if (clearAnnotation(paperId)) {
    return { status: 'cleared', paperId };
  }
  return { status: 'not_found', paperId };
}
/**
 * Look up the note stored for a paper.
 *
 * @param {string} identifier - Paper id or URL; normalized before the lookup.
 * @returns {Promise<{status: 'found', annotation: unknown}|{status: 'no_annotation', paperId: string}>}
 *   A "found" result carries the annotation; a "no_annotation" result carries
 *   the normalized paper id instead.
 */
export async function getPaperAnnotation(identifier) {
  const paperId = normalizePaperId(identifier);
  const annotation = readAnnotation(paperId);
  if (!annotation) {
    return { status: 'no_annotation', paperId };
  }
  return { status: 'found', annotation };
}
/**
 * List every stored note together with a count.
 *
 * @returns {Promise<{total: number, annotations: unknown[]}>} `total` is the
 *   length of the array returned by `listAnnotations`.
 */
export async function listPaperAnnotations() {
  const all = listAnnotations();
  const total = all.length;
  return { total, annotations: all };
}
/**
 * Read a path from a paper's GitHub repository.
 *
 * Thin wrapper that forwards both arguments to `readGithubRepo`.
 *
 * @param {string} githubUrl - Repository URL.
 * @param {string} [path='/'] - Path inside the repository; defaults to the root.
 * @returns {Promise<unknown>} Whatever `readGithubRepo` resolves to.
 */
export async function readPaperCode(githubUrl, path = '/') {
  const pendingRead = readGithubRepo(githubUrl, path);
  return pendingRead;
}