Improve alphaXiv client outputs and exports

This commit is contained in:
Advait Paliwal 2026-03-20 12:03:27 -07:00
parent 9a708a1ab9
commit 270eaa1dc5
11 changed files with 417 additions and 11 deletions

24
cli/src/commands/code.js Normal file
View file

@ -0,0 +1,24 @@
import { readGithubRepo, disconnect } from '../lib/alphaxiv.js';
import { output, error } from '../lib/output.js';
/**
 * Print a tool result to stdout.
 *
 * Strings are written verbatim; every other value is first serialized as
 * JSON with a two-space indent.
 *
 * @param {*} data - Value to print.
 * @returns {void}
 */
function formatResult(data) {
  if (typeof data === 'string') {
    console.log(data);
    return;
  }
  console.log(JSON.stringify(data, null, 2));
}
/**
 * Register the `code <github-url> [path]` subcommand on the CLI program.
 *
 * The handler merges the program-level options with the command options,
 * reads the requested repository path (falling back to '/' when the path is
 * missing or empty), and hands the result to `output`. Failures are reported
 * through `error`, and `disconnect` is awaited in every case.
 *
 * @param {object} program - CLI program object exposing `command()` and `opts()`.
 * @returns {void}
 */
export function registerCodeCommand(program) {
  const runCodeCommand = async (githubUrl, path, cmdOpts) => {
    // Command-level options win over program-level ones on key collisions.
    const opts = { ...program.opts(), ...cmdOpts };
    try {
      const repoPath = path || '/';
      const result = await readGithubRepo(githubUrl, repoPath);
      output(result, formatResult, opts);
    } catch (err) {
      error(err.message, opts);
    } finally {
      await disconnect();
    }
  };

  program
    .command('code <github-url> [path]')
    .description("Read files from a paper's GitHub repository")
    .action(runCodeCommand);
}

View file

@ -1,5 +1,5 @@
import chalk from 'chalk';
import { searchByEmbedding, searchByKeyword, disconnect } from '../lib/alphaxiv.js';
import { searchByEmbedding, searchByKeyword, agenticSearch, disconnect } from '../lib/alphaxiv.js';
import { output, error } from '../lib/output.js';
function formatResults(data) {
@ -10,20 +10,29 @@ function formatResults(data) {
export function registerSearchCommand(program) {
program
.command('search <query>')
.description('Search papers via alphaXiv (semantic + keyword)')
.option('-m, --mode <mode>', 'Search mode: semantic, keyword, both', 'semantic')
.description('Search papers via alphaXiv (semantic, keyword, both, agentic, or all)')
.option('-m, --mode <mode>', 'Search mode: semantic, keyword, both, agentic, all', 'semantic')
.action(async (query, cmdOpts) => {
const opts = { ...program.opts(), ...cmdOpts };
try {
let results;
if (opts.mode === 'keyword') {
results = await searchByKeyword(query);
} else if (opts.mode === 'agentic') {
results = await agenticSearch(query);
} else if (opts.mode === 'both') {
const [semantic, keyword] = await Promise.all([
searchByEmbedding(query),
searchByKeyword(query),
]);
results = { semantic, keyword };
} else if (opts.mode === 'all') {
const [semantic, keyword, agentic] = await Promise.all([
searchByEmbedding(query),
searchByKeyword(query),
agenticSearch(query),
]);
results = { semantic, keyword, agentic };
} else {
results = await searchByEmbedding(query);
}

View file

@ -7,6 +7,7 @@ import { registerSearchCommand } from './commands/search.js';
import { registerGetCommand } from './commands/get.js';
import { registerAskCommand } from './commands/ask.js';
import { registerAnnotateCommand } from './commands/annotate.js';
import { registerCodeCommand } from './commands/code.js';
import { registerLoginCommand, registerLogoutCommand } from './commands/login.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
@ -21,9 +22,12 @@ ${chalk.bold.underline('Usage')}
${chalk.dim('$')} alpha search "transformer attention mechanisms" ${chalk.dim('# semantic search')}
${chalk.dim('$')} alpha search "LoRA" --mode keyword ${chalk.dim('# keyword search')}
${chalk.dim('$')} alpha search "hallucination in LLMs" --mode agentic ${chalk.dim('# agentic retrieval')}
${chalk.dim('$')} alpha search "RAG for QA" --mode all ${chalk.dim('# semantic + keyword + agentic')}
${chalk.dim('$')} alpha get 1706.03762 ${chalk.dim('# paper content + annotation')}
${chalk.dim('$')} alpha get https://arxiv.org/abs/2106.09685 ${chalk.dim('# by URL')}
${chalk.dim('$')} alpha ask 1706.03762 "How does attention work?" ${chalk.dim('# ask about a paper')}
${chalk.dim('$')} alpha code https://github.com/openai/gpt-2 / ${chalk.dim('# inspect repo structure')}
${chalk.dim('$')} alpha annotate 1706.03762 "key insight" ${chalk.dim('# save a note')}
${chalk.dim('$')} alpha annotate --list ${chalk.dim('# see all notes')}
@ -31,9 +35,10 @@ ${chalk.bold.underline('Commands')}
${chalk.bold('login')} Log in to alphaXiv (opens browser)
${chalk.bold('logout')} Log out
${chalk.bold('search')} <query> Search papers (semantic, keyword, or agentic)
${chalk.bold('search')} <query> Search papers (semantic, keyword, both, agentic, or all)
${chalk.bold('get')} <url|arxiv-id> Paper content + local annotation
${chalk.bold('ask')} <url|arxiv-id> <question> Ask a question about a paper
${chalk.bold('code')} <github-url> [path] Read files from a paper repository
${chalk.bold('annotate')} [paper-id] [note] Save a note appears on future fetches
${chalk.bold('annotate')} <paper-id> --clear Remove a note
${chalk.bold('annotate')} --list List all notes
@ -41,7 +46,7 @@ ${chalk.bold.underline('Commands')}
${chalk.bold.underline('Flags')}
--json JSON output (for agents and piping)
-m, --mode <mode> Search mode: semantic, keyword, both (default: semantic)
-m, --mode <mode> Search mode: semantic, keyword, both, agentic, all (default: semantic)
--full-text Get raw text instead of AI report (for get)
`);
}
@ -62,6 +67,7 @@ registerLogoutCommand(program);
registerSearchCommand(program);
registerGetCommand(program);
registerAskCommand(program);
registerCodeCommand(program);
registerAnnotateCommand(program);
program.parse();

View file

@ -83,6 +83,16 @@ export async function agenticSearch(query) {
return await callTool('agentic_paper_retrieval', { query });
}
/**
 * Run the embedding, keyword and agentic searches concurrently for one query.
 *
 * All three searches are started before any of them is awaited; the returned
 * promise rejects as soon as one of them rejects.
 *
 * @param {string} query - Search query forwarded unchanged to each search.
 * @returns {Promise<{semantic: *, keyword: *, agentic: *}>} Results keyed by search kind.
 */
export async function searchAll(query) {
  // Order matters: it fixes both the start order and the destructuring below.
  const searches = [searchByEmbedding, searchByKeyword, agenticSearch];
  const pending = searches.map((runSearch) => runSearch(query));
  const [semantic, keyword, agentic] = await Promise.all(pending);
  return { semantic, keyword, agentic };
}
export async function getPaperContent(url, { fullText = false } = {}) {
const args = { url };
if (fullText) args.fullText = true;
@ -90,7 +100,15 @@ export async function getPaperContent(url, { fullText = false } = {}) {
}
/**
 * Ask a question about a paper PDF through the `answer_pdf_queries` tool.
 *
 * The batched argument shape (`urls` / `queries` arrays) is tried first. If
 * the tool rejects it with an input-validation style error, the call is
 * retried once with the single-value shape (`url` / `query`). Any other
 * error is rethrown untouched.
 *
 * A stale unconditional `return` used to sit at the top of this function and
 * made the batched call and its fallback unreachable; it has been removed.
 *
 * @param {string} url - URL of the paper to query.
 * @param {string} query - Question to ask about the paper.
 * @returns {Promise<*>} Whatever `callTool` resolves to.
 * @throws Rethrows any `callTool` failure that is not a schema rejection, and
 *   any failure of the fallback call.
 */
export async function answerPdfQuery(url, query) {
  try {
    return await callTool('answer_pdf_queries', { urls: [url], queries: [query] });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    // Only a rejected argument schema justifies retrying with the other shape.
    const isSchemaRejection =
      message.includes('Input validation error') || message.includes('Invalid arguments');
    if (!isSchemaRejection) {
      throw err;
    }
    return await callTool('answer_pdf_queries', { url, query });
  }
}
export async function readGithubRepo(githubUrl, path = '/') {

11
cli/src/lib/auth.d.ts vendored Normal file
View file

@ -0,0 +1,11 @@
// Type declarations for the auth helpers. Only signatures are visible here,
// so the notes below are limited to what the types state; anything about
// runtime behavior is marked for confirmation against auth.js.

/** Synchronous; yields a string or `null`. NOTE(review): `null` presumably means no token is available — confirm. */
export declare function getAccessToken(): string | null;
/** Synchronous; yields a string or `null`. */
export declare function getUserId(): string | null;
/** Synchronous; yields a string or `null`. */
export declare function getUserName(): string | null;
/** Asynchronous; resolves to a string or `null`. NOTE(review): presumably the refreshed token — confirm. */
export declare function refreshAccessToken(): Promise<string | null>;
/** Asynchronous; resolves to an object with `tokens` and `userInfo`, both typed `unknown`. */
export declare function login(): Promise<{
tokens: unknown;
userInfo: unknown;
}>;
/** Asynchronous; resolves to a string or `null`. */
export declare function getValidToken(): Promise<string | null>;
/** Synchronous boolean check. */
export declare function isLoggedIn(): boolean;
/** Synchronous; returns nothing. */
export declare function logout(): void;

74
cli/src/lib/index.d.ts vendored Normal file
View file

@ -0,0 +1,74 @@
// Declarations for names that index.js re-exports unchanged from its
// sibling modules (alphaxiv.js, auth.js, papers.js), followed by the
// `searchPapers` wrapper that index.js defines itself.

/** Re-exported from alphaxiv.js. */
export declare function disconnect(): Promise<void>;
/** Re-exported from auth.js. */
export declare function getUserName(): string | null;
/** Re-exported from auth.js. */
export declare function isLoggedIn(): boolean;
/** Re-exported from auth.js; resolves to `tokens` and `userInfo`, both typed `unknown`. */
export declare function login(): Promise<{
tokens: unknown;
userInfo: unknown;
}>;
/** Re-exported from auth.js. */
export declare function logout(): void;
/** Re-exported from papers.js. */
export declare function normalizePaperId(input: string): string;
/** Runs the embedding, keyword and agentic searches concurrently; resolves to `{ semantic, keyword, agentic }`. */
export declare function searchAll(query: string): Promise<unknown>;
/** Re-exported from alphaxiv.js. */
export declare function searchByEmbedding(query: string): Promise<unknown>;
/** Re-exported from alphaxiv.js. */
export declare function searchByKeyword(query: string): Promise<unknown>;
/** Re-exported from alphaxiv.js. */
export declare function agenticSearch(query: string): Promise<unknown>;
/**
 * Search in the given mode and normalize the text payload into
 * `{ query, mode, ... }`. `mode` defaults to "semantic"; any mode other than
 * "keyword", "agentic", "both" or "all" also runs the embedding search, but
 * the passed mode string is echoed back unchanged in the result.
 */
export declare function searchPapers(
query: string,
mode?: "semantic" | "keyword" | "both" | "agentic" | "all" | string,
): Promise<unknown>;
/**
 * Fetch a paper's content together with its local annotation.
 *
 * The resolved object mirrors what the implementation in index.js returns,
 * including the `alphaXivUrl` overview link that was previously missing from
 * this declaration.
 *
 * @param identifier Paper identifier; normalized by the implementation.
 * @param options Optional flags; `fullText` is coerced to a boolean.
 */
export declare function getPaper(
  identifier: string,
  options?: { fullText?: boolean },
): Promise<{
  paperId: string;
  url: string;
  alphaXivUrl: string;
  content: unknown;
  annotation: unknown;
}>;
/**
 * Ask a question about a paper.
 *
 * The resolved object mirrors what the implementation in index.js returns,
 * including the `alphaXivUrl` overview link that was previously missing from
 * this declaration.
 *
 * @param identifier Paper identifier; normalized by the implementation.
 * @param question Question text, echoed back in the result.
 */
export declare function askPaper(
  identifier: string,
  question: string,
): Promise<{
  paperId: string;
  url: string;
  alphaXivUrl: string;
  question: string;
  answer: unknown;
}>;
/**
 * Save a note for a paper. The implementation normalizes `identifier` to a
 * paper id, writes the note, and resolves with status "saved" plus whatever
 * the annotation writer returned.
 */
export declare function annotatePaper(
identifier: string,
note: string,
): Promise<{
status: "saved";
annotation: unknown;
}>;
/**
 * Remove a paper's note. Resolves with "cleared" when the underlying clear
 * call returned a truthy value and "not_found" otherwise, together with the
 * normalized paper id.
 */
export declare function clearPaperAnnotation(
identifier: string,
): Promise<{
status: "cleared" | "not_found";
paperId: string;
}>;
/**
 * Look up a paper's note. The implementation resolves with
 * `{ status: "found", annotation }` when a truthy annotation is read, and
 * with `{ status: "no_annotation", paperId }` otherwise — hence both payload
 * fields are optional.
 */
export declare function getPaperAnnotation(
identifier: string,
): Promise<{
status: "found" | "no_annotation";
annotation?: unknown;
paperId?: string;
}>;
/** List every stored note; `total` is the length of `annotations`. */
export declare function listPaperAnnotations(): Promise<{
total: number;
annotations: unknown[];
}>;
/** Read from a paper's GitHub repository; forwards to `readGithubRepo`. `path` defaults to "/". */
export declare function readPaperCode(githubUrl: string, path?: string): Promise<unknown>;
/**
 * Re-exported from alphaxiv.js. index.js lists this name in its export
 * block, but it was missing from these declarations. `path` defaults to "/".
 */
export declare function readGithubRepo(githubUrl: string, path?: string): Promise<unknown>;
/** Re-exported from annotations.js. */
export declare function readAnnotation(id: string): unknown;
/** Re-exported from annotations.js. */
export declare function writeAnnotation(id: string, note: string): unknown;
/** Re-exported from annotations.js. */
export declare function clearAnnotation(id: string): boolean;
/** Re-exported from annotations.js. */
export declare function listAnnotations(): unknown[];

176
cli/src/lib/index.js Normal file
View file

@ -0,0 +1,176 @@
import {
agenticSearch,
answerPdfQuery,
disconnect,
getPaperContent,
readGithubRepo,
searchAll,
searchByEmbedding,
searchByKeyword,
} from './alphaxiv.js';
import {
clearAnnotation,
listAnnotations,
readAnnotation,
writeAnnotation,
} from './annotations.js';
import { getUserName, isLoggedIn, login, logout } from './auth.js';
import { normalizePaperId, toArxivUrl } from './papers.js';
export {
disconnect,
getUserName,
isLoggedIn,
login,
logout,
normalizePaperId,
searchAll,
searchByEmbedding,
searchByKeyword,
agenticSearch,
readAnnotation,
writeAnnotation,
clearAnnotation,
listAnnotations,
readGithubRepo,
};
/**
 * Extract the integer that directly precedes a metric label.
 *
 * Matching is case-insensitive and takes the first run of digits that is
 * followed by whitespace and then `label`. `label` is interpolated into the
 * pattern as-is, so it is treated as regular-expression source.
 *
 * @param {string} fragment - Text to scan, e.g. "120 Visits, 4 Likes".
 * @param {string} label - Metric name to look for.
 * @returns {number|null} The parsed number, or null when the label is absent.
 */
function parseMetricNumber(fragment, label) {
  const pattern = new RegExp(`(\\d+)\\s+${label}`, 'i');
  const found = fragment.match(pattern);
  if (!found) {
    return null;
  }
  return Number(found[1]);
}
/**
 * Pull the publication date text out of a header fragment.
 *
 * Captures whatever follows "Published on " (case-insensitive) up to the
 * next comma or the end of the string, then trims it.
 *
 * @param {string} fragment - Text to scan.
 * @returns {string|null} The trimmed date text, or null when the marker is absent.
 */
function parsePublishedAt(fragment) {
  const found = fragment.match(/Published on ([^,]+)(?:,|$)/i);
  if (!found) {
    return null;
  }
  const [, dateText] = found;
  return dateText.trim();
}
/**
 * Parse a numbered, markdown-style paper list into structured records.
 *
 * The text is split in front of every line that starts with `N. **`; each
 * non-empty chunk becomes one result. A header of the form
 * `N. **Title** (metrics)` yields the title plus visit/like counts and the
 * publication date; any other header is kept verbatim as the title with the
 * metric fields set to null. `- Field:` lines supply the remaining values.
 *
 * @param {*} text - Raw tool output; non-strings produce an empty result list.
 * @returns {{raw: *, results: object[]}} The original input plus parsed records.
 */
function parsePaperListText(text) {
  if (typeof text !== 'string') {
    return { raw: text, results: [] };
  }

  const results = [];
  for (const chunk of text.split(/\n(?=\d+\.\s+\*\*)/g)) {
    const block = chunk.trim();
    if (!block) {
      continue;
    }

    const lines = block
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean);

    // Value of the first line starting with `prefix`, or null when absent.
    const fieldValue = (prefix) => {
      const hit = lines.find((entry) => entry.startsWith(prefix));
      if (hit === undefined) {
        return null;
      }
      return hit.slice(prefix.length).trim();
    };

    const header = lines[0] || '';
    const headerMatch = header.match(/^\d+\.\s+\*\*(.+?)\*\*\s+\((.+)\)$/);
    const metrics = headerMatch ? headerMatch[2] : null;
    const arxivId = fieldValue('- arXiv Id:');

    results.push({
      // Rank follows position among the non-empty chunks, not the printed number.
      rank: results.length + 1,
      title: headerMatch ? headerMatch[1].trim() : header,
      visits: metrics === null ? null : parseMetricNumber(metrics, 'Visits'),
      likes: metrics === null ? null : parseMetricNumber(metrics, 'Likes'),
      publishedAt: metrics === null ? null : parsePublishedAt(metrics),
      organizations: fieldValue('- Organizations:'),
      authors: fieldValue('- Authors:'),
      abstract: fieldValue('- Abstract:'),
      arxivId,
      arxivUrl: arxivId ? `https://arxiv.org/abs/${arxivId}` : null,
      alphaXivUrl: arxivId ? `https://www.alphaxiv.org/overview/${arxivId}` : null,
      raw: block,
    });
  }

  return { raw: text, results };
}
/**
 * Wrap a raw search payload in a `{ query, mode, ... }` envelope.
 *
 * For the combined modes ('all' and 'both') the payload is an object keyed by
 * search kind, and every value is parsed separately. For every other mode
 * the payload is parsed as one list and its fields are spread into the
 * envelope. Parsed keys are spread last, so they win on name collisions.
 *
 * @param {string} query - The query that produced the payload.
 * @param {string} mode - Search mode the payload came from.
 * @param {*} payload - Raw tool output, or an object of raw outputs.
 * @returns {object} The normalized envelope.
 */
function normalizeSearchPayload(query, mode, payload) {
  const isCombinedMode = mode === 'all' || mode === 'both';
  if (!isCombinedMode) {
    return { query, mode, ...parsePaperListText(payload) };
  }

  const perKind = {};
  Object.entries(payload).forEach(([kind, rawText]) => {
    perKind[kind] = parsePaperListText(rawText);
  });
  return { query, mode, ...perKind };
}
/**
 * Search for papers in the requested mode and normalize the result.
 *
 * 'keyword' and 'agentic' run their single search; 'both' runs the embedding
 * and keyword searches concurrently; 'all' delegates to `searchAll`. Any
 * other mode runs the embedding search, with the given mode string passed
 * through to the normalizer unchanged.
 *
 * @param {string} query - Search query.
 * @param {string} [mode='semantic'] - Search mode.
 * @returns {Promise<object>} The normalized search envelope.
 */
export async function searchPapers(query, mode = 'semantic') {
  let payload;
  switch (mode) {
    case 'keyword':
      payload = await searchByKeyword(query);
      break;
    case 'agentic':
      payload = await agenticSearch(query);
      break;
    case 'both': {
      const [semantic, keyword] = await Promise.all([
        searchByEmbedding(query),
        searchByKeyword(query),
      ]);
      payload = { semantic, keyword };
      break;
    }
    case 'all':
      payload = await searchAll(query);
      break;
    default:
      payload = await searchByEmbedding(query);
  }
  return normalizeSearchPayload(query, mode, payload);
}
/**
 * Fetch a paper's content and pair it with the locally stored annotation.
 *
 * The annotation is read only after the content fetch has resolved.
 *
 * @param {string} identifier - Paper identifier, passed to both the id normalizer and the URL builder.
 * @param {{fullText?: boolean}} [options] - `fullText` is coerced to a boolean before being forwarded.
 * @returns {Promise<{paperId: *, url: *, alphaXivUrl: string, content: *, annotation: *}>}
 */
export async function getPaper(identifier, options = {}) {
  const paperId = normalizePaperId(identifier);
  const url = toArxivUrl(identifier);
  const wantsFullText = Boolean(options.fullText);
  const content = await getPaperContent(url, { fullText: wantsFullText });
  return {
    paperId,
    url,
    alphaXivUrl: `https://www.alphaxiv.org/overview/${paperId}`,
    content,
    annotation: readAnnotation(paperId),
  };
}
/**
 * Ask a question about a paper and return the answer with its context.
 *
 * @param {string} identifier - Paper identifier, passed to both the id normalizer and the URL builder.
 * @param {string} question - Question forwarded to the PDF query tool and echoed in the result.
 * @returns {Promise<{paperId: *, url: *, alphaXivUrl: string, question: string, answer: *}>}
 */
export async function askPaper(identifier, question) {
  const paperId = normalizePaperId(identifier);
  const url = toArxivUrl(identifier);
  const alphaXivUrl = `https://www.alphaxiv.org/overview/${paperId}`;
  const answer = await answerPdfQuery(url, question);
  return { paperId, url, alphaXivUrl, question, answer };
}
/**
 * Save a note against a paper.
 *
 * @param {string} identifier - Paper identifier; normalized before the write.
 * @param {string} note - Note text handed to the annotation writer.
 * @returns {Promise<{status: 'saved', annotation: *}>} The writer's return value under `annotation`.
 */
export async function annotatePaper(identifier, note) {
  const annotation = writeAnnotation(normalizePaperId(identifier), note);
  return { status: 'saved', annotation };
}
/**
 * Remove the note stored for a paper.
 *
 * @param {string} identifier - Paper identifier; normalized before the lookup.
 * @returns {Promise<{status: 'cleared'|'not_found', paperId: *}>} 'cleared' when the
 *   underlying clear call returned a truthy value, 'not_found' otherwise.
 */
export async function clearPaperAnnotation(identifier) {
  const paperId = normalizePaperId(identifier);
  const status = clearAnnotation(paperId) ? 'cleared' : 'not_found';
  return { status, paperId };
}
/**
 * Look up the note stored for a paper.
 *
 * @param {string} identifier - Paper identifier; normalized before the lookup.
 * @returns {Promise<{status: 'found', annotation: *}|{status: 'no_annotation', paperId: *}>}
 *   The annotation when a truthy one is read; otherwise the normalized id.
 */
export async function getPaperAnnotation(identifier) {
  const paperId = normalizePaperId(identifier);
  const annotation = readAnnotation(paperId);
  if (!annotation) {
    return { status: 'no_annotation', paperId };
  }
  return { status: 'found', annotation };
}
/**
 * List every stored annotation together with a count.
 *
 * @returns {Promise<{total: number, annotations: Array}>} `total` is the length of `annotations`.
 */
export async function listPaperAnnotations() {
  const annotations = listAnnotations();
  const total = annotations.length;
  return { total, annotations };
}
/**
 * Read from a paper's GitHub repository.
 *
 * Thin wrapper that forwards both arguments to `readGithubRepo`.
 *
 * @param {string} githubUrl - Repository URL.
 * @param {string} [path='/'] - Path inside the repository.
 * @returns {Promise<*>} Whatever `readGithubRepo` resolves to.
 */
export async function readPaperCode(githubUrl, path = '/') {
  const contents = await readGithubRepo(githubUrl, path);
  return contents;
}