Add retry logic for transient login failures

Adds configurable retry mechanism with basic exponential backoff to handle intermittent failures when authenticating to container registries, particularly GCP (GAR/GCR) where I'm seeing errors intermittently.

- Add retry-attempts input (default: 0 for backward compatibility, making it opt in)
- Add retry-delay input (default: 5000ms)
- Implement exponential backoff retry logic in docker login
  - Chose to just write a simple retry function vs. going with a library
- Retry all errors except 5xxs
  - I'm seeing intermittent 401 failures
- Add tests for retry behavior
- Update README with new input parameters

Signed-off-by: Naush Korai <naush.korai@mixpanel.com>
This commit is contained in:
Naush Korai 2026-01-30 13:32:24 -05:00
parent 3227f5311c
commit 47690b2d19
7 changed files with 201 additions and 32 deletions

View file

@ -618,15 +618,17 @@ credentials, while authenticated access is used only to push `myorg/myimage`.
The following inputs can be used as `step.with` keys:
| Name | Type | Default | Description |
|-----------------|--------|-------------|-------------------------------------------------------------------------------|
| `registry` | String | `docker.io` | Server address of Docker registry. If not set then will default to Docker Hub |
| `username` | String | | Username for authenticating to the Docker registry |
| `password` | String | | Password or personal access token for authenticating the Docker registry |
| `scope` | String | | Scope for the authentication token |
| `ecr` | String | `auto` | Specifies whether the given registry is ECR (`auto`, `true` or `false`) |
| `logout` | Bool | `true` | Log out from the Docker registry at the end of a job |
| `registry-auth` | YAML | | Raw authentication to registries, defined as YAML objects |
| Name | Type | Default | Description |
|------------------|--------|-------------|-------------------------------------------------------------------------------|
| `registry` | String | `docker.io` | Server address of Docker registry. If not set then will default to Docker Hub |
| `username` | String | | Username for authenticating to the Docker registry |
| `password` | String | | Password or personal access token for authenticating the Docker registry |
| `scope` | String | | Scope for the authentication token |
| `ecr` | String | `auto` | Specifies whether the given registry is ECR (`auto`, `true` or `false`) |
| `logout` | Bool | `true` | Log out from the Docker registry at the end of a job |
| `registry-auth` | YAML | | Raw authentication to registries, defined as YAML objects |
| `retry-attempts` | Number | `0` | Number of retry attempts after the initial login attempt fails (`0` disables retries) |
| `retry-delay` | Number | `5000` | Delay in milliseconds before the first retry; the delay doubles after each further failed attempt (exponential backoff) |
> [!NOTE]
> The `registry-auth` input cannot be used with other inputs except `logout`.

View file

@ -2,6 +2,7 @@ import {expect, jest, test} from '@jest/globals';
import * as path from 'path';
import {loginStandard, logout} from '../src/docker';
import {RetryArgs} from '../src/context';
import {Docker} from '@docker/actions-toolkit/lib/docker/docker';
@ -62,3 +63,103 @@ test('logout calls exec', async () => {
ignoreReturnCode: true
});
});
test('loginStandard retries on failure', async () => {
  // Fake timers let the backoff sleeps elapse instantly. They are restored in
  // `finally` so a failing assertion cannot leave them enabled for later tests
  // that rely on real `setTimeout` behaviour.
  jest.useFakeTimers();
  try {
    let attemptCount = 0;
    // The first two docker invocations fail, the third one succeeds.
    // eslint-disable-next-line @typescript-eslint/ban-ts-comment
    // @ts-ignore
    const execSpy = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => {
      attemptCount++;
      if (attemptCount < 3) {
        return {
          exitCode: 1,
          stdout: '',
          stderr: 'Error: timeout exceeded'
        };
      }
      return {
        exitCode: 0,
        stdout: 'Login Succeeded',
        stderr: ''
      };
    });

    const username = 'dbowie';
    const password = 'groundcontrol';
    const registry = 'https://ghcr.io';
    const retryArgs: RetryArgs = {attempts: 3, delayMs: 100};

    const loginPromise = loginStandard(registry, username, password, undefined, retryArgs);
    // Flush every pending backoff timer so the retries actually run.
    await jest.runAllTimersAsync();
    await loginPromise;

    expect(execSpy).toHaveBeenCalledTimes(3);
    expect(attemptCount).toBe(3);
  } finally {
    jest.useRealTimers();
  }
});
test('loginStandard does not retry when attempts is 0', async () => {
  // Every docker invocation reports the same failure.
  const failure = {exitCode: 1, stdout: '', stderr: 'Error: timeout exceeded'};
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  const execSpy = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => ({...failure}));

  // With zero retries the first failure must surface immediately.
  const noRetries: RetryArgs = {attempts: 0, delayMs: 100};
  const outcome = loginStandard('https://ghcr.io', 'dbowie', 'groundcontrol', undefined, noRetries);

  await expect(outcome).rejects.toThrow('timeout exceeded');
  expect(execSpy).toHaveBeenCalledTimes(1);
});
test('loginStandard fails after max retries', async () => {
  // Every docker invocation reports the same failure.
  const failure = {exitCode: 1, stdout: '', stderr: 'Error: timeout exceeded'};
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  const execSpy = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => ({...failure}));

  // Two retries on top of the initial attempt: three invocations in total.
  // The delay is kept tiny because this test runs on real timers.
  const twoRetries: RetryArgs = {attempts: 2, delayMs: 10};
  const outcome = loginStandard('https://ghcr.io', 'dbowie', 'groundcontrol', undefined, twoRetries);

  await expect(outcome).rejects.toThrow('timeout exceeded');
  expect(execSpy).toHaveBeenCalledTimes(3);
});
test('loginStandard does not retry on 5xx errors', async () => {
  // Every docker invocation reports a server-side (5xx) failure.
  const serverFailure = {exitCode: 1, stdout: '', stderr: 'Error: 500 Internal Server Error'};
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  const execSpy = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => ({...serverFailure}));

  // Retries are enabled, yet a 5xx message must abort after the first attempt.
  const threeRetries: RetryArgs = {attempts: 3, delayMs: 100};
  const outcome = loginStandard('https://ghcr.io', 'dbowie', 'groundcontrol', undefined, threeRetries);

  await expect(outcome).rejects.toThrow('500 Internal Server Error');
  expect(execSpy).toHaveBeenCalledTimes(1);
});

View file

@ -29,6 +29,14 @@ inputs:
registry-auth:
description: 'Raw authentication to registries, defined as YAML objects'
required: false
retry-attempts:
description: 'Number of retry attempts for transient failures'
default: '0'
required: false
retry-delay:
description: 'Delay in milliseconds before the first retry; doubles after each further failed attempt'
default: '5000'
required: false
runs:
using: 'node20'

2
dist/index.js generated vendored

File diff suppressed because one or more lines are too long

2
dist/index.js.map generated vendored

File diff suppressed because one or more lines are too long

View file

@ -5,6 +5,11 @@ import * as yaml from 'js-yaml';
import {Buildx} from '@docker/actions-toolkit/lib/buildx/buildx';
import {Util} from '@docker/actions-toolkit/lib/util';
/**
 * Retry settings applied to a registry login.
 */
export interface RetryArgs {
/** Number of retries allowed after the initial attempt; `0` means a single attempt with no retry. */
attempts: number;
/** Delay in milliseconds before the first retry; the retry helper doubles it after each further failure. */
delayMs: number;
}
export interface Inputs {
registry: string;
username: string;
@ -13,6 +18,7 @@ export interface Inputs {
ecr: string;
logout: boolean;
registryAuth: string;
retryArgs: RetryArgs;
}
export interface Auth {
@ -22,6 +28,7 @@ export interface Auth {
scope: string;
ecr: string;
configDir: string;
retryArgs: RetryArgs;
}
export function getInputs(): Inputs {
@ -32,7 +39,11 @@ export function getInputs(): Inputs {
scope: core.getInput('scope'),
ecr: core.getInput('ecr'),
logout: core.getBooleanInput('logout'),
registryAuth: core.getInput('registry-auth')
registryAuth: core.getInput('registry-auth'),
retryArgs: {
attempts: parseInt(core.getInput('retry-attempts') || '0', 10),
delayMs: parseInt(core.getInput('retry-delay') || '5000', 10)
}
};
}
@ -48,7 +59,8 @@ export function getAuthList(inputs: Inputs): Array<Auth> {
password: inputs.password,
scope: inputs.scope,
ecr: inputs.ecr || 'auto',
configDir: scopeToConfigDir(inputs.registry, inputs.scope)
configDir: scopeToConfigDir(inputs.registry, inputs.scope),
retryArgs: inputs.retryArgs
});
} else {
auths = (yaml.load(inputs.registryAuth) as Array<Auth>).map(auth => {
@ -59,7 +71,8 @@ export function getAuthList(inputs: Inputs): Array<Auth> {
password: auth.password,
scope: auth.scope,
ecr: auth.ecr || 'auto',
configDir: scopeToConfigDir(auth.registry || 'docker.io', auth.scope)
configDir: scopeToConfigDir(auth.registry || 'docker.io', auth.scope),
retryArgs: auth.retryArgs || inputs.retryArgs
};
});
}

View file

@ -5,11 +5,47 @@ import * as context from './context';
import {Docker} from '@docker/actions-toolkit/lib/docker/docker';
/**
 * Pauses the calling async flow.
 *
 * @param ms - Duration to wait, in milliseconds, handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>(wake => {
    setTimeout(wake, ms);
  });
}
/**
 * Decides whether a failed login is worth retrying.
 *
 * An error is treated as non-retryable when its message contains a standalone
 * three-digit number starting with 5 (e.g. `500`, `503`); every other error is
 * considered retryable.
 *
 * @param error - The failure raised by the login attempt.
 * @returns `true` when the attempt may be retried, `false` otherwise.
 */
function isRetryableError(error: Error): boolean {
  const normalizedMessage = error.message.toLowerCase();
  // Word boundaries keep longer numbers such as "5000" from matching.
  const mentionsServerError = /\b5\d{2}\b/.test(normalizedMessage);
  if (mentionsServerError) {
    return false;
  }
  return true;
}
/**
 * Runs `fn`, retrying with exponential backoff while it keeps failing with a
 * retryable error.
 *
 * @param fn - Operation to attempt; invoked once per attempt.
 * @param retryArgs - `attempts` is the number of retries allowed after the
 *   first try; `delayMs` is the wait before the first retry and doubles after
 *   each further failure.
 * @param label - Human-readable operation name used as a prefix in log lines.
 * @returns The value `fn` resolves with.
 * @throws Whatever `fn` rejected with on the final attempt, or on the first
 *   failure that `isRetryableError` classifies as non-retryable.
 */
async function withRetry<T>(fn: () => Promise<T>, retryArgs: context.RetryArgs, label: string): Promise<T> {
  // The settings may originate from parseInt() on user-supplied strings or from
  // raw YAML, so they can be NaN, negative or fractional. Without sanitizing,
  // a NaN attempt count would skip the loop entirely: the operation would never
  // run and `undefined` would be thrown.
  const retries = Number.isFinite(retryArgs.attempts) ? Math.max(0, Math.floor(retryArgs.attempts)) : 0;
  const baseDelayMs = Number.isFinite(retryArgs.delayMs) ? Math.max(0, retryArgs.delayMs) : 0;
  const maxAttempts = retries + 1;

  // Every iteration either returns the result or throws, so no loop bound is needed.
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      // Normalize only for classification and logging; the original value is
      // what gets rethrown so callers observe exactly what `fn` raised.
      const failure = error instanceof Error ? error : new Error(String(error));
      if (attempt >= maxAttempts || !isRetryableError(failure)) {
        if (attempt > 1) {
          core.info(`${label}: Failed after ${attempt} attempts`);
        }
        throw error;
      }
      // Exponential backoff: base delay, then 2x, 4x, ...
      const delay = baseDelayMs * Math.pow(2, attempt - 1);
      core.warning(`${label}: Attempt ${attempt}/${maxAttempts} failed: ${failure.message}. Retrying in ${delay}ms...`);
      await sleep(delay);
    }
  }
}
export async function login(auth: context.Auth): Promise<void> {
if (/true/i.test(auth.ecr) || (auth.ecr == 'auto' && aws.isECR(auth.registry))) {
await loginECR(auth.registry, auth.username, auth.password, auth.scope);
await loginECR(auth.registry, auth.username, auth.password, auth.scope, auth.retryArgs);
} else {
await loginStandard(auth.registry, auth.username, auth.password, auth.scope);
await loginStandard(auth.registry, auth.username, auth.password, auth.scope, auth.retryArgs);
}
}
@ -33,7 +69,7 @@ export async function logout(registry: string, configDir: string): Promise<void>
});
}
export async function loginStandard(registry: string, username: string, password: string, scope?: string): Promise<void> {
export async function loginStandard(registry: string, username: string, password: string, scope?: string, retryArgs?: context.RetryArgs): Promise<void> {
if (!username && !password) {
throw new Error('Username and password required');
}
@ -43,18 +79,18 @@ export async function loginStandard(registry: string, username: string, password
if (!password) {
throw new Error('Password required');
}
await loginExec(registry, username, password, scope);
await loginExec(registry, username, password, scope, retryArgs);
}
export async function loginECR(registry: string, username: string, password: string, scope?: string): Promise<void> {
export async function loginECR(registry: string, username: string, password: string, scope?: string, retryArgs?: context.RetryArgs): Promise<void> {
core.info(`Retrieving registries data through AWS SDK...`);
const regDatas = await aws.getRegistriesData(registry, username, password);
for (const regData of regDatas) {
await loginExec(regData.registry, regData.username, regData.password, scope);
await loginExec(regData.registry, regData.username, regData.password, scope, retryArgs);
}
}
async function loginExec(registry: string, username: string, password: string, scope?: string): Promise<void> {
async function loginExec(registry: string, username: string, password: string, scope?: string, retryArgs?: context.RetryArgs): Promise<void> {
let envs: {[key: string]: string} | undefined;
const configDir = context.scopeToConfigDir(registry, scope);
if (configDir !== '') {
@ -67,15 +103,24 @@ async function loginExec(registry: string, username: string, password: string, s
} else {
core.info(`Logging into ${registry}...`);
}
await Docker.getExecOutput(['login', '--password-stdin', '--username', username, registry], {
ignoreReturnCode: true,
silent: true,
input: Buffer.from(password),
env: envs
}).then(res => {
if (res.stderr.length > 0 && res.exitCode != 0) {
throw new Error(res.stderr.trim());
}
core.info('Login Succeeded!');
});
const retry = retryArgs || {attempts: 0, delayMs: 5000};
await withRetry(
async () => {
await Docker.getExecOutput(['login', '--password-stdin', '--username', username, registry], {
ignoreReturnCode: true,
silent: true,
input: Buffer.from(password),
env: envs
}).then(res => {
if (res.stderr.length > 0 && res.exitCode != 0) {
throw new Error(res.stderr.trim());
}
core.info('Login Succeeded!');
});
},
retry,
`Login to ${registry}`
);
}