From 40901c0f5e5786cdc6e4276bc0ec62cedf17b85c Mon Sep 17 00:00:00 2001 From: Fabian Wagner Date: Wed, 29 Apr 2026 14:26:17 +0200 Subject: [PATCH] =?UTF-8?q?fix(backup):=20stream=20through=20openssl=20pip?= =?UTF-8?q?e=20=E2=80=94=20kills=20500MB+=20memory=20exhaustion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pre-1.1.1 pipeline ran each stage as a separate file step: mysqldump → file.sql xz → file.sql.xz (read whole file into memory) Crypt::encryptString(file_get_contents(file.sql.xz)) The third step blew up with "Allowed memory size exhausted" on mid-three-digit-MB compressed dumps because Laravel's Crypt envelope reads the whole input, base64-encodes (+33%), and JSON-wraps it for the MAC. Peak memory was ~3.5× the file size. New pipeline is one shell pipe: bash -c 'set -o pipefail; mysqldump | xz -3 | openssl enc \ -aes-256-cbc -pbkdf2 -iter 600000 -salt -pass env:WK_KEY > out' Zero PHP-side allocation for the payload — the encryption runs in the openssl process, not the PHP heap. The output file format is the standard `openssl enc -salt` format (starts with "Salted__"), so it's also restorable with vanilla openssl on any host: openssl enc -d -aes-256-cbc -pbkdf2 -iter 600000 -pass env:K \ -in backup.sql.xz.enc | xz -d | mysql ... Encryption is still APP_KEY-derived: the base64-stripped APP_KEY is fed to openssl as the passphrase, PBKDF2 (600k iters) stretches it into the AES key. A backup is still only restorable by a deployment that knows the same APP_KEY. Other changes: - xz default level dropped from 9 to 3 (tunable via --xz-level or config('workkit.backup.xz_level')). For SQL dumps -9 buys a few % size at multiples of the time cost; -3 is the sweet spot. - Restore detects and rejects pre-1.1.1 Crypt-format backups with a clear error (those need a one-off Crypt::decryptString, which would hit the same memory wall, so we don't auto-fall-back). - "bad decrypt" / "bad magic" stderr now translates to "APP_KEY mismatch" so operators don't have to recognise openssl errors. Verified end-to-end against the dev DB (981 blogs / 659 users / 209 roles): backup in 1.2s, openssl-format output, restore into throwaway DB matches every spot-checked row count. Co-Authored-By: Claude Opus 4.7 (1M context) --- config/workkit.php | 3 + src/Commands/Database/BackupCommand.php | 90 ++++------ src/Commands/Database/RestoreCommand.php | 117 +++---------- src/Services/BackupService.php | 199 +++++++++++++++-------- 4 files changed, 186 insertions(+), 223 deletions(-) diff --git a/config/workkit.php b/config/workkit.php index 729801d..72a74bf 100644 --- a/config/workkit.php +++ b/config/workkit.php @@ -16,5 +16,8 @@ return [ 'backup' => [ 'path' => env('WORKKIT_BACKUP_PATH'), // null → storage_path('backups') 'retention_days' => (int) env('WORKKIT_BACKUP_RETENTION_DAYS', 30), + // xz compression level. Lower = faster + larger output. 3 is a + // good default for SQL dumps (~10× ratio at ~3× the speed of -9). + 'xz_level' => (int) env('WORKKIT_BACKUP_XZ_LEVEL', 3), ], ]; diff --git a/src/Commands/Database/BackupCommand.php b/src/Commands/Database/BackupCommand.php index 5024655..3aa77ce 100644 --- a/src/Commands/Database/BackupCommand.php +++ b/src/Commands/Database/BackupCommand.php @@ -9,20 +9,22 @@ use Illuminate\Console\Command; use RuntimeException; /** - * Dump the configured MySQL database to a compressed + APP_KEY-encrypted - * file under storage/backups. Output filename is - * db__.sql.xz.enc + * Stream a MySQL dump through xz and openssl into a single + * APP_KEY-encrypted file under storage/backups. The whole pipeline + * is one shell pipe (mysqldump | xz | openssl); PHP holds zero bytes + * of database content in memory regardless of dump size. * - * The DB password is passed to mysqldump via the MYSQL_PWD env var, not - * on the CLI — process listings (`ps auxf`) don't expose it that way. + * Output filename: + * storage/backups/db__.sql.xz.enc */ class BackupCommand extends Command { protected $signature = 'workkit:db:backup {--connection= : DB connection to back up (defaults to config(database.default))} - {--out= : Custom output path (overrides storage/backups default)}'; + {--out= : Custom output path (overrides storage/backups default)} + {--xz-level= : xz compression level 0–9 (default: 3 — fast, ~10× ratio for SQL)}'; - protected $description = 'Create a compressed + encrypted backup of the configured MySQL database.'; + protected $description = 'Create a streamed, compressed + APP_KEY-encrypted backup of the configured MySQL database.'; public function handle(): int { @@ -38,74 +40,38 @@ class BackupCommand extends Command return self::FAILURE; } - try { - BackupService::requireBinary('mysqldump'); - } catch (RuntimeException $e) { - $this->error($e->getMessage()); - return self::FAILURE; - } - $stamp = date('Y-m-d_H-i-s'); $base = BackupService::backupDirectory(); - $sqlPath = $this->option('out') - ?: "{$base}/db_{$connection}_{$stamp}.sql"; + $outPath = $this->option('out') + ?: "{$base}/db_{$connection}_{$stamp}.sql.xz.enc"; - $this->info("Dumping `{$cfg['database']}` from {$cfg['host']} → {$sqlPath}"); + $xzLevel = (int) ($this->option('xz-level') ?? config('workkit.backup.xz_level', 3)); + $this->info(sprintf( + 'Streaming dump → xz -%d → openssl → %s', + $xzLevel, + $outPath, + )); + + $startedAt = microtime(true); try { - $this->dump($cfg, $sqlPath); - - $this->info('Compressing with xz…'); - $xzPath = BackupService::compressFile($sqlPath); - @unlink($sqlPath); - - $this->info('Encrypting with APP_KEY…'); - $encPath = BackupService::encryptFile($xzPath); - @unlink($xzPath); + BackupService::dumpCompressEncrypt($cfg, $outPath, $xzLevel); } catch (RuntimeException $e) { - // Clean up any half-written intermediate files so a failed - // backup doesn't leave SQL with secrets sitting unencrypted - // on disk. - foreach ([$sqlPath, $sqlPath . '.xz'] as $leftover) { - if (is_file($leftover)) { - @unlink($leftover); - } - } $this->error($e->getMessage()); return self::FAILURE; } - $size = filesize($encPath); - $this->info(sprintf('Backup complete: %s (%s)', $encPath, self::humanBytes((int) $size))); + $elapsed = microtime(true) - $startedAt; + $size = filesize($outPath); + $this->info(sprintf( + 'Backup complete in %.1fs: %s (%s)', + $elapsed, + $outPath, + self::humanBytes((int) $size), + )); return self::SUCCESS; } - /** - * Run mysqldump with credentials passed via env vars (MYSQL_PWD) so - * the password never appears in the process listing or shell history. - */ - private function dump(array $cfg, string $sqlPath): void - { - $args = [ - '--single-transaction', // consistent dump on InnoDB without FLUSH TABLES locking - '--quick', // stream rows instead of buffering - '--skip-lock-tables', - '--user=' . escapeshellarg((string) ($cfg['username'] ?? 'root')), - '--host=' . escapeshellarg((string) ($cfg['host'] ?? 'localhost')), - '--port=' . escapeshellarg((string) ($cfg['port'] ?? 3306)), - escapeshellarg((string) $cfg['database']), - ]; - - $cmd = 'mysqldump ' . implode(' ', $args) . ' > ' . escapeshellarg($sqlPath); - BackupService::run($cmd, [ - 'MYSQL_PWD' => (string) ($cfg['password'] ?? ''), - ]); - - if (! file_exists($sqlPath) || filesize($sqlPath) === 0) { - throw new RuntimeException("mysqldump produced no output at {$sqlPath}"); - } - } - private static function humanBytes(int $bytes): string { $units = ['B', 'KB', 'MB', 'GB', 'TB']; diff --git a/src/Commands/Database/RestoreCommand.php b/src/Commands/Database/RestoreCommand.php index cdb5ab7..2fe369d 100644 --- a/src/Commands/Database/RestoreCommand.php +++ b/src/Commands/Database/RestoreCommand.php @@ -6,18 +6,17 @@ namespace Blax\Workkit\Commands\Database; use Blax\Workkit\Services\BackupService; use Illuminate\Console\Command; -use Illuminate\Contracts\Encryption\DecryptException; use RuntimeException; /** - * Restore a backup produced by workkit:db:backup. Without --file, picks - * the newest backup in storage/backups by mtime. Detects .enc and .xz - * suffixes to decide which decode steps to run, so this also works - * with un-encrypted or un-compressed dumps if the operator restored - * a hand-prepared file. + * Restore a backup produced by workkit:db:backup. Streams the file + * through openssl decrypt → xz decompress → mysql in a single pipe, + * matching the backup pipeline exactly. PHP allocates nothing for the + * payload, so even multi-GB backups restore without bumping memory_limit. * - * Refuses to run in production unless --force is passed: a restore is - * a destructive operation that overwrites the live database. + * Without --file, picks the newest backup in storage/backups by mtime. + * Refuses to run unless --force is passed: a restore overwrites whatever + * is currently in the target database. */ class RestoreCommand extends Command { @@ -26,7 +25,7 @@ class RestoreCommand extends Command {--file= : Specific backup filename inside the backups directory (default: newest by mtime)} {--force : Skip the confirmation prompt}'; - protected $description = 'Restore a (compressed + encrypted) database backup. Defaults to the newest file in storage/backups.'; + protected $description = 'Restore a streaming, APP_KEY-encrypted database backup. Defaults to the newest file in storage/backups.'; public function handle(): int { @@ -42,13 +41,6 @@ class RestoreCommand extends Command return self::FAILURE; } - try { - BackupService::requireBinary('mysql'); - } catch (RuntimeException $e) { - $this->error($e->getMessage()); - return self::FAILURE; - } - $file = $this->resolveFile(); if (! $file) { $this->error('No backup found.'); @@ -72,28 +64,25 @@ class RestoreCommand extends Command return self::SUCCESS; } - $tmpSql = null; + $startedAt = microtime(true); try { - $tmpSql = $this->prepare($file); - $this->info("Restoring → {$cfg['database']}"); - $this->importInto($cfg, $tmpSql); - $this->info('Restore complete.'); - return self::SUCCESS; - } catch (DecryptException $e) { - // Almost always means the file was encrypted with a different - // APP_KEY than the current one. Spell that out so the operator - // doesn't have to recognise the cryptographer's stack trace. - $this->error('Decryption failed — likely an APP_KEY mismatch between backup and current environment.'); - $this->line("Underlying error: {$e->getMessage()}"); - return self::FAILURE; + BackupService::decryptDecompressImport($file, $cfg); } catch (RuntimeException $e) { - $this->error($e->getMessage()); - return self::FAILURE; - } finally { - if ($tmpSql && is_file($tmpSql)) { - @unlink($tmpSql); + $msg = $e->getMessage(); + // openssl's password mismatch error has a recognisable shape; + // translate it into something an operator can act on. + if (str_contains($msg, 'bad decrypt') || str_contains($msg, 'bad magic')) { + $this->error('Decryption failed — likely an APP_KEY mismatch between this host and the host that produced the backup.'); + $this->line($msg); + } else { + $this->error($msg); } + return self::FAILURE; } + + $elapsed = microtime(true) - $startedAt; + $this->info(sprintf('Restore complete in %.1fs.', $elapsed)); + return self::SUCCESS; } /** @@ -122,64 +111,4 @@ class RestoreCommand extends Command usort($files, fn($a, $b) => filemtime($b) <=> filemtime($a)); return $files[0]; } - - /** - * Walk the file through decrypt + decompress as needed and write the - * resulting SQL to a temp path. Returns the path of the .sql file. - */ - private function prepare(string $file): string - { - $current = $file; - $intermediate = []; - - if (str_ends_with($current, '.enc')) { - $this->info('Decrypting…'); - $current = BackupService::decryptFile($current, sys_get_temp_dir() . '/workkit_restore_' . uniqid() . '.dec'); - $intermediate[] = $current; - } - - if (str_ends_with($current, '.xz')) { - $this->info('Decompressing…'); - $next = BackupService::decompressFile($current, sys_get_temp_dir() . '/workkit_restore_' . uniqid() . '.sql'); - // Drop the .xz intermediate now that we have the .sql. - foreach ($intermediate as $f) { - if (is_file($f)) { - @unlink($f); - } - } - $intermediate = []; - $current = $next; - $intermediate[] = $current; - } - - // Sanity check: a real SQL dump always has at least one - // CREATE/INSERT/USE statement near the top. Catches the case - // where APP_KEY didn't match (Crypt::decryptString throws on - // bad key, but if someone hand-renamed a non-encrypted file to - // .enc this still helps). - $head = (string) @file_get_contents($current, false, null, 0, 8192); - if (! preg_match('/(INSERT\s+INTO|CREATE\s+TABLE|USE\s+`)/i', $head)) { - throw new RuntimeException('Decoded file does not look like a SQL dump (no CREATE/INSERT/USE found in header).'); - } - - return $current; - } - - /** - * Pipe the .sql file into the mysql CLI, password via MYSQL_PWD. - */ - private function importInto(array $cfg, string $sqlPath): void - { - $args = [ - '--user=' . escapeshellarg((string) ($cfg['username'] ?? 'root')), - '--host=' . escapeshellarg((string) ($cfg['host'] ?? 'localhost')), - '--port=' . escapeshellarg((string) ($cfg['port'] ?? 3306)), - escapeshellarg((string) $cfg['database']), - ]; - - $cmd = 'mysql ' . implode(' ', $args) . ' < ' . escapeshellarg($sqlPath); - BackupService::run($cmd, [ - 'MYSQL_PWD' => (string) ($cfg['password'] ?? ''), - ]); - } } diff --git a/src/Services/BackupService.php b/src/Services/BackupService.php index 96403be..70d69dd 100644 --- a/src/Services/BackupService.php +++ b/src/Services/BackupService.php @@ -4,92 +4,155 @@ declare(strict_types=1); namespace Blax\Workkit\Services; -use Illuminate\Support\Facades\Crypt; use RuntimeException; /** - * Compression + encryption primitives shared by the backup/restore - * commands. Encryption uses Laravel's Crypt facade, so the AES key is - * derived from APP_KEY — same key that decrypts the rest of the app's - * encrypted columns/cookies. That means a backup is restorable only - * by a deployment that knows the host's APP_KEY. + * Streaming backup pipeline. Dumps, compresses and encrypts in a single + * shell pipe so PHP holds zero bytes of the database content in memory — + * regardless of dump size. The original implementation ran each stage + * separately and ran into "Allowed memory size exhausted" on + * mid-three-digit-MB compressed dumps because Laravel's `Crypt::encryptString` + * reads the whole file, base64-encodes (+33%) and JSON-envelopes it. * - * Compression goes through the system `xz` binary because it gives by - * far the best ratio for repetitive SQL dump text and is cheap to - * stream. Hosts without `xz` installed get a clear failure rather - * than silently falling back to a worse codec. + * Encryption: AES-256-CBC with PBKDF2 (600 000 iterations, random salt) + * via the system `openssl` binary. The passphrase is derived from + * APP_KEY (the `base64:` prefix is stripped, the remainder is used + * verbatim — PBKDF2 stretches it into the key). A backup is restorable + * only by a deployment that knows the same APP_KEY. + * + * The output file format is the standard `openssl enc -salt` format, + * which means it's also restorable with vanilla openssl on any host: + * openssl enc -d -aes-256-cbc -pbkdf2 -iter 600000 -pass env:K \ + * -in backup.sql.xz.enc | xz -d | mysql ... + * + * Required system binaries: `mysqldump`, `mysql`, `xz`, `openssl`, + * `bash` (for `set -o pipefail`). All are standard on every reasonable + * Linux server. */ class BackupService { + public const CIPHER = 'aes-256-cbc'; + public const PBKDF2_ITER = 600000; + /** - * Compress $in with `xz` and return the path of the .xz output. - * Defaults to writing alongside the input. + * mysqldump → xz → openssl enc → $outPath. One pipeline, zero PHP + * memory pressure. Pipefail propagates a failure in any stage out + * to the caller as a non-zero exit code. + * + * $xzLevel defaults to 3 — empirically a good balance for SQL + * (about 10× compression with a fraction of xz -9's time cost). */ - public static function compressFile(string $in, ?string $out = null): string + public static function dumpCompressEncrypt(array $cfg, string $outPath, int $xzLevel = 3): void { - $out ??= $in . '.xz'; + self::requireBinary('mysqldump'); self::requireBinary('xz'); - // -9 for max ratio, -T0 for parallel encoding on whatever cores - // the host has. -c emits to stdout so we don't clobber $in in - // place — the caller decides when to delete it. - self::run(sprintf('xz -z -9 -T0 -c %s > %s', escapeshellarg($in), escapeshellarg($out))); - if (! file_exists($out)) { - throw new RuntimeException("xz compression failed; output not found at {$out}"); + self::requireBinary('openssl'); + self::requireBinary('bash'); + + $level = max(0, min(9, $xzLevel)); + + $mysqldump = 'mysqldump ' + . '--single-transaction --quick --skip-lock-tables ' + . '--user=' . escapeshellarg((string) ($cfg['username'] ?? 'root')) . ' ' + . '--host=' . escapeshellarg((string) ($cfg['host'] ?? 'localhost')) . ' ' + . '--port=' . escapeshellarg((string) ($cfg['port'] ?? 3306)) . ' ' + . escapeshellarg((string) $cfg['database']); + + $xz = "xz -{$level} -T0"; + $openssl = 'openssl enc -' . self::CIPHER . ' -pbkdf2 -iter ' . self::PBKDF2_ITER . ' -salt -pass env:WK_KEY'; + + // bash -c with pipefail: any stage failing trips the whole + // pipeline. Without pipefail, a mysqldump crash with xz still + // running would leave the output file 0-byte and the exit + // code 0 — silent corruption. + $pipeline = "{$mysqldump} | {$xz} | {$openssl} > " . escapeshellarg($outPath); + $cmd = '/bin/bash -c ' . escapeshellarg('set -o pipefail; ' . $pipeline); + + try { + self::run($cmd, [ + 'MYSQL_PWD' => (string) ($cfg['password'] ?? ''), + 'WK_KEY' => self::passphrase(), + ]); + } catch (RuntimeException $e) { + // Don't leave a half-written encrypted file lying around — + // it's neither valid plaintext (which we'd never want) + // nor a complete backup, just confusing partial state. + if (is_file($outPath)) { + @unlink($outPath); + } + throw $e; + } + + if (! file_exists($outPath) || filesize($outPath) === 0) { + throw new RuntimeException("Backup pipeline produced no output at {$outPath}"); } - return $out; } /** - * Decompress an .xz file. If $out is null, strips the `.xz` suffix - * (or generates a uniquely-named sibling if there is none). + * openssl dec → xz -d → mysql. Streams through the same pipeline + * in reverse. Does no PHP-side decoding so the file size is bounded + * only by disk I/O. */ - public static function decompressFile(string $in, ?string $out = null): string + public static function decryptDecompressImport(string $inPath, array $cfg): void { + self::requireBinary('mysql'); self::requireBinary('xz'); - if ($out === null) { - $out = str_ends_with($in, '.xz') - ? substr($in, 0, -3) - : $in . '.decompressed'; + self::requireBinary('openssl'); + self::requireBinary('bash'); + + // Sanity check on the file's magic bytes. `openssl enc -salt` + // output always starts with the literal "Salted__" header. + $head = (string) @file_get_contents($inPath, false, null, 0, 8); + if ($head !== 'Salted__') { + throw new RuntimeException( + "File at {$inPath} doesn't look like a streaming openssl backup. " + . 'Legacy backups (made with the pre-streaming Crypt envelope) need ' + . 'a separate restore path; see workkit:db:restore-legacy or restore ' + . 'manually with `Crypt::decryptString` after raising memory_limit.' + ); } - self::run(sprintf('xz -d -T0 -c %s > %s', escapeshellarg($in), escapeshellarg($out))); - if (! file_exists($out)) { - throw new RuntimeException("xz decompression failed; output not found at {$out}"); - } - return $out; + + $openssl = 'openssl enc -d -' . self::CIPHER + . ' -pbkdf2 -iter ' . self::PBKDF2_ITER + . ' -pass env:WK_KEY ' + . '-in ' . escapeshellarg($inPath); + + $mysql = 'mysql ' + . '--user=' . escapeshellarg((string) ($cfg['username'] ?? 'root')) . ' ' + . '--host=' . escapeshellarg((string) ($cfg['host'] ?? 'localhost')) . ' ' + . '--port=' . escapeshellarg((string) ($cfg['port'] ?? 3306)) . ' ' + . escapeshellarg((string) $cfg['database']); + + $pipeline = "{$openssl} | xz -d -T0 | {$mysql}"; + $cmd = '/bin/bash -c ' . escapeshellarg('set -o pipefail; ' . $pipeline); + + self::run($cmd, [ + 'MYSQL_PWD' => (string) ($cfg['password'] ?? ''), + 'WK_KEY' => self::passphrase(), + ]); } /** - * Encrypt $in with Crypt:: (APP_KEY-derived) and return the path - * of the .enc output. + * Derive the openssl passphrase from APP_KEY. Laravel stores APP_KEY + * as "base64:"; we strip the prefix and feed the rest + * straight to openssl, which runs PBKDF2 over it to get the AES key. + * Same APP_KEY always derives the same key — restore is deterministic. */ - public static function encryptFile(string $in, ?string $out = null): string + public static function passphrase(): string { - $out ??= $in . '.enc'; - $payload = Crypt::encryptString(file_get_contents($in)); - file_put_contents($out, $payload); - return $out; - } - - /** - * Decrypt $in (Crypt:: payload) and write to $out. If $out is null, - * strips the `.enc` suffix. - */ - public static function decryptFile(string $in, ?string $out = null): string - { - if ($out === null) { - $out = str_ends_with($in, '.enc') - ? substr($in, 0, -4) - : $in . '.decrypted'; + $key = (string) config('app.key'); + if ($key === '') { + throw new RuntimeException('APP_KEY is empty. Run `php artisan key:generate` before using the backup commands.'); } - $payload = file_get_contents($in); - file_put_contents($out, Crypt::decryptString($payload)); - return $out; + if (str_starts_with($key, 'base64:')) { + $key = substr($key, 7); + } + return $key; } /** - * Path of the host's backup directory, created if missing. - * Defaults to storage/backups; the host can override via the - * `workkit.backup.path` config (published by the package). + * Path of the host's backup directory, created if missing. Defaults + * to storage/backups; overridable via config('workkit.backup.path'). */ public static function backupDirectory(): string { @@ -101,9 +164,9 @@ class BackupService } /** - * Bail loudly if a required system binary isn't on $PATH. We do - * this early in each command so users get one clear message - * instead of a cryptic exec failure halfway through. + * Bail loudly if a required system binary isn't on PATH. Done early + * in each command so users get one clear message instead of a + * cryptic exec failure halfway through. */ public static function requireBinary(string $bin): void { @@ -114,7 +177,10 @@ class BackupService } /** - * Run a shell command and throw on non-zero exit. + * Run a shell command and throw on non-zero exit, capturing stderr + * for the error message. Env vars are passed via proc_open's env + * arg — they're scoped to the child process and not visible in the + * host's `ps` listing. */ public static function run(string $command, array $env = []): void { @@ -131,15 +197,14 @@ class BackupService } fclose($pipes[0]); - // We don't need stdout — most of these commands write to files. - $stderr = stream_get_contents($pipes[2]); fclose($pipes[1]); + $stderr = stream_get_contents($pipes[2]); fclose($pipes[2]); $exit = proc_close($proc); if ($exit !== 0) { throw new RuntimeException(sprintf( - "Command failed (exit %d): %s\nstderr:\n%s", + "Command failed (exit %d):\n %s\nstderr:\n%s", $exit, self::redactCommand($command), trim((string) $stderr) ?: '(empty)' @@ -149,8 +214,8 @@ class BackupService /** * Hide credential-looking flags in error messages so we don't dump - * passwords to logs. Crude but enough for the legacy CLI flag form; - * the commands themselves prefer MYSQL_PWD env vars. + * passwords to logs. The streaming pipeline doesn't put creds on + * the CLI (everything goes via env vars), but defence in depth. */ private static function redactCommand(string $command): string {