Copying a production database to a development environment without a proper process is either a GDPR violation or a security incident waiting to happen. This article walks through a complete pipeline: a fast parallel dump with mydumper, automatic anonymisation of personal data, and a DDEV custom command (announced by a post-start hook) that makes the sanitised database available to the whole team with a single command.
Why mysqldump is not enough
# mysqldump on a 10GB Magento database (single-threaded, serial export)
time mysqldump -u root -p magento > dump.sql
# real: 18m42s

# mydumper - parallel export of the same database.
# -t 8 is passed explicitly so the command matches the 8-thread measurement.
time mydumper -u root -p password -B magento -o /tmp/dump/ -t 8
# real: 2m15s (8 parallel threads) - roughly 8x faster
mydumper – parallel dump and restore
# Install mydumper (pick the line for your platform)
apt-get install mydumper # Ubuntu
brew install mydumper # macOS

# Full parallel dump with compression.
# --chunk-filesize=100 splits table data into files of roughly 100MB each.
# Keep comments off the continued lines: a "\" only continues the command
# when it is the very last character on the line, so a trailing
# "\ # comment" silently ends the command early.
mydumper \
    --host=prod-db.example.com \
    --user=backup_user \
    --password=secret \
    --database=magento \
    --outputdir=/tmp/magento-dump \
    --compress \
    --threads=8 \
    --chunk-filesize=100 \
    --verbose=3
# Parallel restore of the dump directory into the local dev database.
# Connection options first, then what to load, then tuning.
myloader \
    --host=localhost --user=magento --password=magento \
    --database=magento_dev --directory=/tmp/magento-dump \
    --threads=8 --compress-protocol --verbose=3
GDPR anonymisation script
<?php
declare(strict_types=1);
// anonymise.php - run after restore, before dev use
// Replaces all PII with fake data
/**
 * Overwrites personal data in a restored Magento database with deterministic
 * placeholder values so the copy can be used for development.
 *
 * Row updates run inside a single transaction; the two log/session tables are
 * emptied afterwards with TRUNCATE, outside the transaction.
 *
 * NOTE(review): only the tables touched below are covered. Other tables that
 * commonly hold personal data in Magento (e.g. quote_address, the *_grid
 * tables, newsletter_subscriber) are not handled here - confirm against the
 * actual schema before relying on this for compliance.
 */
class MagentoAnonymiser
{
    /**
     * @param \PDO $pdo Connection to the database to anonymise. It should use
     *                  PDO::ERRMODE_EXCEPTION so failed statements abort the run.
     */
    public function __construct(
        private \PDO $pdo
    ) {}

    /**
     * Runs every anonymisation step and prints progress to stdout.
     *
     * @throws \Throwable Re-throws whatever a step raised, after rolling back
     *                    the row updates if the transaction is still open.
     */
    public function anonymise(): void
    {
        $this->pdo->beginTransaction();
        try {
            $this->anonymiseCustomers();
            $this->anonymiseOrders();
            $this->anonymiseAddresses();
            $this->anonymiseQuotes();
            $this->pdo->commit();
        } catch (\Throwable $e) {
            // Guard the rollback: if the transaction is already gone, calling
            // rollBack() would throw and hide the original failure.
            if ($this->pdo->inTransaction()) {
                $this->pdo->rollBack();
            }
            throw $e;
        }

        // TRUNCATE is DDL in MySQL and causes an implicit commit. Running it
        // inside the transaction above would end that transaction behind
        // PDO's back, so the later commit() would fail with "There is no
        // active transaction". It therefore runs after the explicit commit.
        $this->clearSessions();
        $this->clearEmails();

        echo "Anonymisation complete\n";
    }

    /**
     * Replaces customer identity fields and invalidates credentials.
     */
    private function anonymiseCustomers(): void
    {
        echo "Anonymising customers...\n";

        // Deterministic per-row e-mail keeps values unique across customers.
        // The password hash is a fixed dummy value; reset tokens are removed.
        $this->pdo->exec("
            UPDATE customer_entity
            SET
                email = CONCAT('customer_', entity_id, '@example.invalid'),
                firstname = 'Test',
                lastname = CONCAT('User', entity_id),
                password_hash = '\$2y\$10\$fakehashfakehashfakehashfakehashfakehashfakeh',
                rp_token = NULL,
                rp_token_created_at = NULL
        ");

        // Overwrite personal EAV varchar attributes.
        // NOTE(review): entity_type_id = 1 is assumed to be the customer
        // entity type - verify against eav_entity_type.
        $this->pdo->exec("
            UPDATE customer_entity_varchar
            SET value = 'Anonymised'
            WHERE attribute_id IN (
                SELECT attribute_id FROM eav_attribute
                WHERE entity_type_id = 1
                AND attribute_code IN ('prefix', 'suffix', 'taxvat', 'middlename')
            )
        ");
    }

    /**
     * Replaces customer identity and network origin data stored on orders.
     */
    private function anonymiseOrders(): void
    {
        echo "Anonymising orders...\n";

        $this->pdo->exec("
            UPDATE sales_order
            SET
                customer_email = CONCAT('order_', entity_id, '@example.invalid'),
                customer_firstname = 'Test',
                customer_lastname = CONCAT('Customer', entity_id),
                customer_taxvat = NULL,
                remote_ip = '127.0.0.1',
                x_forwarded_for = NULL
        ");
    }

    /**
     * Replaces names and contact details on order and customer addresses.
     */
    private function anonymiseAddresses(): void
    {
        echo "Anonymising addresses...\n";

        // Order addresses
        $this->pdo->exec("
            UPDATE sales_order_address
            SET
                firstname = 'Test',
                lastname = CONCAT('Address', entity_id),
                email = CONCAT('addr_', entity_id, '@example.invalid'),
                street = '123 Test Street',
                telephone = '+48 000 000 000',
                company = NULL,
                vat_id = NULL
        ");

        // Customer addresses
        $this->pdo->exec("
            UPDATE customer_address_entity
            SET
                firstname = 'Test',
                lastname = CONCAT('Addr', entity_id),
                street = '123 Test Street',
                telephone = '+48 000 000 000',
                company = NULL,
                vat_id = NULL
        ");
    }

    /**
     * Replaces customer identity on quotes and resets stored client IPs.
     */
    private function anonymiseQuotes(): void
    {
        // Quotes that carry an e-mail get placeholder identity fields.
        $this->pdo->exec("
            UPDATE quote
            SET
                customer_email = CONCAT('quote_', entity_id, '@example.invalid'),
                customer_firstname = 'Test',
                customer_lastname = 'Quote',
                remote_ip = '127.0.0.1'
            WHERE customer_email IS NOT NULL
        ");

        // Quotes without an e-mail are skipped by the statement above but can
        // still hold a real client IP, so reset that separately.
        $this->pdo->exec("
            UPDATE quote
            SET remote_ip = '127.0.0.1'
            WHERE customer_email IS NULL
            AND remote_ip IS NOT NULL
        ");
    }

    /**
     * Empties the session table. Must run outside a transaction (TRUNCATE
     * commits implicitly in MySQL).
     */
    private function clearSessions(): void
    {
        $this->pdo->exec("TRUNCATE TABLE session");
    }

    /**
     * Empties the e-mail log table. Must run outside a transaction (TRUNCATE
     * commits implicitly in MySQL).
     */
    private function clearEmails(): void
    {
        // Email logs contain personal data
        $this->pdo->exec("TRUNCATE TABLE email_log");
    }
}
// Connection settings can be overridden through the environment so the same
// script works against any dev database (for example one whose host is not
// "localhost"). The defaults are the original hard-coded values.
$env = static function (string $name, string $default): string {
    $value = getenv($name);

    return $value !== false ? $value : $default;
};

$pdo = new \PDO(
    $env('ANONYMISE_DB_DSN', 'mysql:host=localhost;dbname=magento_dev'),
    $env('ANONYMISE_DB_USER', 'magento'),
    $env('ANONYMISE_DB_PASSWORD', 'magento'),
    // Fail loudly: a silently ignored UPDATE would leave real personal data behind.
    [\PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION],
);
(new MagentoAnonymiser($pdo))->anonymise();
DDEV hooks – automate the full pipeline
# .ddev/config.yaml - hooks for automated dev DB refresh
# post-start only prints a reminder on the host; the refresh itself is the
# separate "ddev refresh-db" custom command.
hooks:
  post-start:
    - exec-host: "echo 'Run: ddev refresh-db to sync from production'"
# Custom command: ddev refresh-db
# Place this in .ddev/commands/host/refresh-db
#!/bin/bash
# .ddev/commands/host/refresh-db
## Description: Replace the DDEV database with an anonymised copy of production
## Usage: refresh-db
## Example: "ddev refresh-db"
#
# Steps: dump on the production host -> download -> extract into the project's
# .ddev directory -> restore with myloader inside the web container ->
# anonymise -> reconfigure Magento for development.
#
# NOTE(review): myloader must be installed in the web container (e.g. via
# webimage_extra_packages) and bin/anonymise.php must be able to connect to
# the DDEV database (host "db", user/password/database "db") - confirm both.
set -euo pipefail

# SSH target; override with PROD_SSH=user@host ddev refresh-db
PROD_SSH="${PROD_SSH:-deploy@prod.example.com}"

DATE=$(date +%Y%m%d)
# Unique per run so two refreshes on the same day cannot clobber each other's
# files on the production host.
RUN_ID="${DATE}-$(date +%H%M%S)-$$"
REMOTE_NAME="dump-${RUN_ID}"
REMOTE_DIR="/tmp/${REMOTE_NAME}"
REMOTE_ARCHIVE="${REMOTE_DIR}.tar.gz"

# The dump has to be readable from inside the web container. The project's
# .ddev directory is available there as /mnt/ddev_config, whereas the host's
# /tmp is not visible to the container at all.
APPROOT="${DDEV_APPROOT:-$PWD}"
DUMP_DIR="${APPROOT}/.ddev/.refresh-db-dump"
CONTAINER_DUMP_DIR="/mnt/ddev_config/.refresh-db-dump"
LOCAL_ARCHIVE="$(mktemp "${TMPDIR:-/tmp}/magento-prod-dump.XXXXXX")"

REMOTE_FILES_PENDING=0

# Always remove the raw (unmasked) production data, on success and on failure.
cleanup() {
    rm -rf "${DUMP_DIR}" "${LOCAL_ARCHIVE}"
    if [ "${REMOTE_FILES_PENDING}" -eq 1 ]; then
        ssh "${PROD_SSH}" "rm -rf '${REMOTE_DIR}' '${REMOTE_ARCHIVE}'" || true
    fi
}
trap cleanup EXIT

echo "=== Refreshing dev database from production - ${DATE} ==="

# 1. Dump from production (requires SSH access or VPN)
echo "Dumping production database..."
REMOTE_FILES_PENDING=1
# umask 077 keeps the dump unreadable for other users on the production host.
# \$(...) is escaped so the password file is read remotely, not locally.
ssh "${PROD_SSH}" "umask 077 \
    && mydumper --user=backup --password=\"\$(cat /etc/magento-db-pass)\" --database=magento --outputdir='${REMOTE_DIR}' --compress --threads=8 \
    && tar czf '${REMOTE_ARCHIVE}' -C /tmp '${REMOTE_NAME}' \
    && rm -rf '${REMOTE_DIR}'"

echo "Downloading dump..."
scp "${PROD_SSH}:${REMOTE_ARCHIVE}" "${LOCAL_ARCHIVE}"
ssh "${PROD_SSH}" "rm -f '${REMOTE_ARCHIVE}'"
REMOTE_FILES_PENDING=0

# 2. Extract
# --strip-components=1 drops the archive's top-level dump-<id>/ directory so
# the files land directly in DUMP_DIR, where myloader looks for them.
rm -rf "${DUMP_DIR}"
mkdir -p "${DUMP_DIR}"
chmod 700 "${DUMP_DIR}"
tar xzf "${LOCAL_ARCHIVE}" -C "${DUMP_DIR}" --strip-components=1

# 3. Restore into DDEV
echo "Restoring into DDEV..."
ddev exec "myloader -h db -u db -p db -B db -d '${CONTAINER_DUMP_DIR}' -t 4 --overwrite-tables"

# 4. Anonymise
echo "Anonymising personal data (GDPR)..."
ddev exec "php /var/www/html/bin/anonymise.php"

# 5. Adjust for dev environment
# \${DDEV_SITENAME} is escaped so it is expanded inside the container.
echo "Configuring for development..."
ddev exec "bin/magento config:set web/secure/base_url https://\${DDEV_SITENAME}.ddev.site/"
ddev exec "bin/magento config:set web/unsecure/base_url http://\${DDEV_SITENAME}.ddev.site/"
ddev exec "bin/magento config:set catalog/search/engine opensearch"
ddev exec "bin/magento config:set catalog/search/opensearch_server_hostname opensearch"
ddev exec "bin/magento cache:flush"
ddev exec "bin/magento indexer:reindex"

echo "=== Database refresh complete ==="
Summary
A proper prod-to-dev migration pipeline has three mandatory steps: a fast parallel dump (mydumper), GDPR-compliant anonymisation, and environment reconfiguration. Automating all three in a DDEV custom command means the whole team runs one command and gets a fresh, compliant development database in under 10 minutes. Never give a developer access to unmasked production data – the anonymisation script is the technical enforcement of that policy.
