#!/usr/bin/env python
# file backup.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 10 August 2024
"""Abstract
doc.
"""
import subprocess
from datetime import datetime
from pathlib import Path

import yaml
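
# The only third-party dependency is PyYAML (`pip install pyyaml`);
# everything else is from the standard library.
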
COMPOSE_PATH = Path('docker-compose.yml')
BACKUP_VOL_PATH = Path('./backup/docker_named_volumes')
BACKUP_SQL_PATH = Path('./backup/docker_sql_dumps')
VOLUME_PREFIX = 'docker_'
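# Note: Compose prefixes named volumes with the project name, which defaults
# to the name of the directory holding docker-compose.yml (assumed here to be
# `docker`, hence VOLUME_PREFIX); adjust it if your project name differs.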


class UnionFind:
    """Union-find (disjoint-set) structure over service names, used to group
    services that must be stopped together."""

    def __init__(self):
        self.parent = {}

    def make(self, service):
        self.parent.setdefault(service, service)

    def find(self, service):
        """Return the root of the set containing `service`, compressing the
        path along the way."""
        if self.parent[service] != service:
            self.parent[service] = self.find(self.parent[service])
        return self.parent[service]

    def union(self, service1, service2):
        root1 = self.find(service1)
        root2 = self.find(service2)
        if root1 != root2:
            self.parent[root2] = root1
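
# A minimal sketch of the grouping behaviour (service names are assumptions):
#
#     uf = UnionFind()
#     for a, b in [('web', 'db'), ('worker', 'db')]:
#         uf.make(a); uf.make(b); uf.union(a, b)
#     assert uf.find('web') == uf.find('worker')  # both depend on 'db'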


def build_services_graph(services):
    """Union every service with its `depends_on` entries, so transitively
    dependent services end up in the same set."""
    uf = UnionFind()
    for service in services:
        uf.make(service)
        # `depends_on` may be a list or a mapping (long syntax); iterating a
        # mapping yields its keys, so both forms work here.
        for dependency in services[service].get('depends_on', []):
            uf.make(dependency)
            uf.union(service, dependency)
    return uf


def group_services(services, graph):
    """Map each union-find root to the list of services in its group."""
    grouped_services = {}
    for service in services:
        # SQL services are dumped with pg_dump rather than archived,
        # so leave them out of the stop/backup/start groups.
        if is_sql(services[service]):
            continue
        root = graph.find(service)
        if root not in grouped_services:
            grouped_services[root] = {'services': []}
        grouped_services[root]['services'].append(service)
    return grouped_services


def group_volumes(services, volumes, services_group):
    """Attach to each service group the named volumes its services mount."""
    for group in services_group.values():
        vols = group.setdefault('volumes', [])
        for service in group['services']:
            # Short 'name:/mount' syntax assumed; keep only named volumes
            # declared in the top-level `volumes:` section (skip bind mounts).
            for volume in (v.split(':')[0] for v in services[service].get('volumes', [])):
                if volume in volumes:
                    vols.append(volume)
    return services_group
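
# Resulting shape, with assumed names:
#
#     {'web': {'services': ['web', 'worker'], 'volumes': ['media', 'cache']}}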


def is_sql(service):
    # Build-only services may lack an `image` key; treat them as non-SQL.
    return 'postgres' in service.get('image', '')


def get_date_string():
    current_date = datetime.now()
    return current_date.strftime("%Y-%m-%d")


def backup_named_volume(volume):
    date_string = get_date_string()
    archive_name = f'{date_string}_{volume}.tar'
    BACKUP_VOL_PATH.mkdir(parents=True, exist_ok=True)
    print(f'backup volume {volume} to {BACKUP_VOL_PATH}/{archive_name}')
    # Archive the volume's contents from a throwaway container; paths are
    # assumed whitespace-free since the command is split on spaces.
    subprocess.run(
        f'docker run --rm --volume {VOLUME_PREFIX}{volume}:/data'
        f' --volume {BACKUP_VOL_PATH.resolve()}:/bkp'
        f' ubuntu tar -cf /bkp/{archive_name} -C /data .'.split(),
        check=True)
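
# Restoring is the reverse operation (a sketch; the volume and archive names
# below are assumptions):
#
#     docker run --rm --volume docker_media:/data \
#         --volume "$PWD/backup/docker_named_volumes":/bkp \
#         ubuntu tar -xf /bkp/2024-08-10_media.tar -C /data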


def run_docker_compose(cmd):
    subprocess.run(f'docker compose {cmd}'.split())


def backup_named_volumes(services, volumes):
    """Stop each service group, archive its named volumes, then restart it."""
    services_graph = build_services_graph(services)
    services_group = group_services(services, services_graph)
    services_group = group_volumes(services, volumes, services_group)
    for group_name, group in services_group.items():
        print(f'Service group {group_name} ', end='')
        if not group['volumes']:
            print('no volumes')
            continue
        print(f'{group["volumes"]}: run backup...')
        # Stop the whole group so shared volumes are quiescent while archived.
        run_docker_compose(f'stop {" ".join(group["services"])}')
        for volume in group['volumes']:
            backup_named_volume(volume)
        run_docker_compose(f'start {" ".join(group["services"])}')


def backup_sql_dumps(services, volumes):
    for service_name, service in services.items():
        if not is_sql(service):
            continue
        backup_sql_dump(service_name, service)


def backup_sql_dump(service_name, service):
    date_string = get_date_string()
    dump_name = f'{date_string}_{service_name}.sql'
    dump_path = BACKUP_SQL_PATH / dump_name
    dump_path.parent.mkdir(parents=True, exist_ok=True)
    # Assumes the mapping form of `environment:` in the compose file.
    user = service['environment']['POSTGRES_USER']
    db = service['environment']['POSTGRES_DB']
    print(f'Service {service_name} dump database to {dump_path}')
    # -T disables TTY allocation so stdout can be redirected to a local file.
    with dump_path.open('wb') as f:
        subprocess.run(f'docker compose exec -T {service_name} pg_dump -U {user} {db}'.split(),
                       check=True,
                       stdout=f)
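
# Restore sketch (assumes the same compose service is running; the service and
# file names are illustrative):
#
#     docker compose exec -T db psql -U "$POSTGRES_USER" "$POSTGRES_DB" \
#         < backup/docker_sql_dumps/2024-08-10_db.sql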


if __name__ == '__main__':
    with COMPOSE_PATH.open() as cf:
        compose = yaml.safe_load(cf)
    services = compose['services']
    volumes = compose['volumes']
    backup_named_volumes(services, volumes)
    backup_sql_dumps(services, volumes)
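
# Typical use (a sketch; the path is an assumption): run from the directory
# containing docker-compose.yml, e.g. nightly via cron:
#
#     0 3 * * * cd /srv/docker && ./backup.py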