#!/usr/bin/env python
# file backup.py
# author Florent Guiotte
# version 0.0
# date 10 août 2024
"""Back up docker-compose named volumes and PostgreSQL databases.

Services are grouped by their ``depends_on`` relations (union-find) so
each group is stopped together, its named volumes archived with ``tar``
from a throwaway container, then restarted.  PostgreSQL services are
not stopped: they are dumped with ``pg_dump`` instead.
"""
from datetime import datetime
from pathlib import Path
import subprocess

COMPOSE_PATH = Path('docker-compose.yml')
BACKUP_VOL_PATH = Path('./backup/docker_named_volumes')
BACKUP_SQL_PATH = Path('./backup/docker_sql_dumps')
VOLUME_PREFIX = 'docker_'


class UnionFind:
    """Disjoint-set over service names, used to group dependent services."""

    def __init__(self):
        # Maps each known service to its parent; roots map to themselves.
        self.parent = {}

    def make(self, service):
        """Register *service* as its own singleton set (no-op if already known)."""
        self.parent.setdefault(service, service)

    def find(self, service):
        """Return the root of *service*'s set, compressing the path on the way."""
        if self.parent[service] != service:
            # Path compression: point directly at the root for next time.
            self.parent[service] = self.find(self.parent[service])
        return self.parent[service]

    def union(self, service1, service2):
        """Merge the sets containing the two services (service1's root wins)."""
        root1 = self.find(service1)
        root2 = self.find(service2)
        if root1 != root2:
            self.parent[root2] = root1


def build_services_graph(services):
    """Return a UnionFind linking each service with its ``depends_on`` entries.

    Works with both the list and the mapping form of ``depends_on``,
    since iterating a mapping yields the dependency names.
    """
    uf = UnionFind()
    for service in services:
        uf.make(service)
        for dependency in services[service].get('depends_on', []):
            uf.make(dependency)
            uf.union(service, dependency)
    return uf


def group_services(services, graph):
    """Group non-SQL service names by their dependency-graph root.

    Returns ``{root: {'services': [name, ...]}}``.  SQL services are
    excluded: they are backed up via pg_dump, not by being stopped.
    """
    grouped_services = {}
    for service in services:
        if is_sql(services[service]):
            continue
        root = graph.find(service)
        grouped_services.setdefault(root, {'services': []})['services'].append(service)
    return grouped_services


def group_volumes(services, volumes, services_group):
    """Attach to each group the top-level named volumes its services mount.

    Only the name part of a ``name:mountpoint`` spec that matches a
    declared named volume is kept; bind mounts are ignored.  Each volume
    appears once per group even when several services share it, so it is
    not archived twice.
    """
    for group in services_group.values():
        group_volumes = group.setdefault('volumes', [])
        for service in group['services']:
            for volume in (v.split(':')[0] for v in services[service].get('volumes', [])):
                # Deduplicate: two services of a group may mount the same volume.
                if volume in volumes and volume not in group_volumes:
                    group_volumes.append(volume)
    return services_group


def is_sql(service):
    """Return True if *service* runs a PostgreSQL image.

    ``image`` may be absent when the service is built locally
    (``build:``); such services are treated as non-SQL.
    """
    return 'postgres' in service.get('image', '')


def get_date_string():
    """Return today's date as ``YYYY-MM-DD``, used to name archives and dumps."""
    return datetime.now().strftime("%Y-%m-%d")


def backup_named_volume(volume):
    """Archive named *volume* to ``BACKUP_VOL_PATH/<date>_<volume>.tar``.

    A throwaway ubuntu container mounts the docker volume and the host
    backup directory, and runs ``tar`` between the two.  Raises
    CalledProcessError on failure: a silently failed backup is worse
    than a crash.
    """
    date_string = get_date_string()
    archive_name = f'{date_string}_{volume}.tar'
    print(f'backup volume {volume} to {BACKUP_VOL_PATH}/{archive_name}')
    subprocess.run(
        f'docker run --rm'
        f' --volume {VOLUME_PREFIX}{volume}:/data'
        f' --volume {BACKUP_VOL_PATH.resolve()}:/bkp'
        f' ubuntu tar -cf /bkp/{archive_name} -C /data .'.split(),
        check=True)


def run_docker_compose(cmd):
    """Run ``docker compose <cmd>`` (*cmd* is split on whitespace)."""
    subprocess.run(f'docker compose {cmd}'.split())


def backup_named_volumes(services, volumes):
    """Stop each dependency group, archive its named volumes, restart it.

    The group is restarted even if a volume backup fails, so services
    are never left stopped.
    """
    services_graph = build_services_graph(services)
    services_group = group_services(services, services_graph)
    services_group = group_volumes(services, volumes, services_group)
    for group_name, group in services_group.items():
        print(f'Service group {group_name} ', end='')
        if not group['volumes']:
            print('no volumes')
            continue
        print(f'{group["volumes"]}: run backup...')
        run_docker_compose(f'stop {" ".join(group["services"])}')
        try:
            for volume in group['volumes']:
                backup_named_volume(volume)
        finally:
            # Always bring the group back up, even after a failed backup.
            run_docker_compose(f'start {" ".join(group["services"])}')


def backup_sql_dumps(services, volumes):
    """Dump every PostgreSQL service found in *services*.

    *volumes* is unused but kept for symmetry with backup_named_volumes.
    """
    for service_name, service in services.items():
        if is_sql(service):
            backup_sql_dump(service_name, service)


def backup_sql_dump(service_name, service):
    """Write ``pg_dump`` output to ``BACKUP_SQL_PATH/<date>_<service>.sql``.

    Requires POSTGRES_USER and POSTGRES_DB in the service's
    ``environment`` mapping.  Raises CalledProcessError if the dump fails.
    """
    date_string = get_date_string()
    dump_name = f'{date_string}_{service_name}.sql'
    dump_path = BACKUP_SQL_PATH / dump_name
    # parents=True: BACKUP_SQL_PATH is nested and ./backup may not exist yet.
    dump_path.parent.mkdir(parents=True, exist_ok=True)
    user = service['environment']['POSTGRES_USER']
    db = service['environment']['POSTGRES_DB']
    print(f'Service {service_name} dump database to {dump_path}')
    with dump_path.open('wb') as f:
        subprocess.run(
            f'docker compose exec -T {service_name} pg_dump -U {user} {db}'.split(),
            check=True, stdout=f)


def main():
    """Load the compose file and run both backup passes."""
    # Imported here so the pure helpers above stay importable without PyYAML.
    import yaml

    with COMPOSE_PATH.open() as cf:
        compose = yaml.safe_load(cf)
    services = compose['services']
    volumes = compose['volumes']
    backup_named_volumes(services, volumes)
    backup_sql_dumps(services, volumes)


if __name__ == '__main__':
    main()