#!/usr/bin/env python2.4

# svn-fast-backup: use rsync snapshots for very fast FSFS repository backup.
#    Multiple FSFS backups share data via hardlinks, meaning old backups are
#    almost free, since a newer revision of a repository is almost a complete
#    superset of an older revision.

# This is good for replacing incremental log-dump+restore-style backups
# because it is just as space-conserving and even faster; there is no
# inter-backup state (old backups are essentially caches); each backup
# directory is self-contained.  It keeps the same interface as svn-hot-backup
# (if you use --force), but only works for FSFS repositories.

# Author: Karl Chen

## quarl 2005-08-17 initial version
## quarl 2005-09-01 refactor, documentation; new options: --force, --keep,
##                  --simulate, --trace

# $HeadURL$
# $LastChangedRevision$
# $LastChangedDate$
# $LastChangedBy$

# Originally based on svn-hot-backup.py, whose copyright notice states:

# ====================================================================
# Copyright (c) 2000-2004 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://subversion.tigris.org/.
# ====================================================================

######################################################################

import sys, os, re
import getopt
import subprocess                       # python2.4

######################################################################
# Global Settings

svnlook = "svnlook"                     # Path to svnlook
svnadmin = "svnadmin"                   # Path to svnadmin
rsync = "rsync"                         # Path to rsync

# Trailing "-REVISION" or "-REVISION-INCREMENT" part of a backup directory
# name, e.g. "myrepo-123" or "myrepo-123-2".
_BACKUP_SUFFIX_RE = re.compile(
    r"-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?$")

######################################################################
# Command line arguments

def usage():
    """Abort with the command-line help text (raises SystemExit)."""
    raise SystemExit("""Syntax: %s [OPTIONS] repos_path backup_dir

Makes a hot backup of a Subversion FSFS repository at REPOS_PATH to
BACKUP_DIR/repos-rev.

If a previous version exists, make hard links of its files using rsync.
As multiple FSFS backups share data via hardlinks, old backups use
almost no space, since a newer revision of a repository is almost a complete
superset of an older revision (excluding direct repository modifications).

Keeps up to N backups and deletes the rest.  (N includes the current backup.)

OPTIONS:
    -h, --help         This screen
    -q, --quiet        Quieter than usual
    -k, --keep=N       Keep N backups instead of 64
    -k, --keep=all     Keep all backups (never delete any)
    -f, --force        Make a new backup even if one with current revision exists
    -t, --trace        Show actions
    -s, --simulate     Don't perform actions

""" %sys.argv[0])


class Options:
    """Plain attribute bag holding the parsed command-line settings."""
    pass


def default_options():
    """Return an Options instance populated with the default settings."""
    options = Options()
    options.force = False
    options.trace = False
    options.simulate = False
    options.quiet = False
    options.keep = 64                   # Number of backups to keep around
    return options


def parse_commandline():
    """Parse sys.argv into an Options instance.

    Besides the flags set by default_options(), the result carries:
      repo_dir    -- path to the repository, as given on the command line
      backup_dir  -- absolute path of the directory that holds the backups
      repo        -- base name of the repository (prefix of backup names)

    Calls usage() (which raises SystemExit) on any command-line error.
    """
    options = default_options()

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'qhk:fts',
                                   ['quiet', 'help', 'keep=', 'force',
                                    'trace', 'simulate'])
    except getopt.GetoptError:
        # sys.exc_info() keeps this clause valid on every Python version
        # (neither "except X, e" nor "except X as e" is portable).
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        usage()

    for (o, a) in opts:
        if o == '-h' or o == '--help':
            usage()
        elif o == '-q' or o == '--quiet':
            options.quiet = True
        elif o == '-f' or o == '--force':
            options.force = True
        elif o == '-t' or o == '--trace':
            options.trace = True
        elif o == '-s' or o == '--simulate':
            options.simulate = True
        elif o == '-k' or o == '--keep':
            if a.strip().lower() == 'all':
                options.keep = 0        # 0 means "never delete"
            else:
                try:
                    options.keep = int(a)
                except ValueError:
                    sys.stderr.write(
                        "Error: invalid value for --keep: %r\n" % (a,))
                    usage()
        else:
            raise Exception("Internal error")

    if len(args) != 2:
        usage()

    # Path to repository
    options.repo_dir = args[0]

    # Where to store the repository backup.  The backup will be placed in a
    # *subdirectory* of this location, named after the youngest revision.
    options.backup_dir = os.path.abspath(args[1])

    options.repo = os.path.basename(os.path.abspath(options.repo_dir))

    return options


def _backup_sort_key(name):
    """Return (revision, increment) for a backup directory name.

    A name without an increment gets -1 so that it sorts before every
    incremented backup of the same revision.
    """
    match = _BACKUP_SUFFIX_RE.search(name)
    increment = match.group('increment')
    if increment is None:
        increment = -1
    else:
        increment = int(increment)
    return (int(match.group('revision')), increment)


def comparator(a, b):
    """cmp-style ordering of two backup directory names.

    Orders by revision number, then by increment; a name without an
    increment comes first.  Returns -1, 1, or 0 for identical suffixes.
    Kept for backward compatibility; sorting in this file uses
    _backup_sort_key.
    """
    keya = _backup_sort_key(a)
    keyb = _backup_sort_key(b)
    if keya < keyb:
        return -1
    elif keya > keyb:
        return 1
    return 0


def pipe(command):
    """Run COMMAND (an argv list) and return its stripped standard output."""
    # universal_newlines makes the output text (not bytes) on Python 3.
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            universal_newlines=True)
    return proc.communicate()[0].strip()


def readfile(filename):
    """Return the stripped contents of FILENAME, or '' if it is unreadable."""
    try:
        handle = open(filename)
    except (IOError, OSError):
        return ''
    try:
        try:
            return handle.read().strip()
        except (IOError, OSError, UnicodeError):
            return ''
    finally:
        handle.close()


def runcmd(cmd):
    """Run CMD (an argv list) and return its exit status.

    Honours the global options: --trace echoes the command to stderr and
    --simulate skips execution, reporting success (0).
    """
    if options.trace:
        sys.stderr.write('# %s\n' % (cmd,))
    if options.simulate:
        return 0
    return subprocess.call(cmd)


def deltree(path):
    """Recursively delete PATH via 'rm -r' (subject to --trace/--simulate)."""
    runcmd(['rm', '-r', path])


def get_youngest_revision():
    """Return the youngest revision of the repository in the current directory.

    Raises SystemExit if the current directory is not an FSFS repository or
    if svnlook does not report a revision number.
    """
    if readfile(os.path.join('db', 'fs-type')) != 'fsfs':
        raise SystemExit("Path '%s' doesn't contain a FSFS repository"
                         % options.repo_dir)

    youngest = pipe([svnlook, "youngest", "."])
    if not youngest.isdigit():
        # Without this check a failed svnlook would yield a backup "repo-".
        raise SystemExit("%s: could not determine youngest revision of '%s'"
                         % (sys.argv[0], options.repo_dir))
    return youngest


def list_repo_backups():
    '''Return a sorted list of backups for this repository.'''
    # re.escape: the repository name is literal text, not a pattern.
    regexp = re.compile(re.escape(options.repo) + "-[0-9]+(-[0-9]+)?$")
    directory_list = [x for x in os.listdir(options.backup_dir)
                      if regexp.match(x)]
    directory_list.sort(key=_backup_sort_key)
    return directory_list


def delete_old_backups():
    """Delete all but the newest options.keep backups (keep <= 0: keep all)."""
    if options.keep <= 0:
        return

    for item in list_repo_backups()[:-options.keep]:
        old_backup_subdir = os.path.join(options.backup_dir, item)
        sys.stdout.write(" Removing old backup:  %s\n" % old_backup_subdir)
        deltree(old_backup_subdir)


def find_next_backup_name(youngest):
    """Return the directory name to use for a backup of revision YOUNGEST.

    Raises SystemExit if a backup of that revision already exists and
    --force was not given.
    """
    # If there is already a backup of this revision, then append the next
    # highest increment to the path.  We still need to do a backup because the
    # repository might have changed despite no new revision having been
    # created.  We find the highest increment and add one rather than start
    # from 1 and increment because the starting increments may have already
    # been removed due to options.keep.

    regexp = re.compile(re.escape(options.repo) + "-" + re.escape(youngest)
                        + "(-(?P<increment>[0-9]+))?$")
    young_list = [x for x in os.listdir(options.backup_dir)
                  if regexp.match(x)]
    young_list.sort(key=_backup_sort_key)

    if not young_list:
        return "%s-%s" % (options.repo, youngest)

    # Backups for this revision exist already.

    if not options.force:
        if not options.quiet:
            sys.stdout.write("Backup already exists at %s\n" % young_list[-1])
        raise SystemExit

    increment = int(regexp.match(young_list[-1]).group('increment') or '0')

    return "%s-%s-%d" % (options.repo, youngest, increment + 1)


def _fail_rsync(backup_tmpdir):
    """Remove the partial backup and abort with the rsync failure message."""
    # A leftover .tmp directory would make later runs abort with
    # "Backup in progress?", so do not leave it behind.
    deltree(backup_tmpdir)
    raise SystemExit("%s: rsync failed" % sys.argv[0])


def do_rsync_backup():
    """Back up the repository in the current directory using rsync.

    The copy is made into '<backup>.tmp' and renamed into place once
    complete; files are hard-linked against the latest previous backup when
    one exists.  Raises SystemExit on failure or if nothing needs doing.
    """
    youngest = get_youngest_revision()

    if not options.quiet:
        # No newline: the completion message continues on the same line.
        sys.stdout.write("Beginning hot backup of '%s' "
                         "(youngest revision is %s)... "
                         % (options.repo, youngest))
        sys.stdout.flush()

    backup_subdir = os.path.join(options.backup_dir,
                                 find_next_backup_name(youngest))
    backup_tmpdir = backup_subdir + '.tmp'

    if os.path.exists(backup_tmpdir):
        raise SystemExit("%s: Backup in progress?  '%s' exists -- aborting."
                         % (sys.argv[0], backup_tmpdir))

    if not options.simulate:
        os.mkdir(backup_tmpdir)         # ensures atomicity

    if os.path.exists(backup_subdir):
        # Check again after doing mkdir (which serves as a mutex acquire) --
        # just in case another process just finished the same backup.
        if not options.simulate:
            os.rmdir(backup_tmpdir)     # release the mutex we just took
        if not options.quiet:
            sys.stdout.write("Backup already exists at %s\n" % backup_subdir)
        raise SystemExit

    previous_backups = list_repo_backups()

    ### Use rsync to make a copy.
    # We need to copy the 'current' file first.
    # Don't copy the transactions/ directory.
    # See http://svn.apache.org/repos/asf/subversion/trunk/notes/fsfs

    rsync_dest = os.path.join(backup_tmpdir, '')

    # copy db/current.  -R tells rsync to use relative pathnames.
    if runcmd([rsync, '-aR', 'db/current', rsync_dest]):
        _fail_rsync(backup_tmpdir)

    # Now copy everything else.
    cmd = [rsync, '-a',
           '--exclude', 'db/current',
           '--exclude', 'db/transactions/*',
           '--exclude', 'db/log.*',
           '.', rsync_dest]

    # If there's a previous backup, make hard links against the latest.
    if previous_backups:
        cmd += ['--link-dest',
                os.path.join(options.backup_dir, previous_backups[-1])]

    if runcmd(cmd):
        _fail_rsync(backup_tmpdir)

    # Rename to final name.
    if not options.simulate:
        os.rename(backup_tmpdir, backup_subdir)

    sys.stdout.write("Finished backup to %s\n" % backup_subdir)


if __name__ == '__main__':
    # 'options' is deliberately a module-level global: the helper functions
    # above read it directly.
    options = parse_commandline()
    os.chdir(options.repo_dir)
    do_rsync_backup()
    delete_old_backups()