#! /usr/bin/env python2.4
## vim: fileencoding=utf-8
#
# Copyright (c) 2006 Adeodato Simó (dato@net.com.org.es)
# Licensed under the terms of the MIT license, included below.

#{{{ __doc__

"""\
deb2bzr - import versions of a Debian source package into a bzr branch

deb2bzr is a tool to create a Debian packaging bzr branch from a sequence of
versions of a given Debian source package. This can be useful, for example, if
you'd like to switch maintenance of some package to bzr, and would like to have
the complete history available.

deb2bzr imports to the branch only the Debian packaging bits, defined as:

  * every file under debian/, regardless of whether it is present in the
    original tarball

  * for each particular version, each file not under debian/ that is modified
    in the .diff.gz

In its simplest invocation form, deb2bzr accepts one or more .dsc files as
arguments. It then tries to find an existing branch to import them into, and
creates one if it finds none:

  # create branch ./sl containing history for sl 3.03-12 and 3.03-13
  % deb2bzr sl_3.03-12.dsc sl_3.03-13.dsc

  # program can detect branch moved to ./sl-bzr, and imports sl 3.03-14
  % mv sl sl-bzr
  % deb2bzr sl_3.03-14.dsc

The --root option can be given to specify where to create and look for
branches; it defaults to the current directory. If the program logic to detect
which branch should get updated is unsatisfactory, the --branch option may be
used:

  # sl-bzr is a backup copy, force sl-bzr-work to be used
  % deb2bzr --branch sl-bzr-work sl_3.03-14.dsc

The second mode of operation allows updating or creating several branches at
once: a number of source package names are given as arguments, and for each of
them, the corresponding branch under "--root dir" is created or updated.

In this mode of operation, new versions can be obtained in two different ways:
from a directory containing the dsc files, specified with --dscdir; or, when
--update is specified, directly from a mirror (snapshot.debian.net being the
default). So, both creating and updating take the same form:

  # bootstrap new branches for libtorrent and rtorrent from s.d.n
  % deb2bzr --update libtorrent rtorrent

  # (some other day) check for new versions and import them
  % deb2bzr --update libtorrent rtorrent

The list of source packages is optional in this second mode of operation. If
no arguments are given, deb2bzr updates all branches in "rootdir" for --update,
and updates or creates branches for all packages with a dsc file present in
the specified dir for --dscdir.

deb2bzr always produces identical branches when run over the same set of
versions of a source package, and tries to produce compatible branches for the
same source package when some versions were missing and not imported. (WORK IN
PROGRESS)
"""

#}}}

#{{{ imports

import errno
import os
import os.path as op
import re
import sha
import shutil
import subprocess
import sys

from calendar import timegm
from datetime import datetime
from glob import glob
from operator import itemgetter
from optparse import OptionParser
from tempfile import mkdtemp
from time import strptime

# ITP: http://bugs.debian.org/380173
import deb822

# svn://svn.debian.org/secure-testing/lib/python/debian_support.py
from debian_support import Version

from bzrlib.bzrdir import BzrDir
from bzrlib.osutils import compact_date
from bzrlib.workingtree import WorkingTree

try:
    import chardet # apt-get install python-chardet
    __have_chardet = True
except ImportError:
    __have_chardet = False

#}}}

PROGRAM = 'deb2bzr'
VERSION = '__VERSION__'
ID_PREFIX = 'deb2bzr-1'
VERBOSE_LEVEL = os.environ.get('DEB2BZR_VERBOSE', None)

# 2006-08-04
# TODO we have to deal with missing parents; scenario: branch at 0.8.2-3, an
#      import of 0.8.2-5 is requested, but in the 0.8.2-5 changelog, a 0.8.2-4
#      version is mentioned: what to do?
#
#      For now, --force will be required to do such imports. Then, seems like
#      j-a-meinel talked about ghost revisions being a possibility here.

# 2006-08-04
# TODO normally, debian changelogs are linear, and uploads to e.g. security or
#      backports are not merged back. however, what if they are? (e.g. for
#      experimental is not so uncommon, I believe) -> maybe support for
#      branching would be of use?

#{{{ main

def main():
    """Command-line entry point.

    Parses the options, works out which source packages and branches to act
    on, and imports into each branch every dsc whose version is newer than
    the version the branch is currently at. Exits with status 1 when
    --branch is combined with more than one source, or when no branch to
    act on could be found.
    """
    global VERBOSE_LEVEL

    options, args = parse_options()

    if VERBOSE_LEVEL is None:
        VERBOSE_LEVEL = options.verbose
    else:
        # The value came from the DEB2BZR_VERBOSE environment variable and is
        # therefore a string; the loggers compare it against integer levels.
        try:
            VERBOSE_LEVEL = int(VERBOSE_LEVEL)
        except ValueError:
            rejected = VERBOSE_LEVEL
            VERBOSE_LEVEL = options.verbose
            warn('ignoring non-numeric DEB2BZR_VERBOSE value %r' % rejected)

    sources = None
    dscmap = {}

    # find out the list of sources
    if options.update:
        # dsc files will come from the mirror; arguments only name sources
        if args:
            sources = set(args)
    elif options.dscdir:
        available = dsc_by_source(glob(options.dscdir + '/*.dsc'))
        if args:
            # restrict the directory contents to the requested sources; a
            # requested source without any dsc gets an empty list
            sources = set(args)
            dscmap = dict([(src, available.get(src, [])) for src in sources])
        else:
            dscmap = available
            sources = dscmap.keys()
    else:
        # list of sources to be derived from the dsc files given as arguments
        dscmap = dsc_by_source(set(args))
        sources = dscmap.keys()

    if options.branch is None:
        branches = get_branches(options.rootdir, sources)
    else:
        # sources is None for --update without arguments
        if sources is not None and len(sources) > 1:
            err("can't use --branch if there is more than one source")
            sys.exit(1)
        source, version = source_version(options.branch)
        branches = { source: Branch(source, version, options.branch) }

    if not branches:
        err('found no branches to act on!')
        sys.exit(1)

    # dsclist is remote for --update
    if options.update:
        dscmap = {}
        for src in branches.keys():
            dsclist = list_remote_dsc(src, options.mirror) # XXX sort
            dscmap[src] = dsclist

    for src, dsclist in dscmap.items():
        if src not in branches:
            # only possible with --branch, when the forced branch holds a
            # different source package than the dsc files belong to
            err('no branch for source %s, skipping its dsc files' % src)
            continue

        b = branches[src]

        for version, dsc in dsclist:
            dname = op.basename(dsc)
            if version > b.version:
                info('updating %s with %s' % (b.branch, dname))
                update_branch(b, dsc, force=options.force)
                # TODO implement options.backup
                if dsc == dsclist[-1][1]:
                    break # make the else branch of the loop work
            else:
                debug('skipping already seen %s' % dname)
        else:
            # reached only when the last dsc of the list was not imported
            warn('no newer versions found for %s' % src)

#}}}

#{{{ list_remote_dsc

def list_remote_dsc(srcpkg, mirror):
    """Placeholder for listing the dsc files of srcpkg found on mirror.

    Not implemented yet: always raises NotImplementedError.
    """
    raise NotImplementedError('Sorry mate.')

#}}}

#{{{ Branch

class Branch(object):
    """Class to pack a (source, version, branch) tuple together."""

    def __init__(self, source, version, branch_dir):
        """Store the source name, Version(version) and the branch directory."""
        self.source = source
        self.version = Version(version)
        self.branch = branch_dir

    def __str__(self):
        return '%s (%s) [%s]' % (self.source, self.version, self.branch)

    def __repr__(self):
        return 'Branch(%s, %s, %s)' % (self.source, self.version, self.branch)

#}}}

#{{{ get_*_branches, likely_branch, source_version, dsc_by_source

def get_branches(rootdir, srcpkgs=None):
    """Return a mapping { source: Branch() } for all sources in srcpkgs.

    If srcpkgs is None, return all branches in rootdir. For a source without
    an existing branch, a new bzr branch is created in rootdir/<source>, or
    in a fresh rootdir/<source>.XXXXXX directory when that path is already
    taken; such branches start at version '~'.
    """
    if srcpkgs is None:
        return get_all_branches(rootdir, do_debug=True)

    # TODO do that silly optimization here?
    branches = {}
    all_branches = get_all_branches(rootdir)

    for source in srcpkgs:
        if source in all_branches:
            branches[source] = all_branches[source]
            debug('adding branch ' + str(branches[source]), 3)
            continue

        # branch does not exist, ensure we have a directory for it
        branch = op.join(rootdir, source)
        try:
            os.makedirs(branch)
            info('could not find branch for source %s, created one in %s' %
                    (source, branch))
        except OSError:
            # sys.exc_info() instead of "except OSError, e", which only
            # Python 2 parsers accept
            if sys.exc_info()[1].errno != errno.EEXIST:
                raise
            nbranch = mkdtemp(prefix=source + '.', dir=rootdir)
            warn('%s exists, branch for source %s created in %s instead' %
                    (branch, source, nbranch))
            branch = nbranch

        BzrDir.create_branch_convenience(branch)
        branches[source] = Branch(source, '~', branch)

    return branches

def get_all_branches(rootdir, do_debug=False):
    """Return a mapping { source: Branch() } for all branches in rootdir.

    When several directories hold a branch for the same source, the first
    one in sorted path order is kept and a warning is emitted for the rest.
    If do_debug is true, each kept branch is logged at debug level 3.
    """
    # sorted, because glob() makes no promise about ordering and the order
    # decides which of two duplicate branches gets used
    candidates = [ d for d in sorted(glob(rootdir + '/*')) if likely_branch(d) ]
    branches = {}

    for b in candidates:
        source, version = source_version(b)
        if source in branches:
            warn('found duplicate branch for source %s: %s '
                 '(already had %s)' % (source, b, branches[source].branch))
        else:
            branches[source] = Branch(source, version, b)
            if do_debug:
                debug('adding branch ' + str(branches[source]), 3)

    return branches

def likely_branch(d):
    """Test whether a directory is likely to be a Debian packaging branch.

    That is: d is a directory, contains a .bzr directory, and has a
    debian/changelog.
    """
    if not op.isdir(d):
        return False
    if not op.isdir(op.join(d, '.bzr')):
        return False
    return op.exists(op.join(d, 'debian', 'changelog'))

# Header line of a Debian changelog entry: "source (version) ...".
_CHANGELOG_HEADER_RE = re.compile(r'^(?P<source>\S+) \((?P<version>[^)]+)\)')

def source_version(directory):
    """Return the source and newest version of unpacked source "directory".

    Both are taken from the first line of directory/debian/changelog; raises
    ValueError if that line is not a changelog entry header.
    """
    f = open(op.join(directory, 'debian', 'changelog'))
    try:
        first_line = f.readline()
    finally:
        f.close()

    m = _CHANGELOG_HEADER_RE.match(first_line)
    if not m:
        raise ValueError('bad changelog line: ' + first_line)

    return m.group('source'), m.group('version')

def dsc_by_source(dsclist):
    """Returns a mapping { src: [ (Ver1, dsc1), (Ver2, dsc2) ] }, classifying
    dsclist per source package and increasingly sorted by version.

    Source and version are taken from the file name (<source>_<version>.dsc);
    raises ValueError for a name without an underscore.
    """
    suffix = '.dsc'
    dsc_map = {}

    for dsc in dsclist:
        name = op.basename(dsc)
        # Cut the extension as a suffix: str.rstrip('.dsc') strips a *set* of
        # characters and would also eat the end of versions like "0.1cvs".
        if name.endswith(suffix):
            name = name[:-len(suffix)]
        if '_' not in name:
            raise ValueError('not a <source>_<version>.dsc file name: ' + dsc)
        source, version = name.split('_', 1)
        dsc_map.setdefault(source, []).append((Version(version), dsc))

    for per_source in dsc_map.values():
        per_source.sort(key=itemgetter(0))

    return dsc_map

#}}}

#{{{ update_branch, prepare_new_branch, update_workingtree, path2id, compute_revinfo

def update_branch(branch_obj, dsc, force=False, backup=False):
    """Import the source package described by dsc into branch_obj.

    The new tree is assembled in a temporary directory next to the branch,
    gets a copy of the branch's .bzr, is committed, and then replaces the old
    branch directory. On success branch_obj.version is set to the newest
    version found in the imported changelog.

    If the changelog lists more than one version newer than the branch, the
    program exits with status 1 unless force is true. If backup is true, the
    old branch directory is kept and its location logged.

    Raises RuntimeError if dpkg-source or dpkg-parsechangelog fail.
    """
    nbranch = None
    precious = None
    bkup_dir = None
    # normpath drops a trailing slash, which would otherwise make basename
    # empty and dirname point at the branch itself
    obranch = op.normpath(branch_obj.branch)
    topdir = op.dirname(obranch)
    basename = op.basename(obranch)

    try:
        # TODO okay, I'll reckon that all this "put files into new branch,
        # copy .bzr over, update, commit, replace, maybe-rollback" may be
        # out of place given that we're dealing with VCS stuff, and revert
        # and uncommit exist
        nbranch = mkdtemp(prefix='deb2bzr.', dir=topdir)
        prepare_new_branch(dsc, nbranch)
        shutil.copytree(op.join(obranch, '.bzr'), op.join(nbranch, '.bzr'))

        from_version = str(branch_obj.version)
        versions = newer_versions(nbranch, from_version)

        if not versions:
            # nothing to commit, and versions[-1] below would fail
            warn('%s has no version newer than %s, nothing to import' %
                    (op.basename(dsc), from_version))
            return

        if len(versions) > 1:
            missing = 'missing versions between %s and %s: %s' % (
                from_version, versions[-1], ' '.join(versions[0:-1]))
            if not force:
                err(missing)
                err('pass --force to allow importing with missing middle versions')
                sys.exit(1)
            warn(missing)
            warn('importing anyway because --force was given')

        ntree = update_workingtree(nbranch, branch_obj.source) # XXX srcname can change

        command = split_cmd('dpkg-parsechangelog -v%s' % from_version)
        pipe_cmd = subprocess.Popen(command, cwd=nbranch, stdout=subprocess.PIPE)
        # Read before waiting: waiting first blocks forever once the child
        # has filled the pipe buffer.
        changelog = deb822.deb822(pipe_cmd.stdout)
        pipe_cmd.stdout.read() # drain whatever the parser left unread
        retcode = pipe_cmd.wait()
        if retcode != 0:
            raise RuntimeError('dpkg-parsechangelog exited with status %d!'
                    % retcode)

        dsc_file = open(dsc)
        try:
            revinfo = compute_revinfo(changelog, deb822.dsc(dsc_file))
        finally:
            dsc_file.close()

        ntree.commit(**revinfo)

        # if we arrived here, carefully replace the old branch with the new one
        # XXX mmm, maybe don't change the dir inode each time, dud.
        bkup_dir = mkdtemp(prefix='%s.deb2bzr-saved-' % basename, dir=topdir)
        saved = op.join(bkup_dir, basename)
        os.rename(obranch, saved)
        # only from here on is there an old branch to put back on failure
        precious = saved
        os.rename(nbranch, obranch)
        precious = None

        if backup:
            info('keeping old branch in %s' % saved)
        else:
            shutil.rmtree(bkup_dir)
        bkup_dir = None

        branch_obj.version = Version(versions[-1])
    finally:
        if precious is not None:
            warn('seems something went wrong, leaving old branch untouched')
            try:
                os.rename(precious, obranch)
                os.rmdir(bkup_dir)
            except OSError:
                err('aiee, error while trying to restore old branch from %s' % precious)
        elif bkup_dir is not None and op.isdir(bkup_dir):
            # the old branch was never moved, drop the unused backup dir
            try:
                os.rmdir(bkup_dir)
            except OSError:
                warn('could not remove unused directory %s' % bkup_dir)

        if nbranch and op.exists(nbranch):
            shutil.rmtree(nbranch)

def prepare_new_branch(dsc, tmpdir):
    """Puts in tmpdir the files that should be imported into the branch.

    The package is unpacked with dpkg-source into tmpdir/deb2bzr-unpacked;
    debian/ and every file reported by nondebian_modified() for the package's
    .diff.gz are then moved into tmpdir, and the unpacked tree is removed.

    Raises RuntimeError if dpkg-source exits with a non-zero status.
    """
    unpacked = op.join(tmpdir, 'deb2bzr-unpacked')

    try:
        command = split_cmd('dpkg-source -q -sn -x', dsc, unpacked)
        # -q is still noisy :-/ so stdout is thrown away. It goes to the null
        # device rather than to a pipe, as nobody would be reading the pipe.
        devnull = open(os.devnull, 'w')
        try:
            retcode = subprocess.call(command, stdout=devnull)
        finally:
            devnull.close()

        if retcode != 0:
            raise RuntimeError('dpkg-source exited with status %d!'
                    % retcode)

        copy_files = [ 'debian' ]

        dsc_file = open(dsc)
        try:
            parsed_dsc = deb822.dsc(dsc_file)
            # XXX Insert comment here about how this is dpkg-source v1 specific
            for f in parsed_dsc['Files']:
                if re.search(r'\.diff\.gz$', f['name']):
                    diffgz = op.join(op.dirname(dsc), f['name'])
                    copy_files.extend(nondebian_modified(diffgz))
        finally:
            dsc_file.close()

        for f in copy_files:
            prefix = op.dirname(f)
            if prefix:
                prefix = op.join(tmpdir, prefix)
                if not op.exists(prefix):
                    os.makedirs(prefix)
            os.rename(op.join(unpacked, f), op.join(tmpdir, f))
    finally:
        if op.exists(unpacked):
            shutil.rmtree(unpacked)

def update_workingtree(branch, srcname):
    """Walks a working tree and adds unknown files to the inventory.

    Each added path gets the file id path2id(srcname/path). Returns the
    WorkingTree opened on branch.
    """
    files = []
    ids = []
    dirs = []

    tree = WorkingTree.open(branch)

    tree.lock_read()
    try:
        for (path, class_, kind, id_, entry) in tree.list_files():
            if id_ is not None:
                # debug('not adding already versioned %s' % path, 4)
                # TODO BzrCheckError: Internal check failed: file u'/tmp/foo/x'
                # entered as kind 'file' id 'x-20060806122140-73cs4pd44e74wzuy-1',
                # now of kind 'directory'
                continue

            files.append(path)
            ids.append(path2id(op.join(srcname, path)))
            debug('adding new entry %s' % path, 4)

            if kind == 'directory':
                dirs.append(path)
                debug('queueing %s for recursion' % path, 4)
    finally:
        # release the lock even if listing the tree fails
        tree.unlock()

    branch_len = len(branch.rstrip('/'))

    while dirs:
        d = dirs.pop(0)
        for root, w_dirs, w_files in os.walk(op.join(branch, d)):
            # make the walked path relative to the branch root
            root = root[branch_len+1:]
            for f in w_files + w_dirs:
                path = op.join(root, f)
                files.append(path)
                ids.append(path2id(op.join(srcname, path)))
                debug('adding new entry %s' % path, 4)

    tree.add(files, ids)
    return tree

def path2id(path):
    """Convert a path to a safe path_id representation.

    The path is lowercased and stripped of every character other than word
    characters, '-', '_', '.' and '/'. If that changed anything (uppercase
    letters included), the sha1 of the original path is appended. Finally
    '_' is doubled, '/' becomes '_', and ID_PREFIX is prepended.
    """
    safe_path = re.sub(r'[^-_./\w]', '', path.lower())

    if path != safe_path:
        # some surgery happened, so append a sha1 sum
        safe_path += '-' + sha.new(path).hexdigest()

    return ID_PREFIX + ':' + safe_path.replace('_', '__').replace('/', '_')

def compute_revinfo(changelog, parsed_dsc):
    """Computes a revinfo with deterministic rev-id.

    changelog must provide the Date, Maintainer, Changes and Source fields,
    and parsed_dsc a Files list whose entries have an md5sum. Returns a dict
    with the keys message, timezone, timestamp, committer, revprops and
    rev_id; update_branch() passes it as keyword arguments to commit().

    Raises ValueError if the Maintainer field has no <address> part.
    """
    revinfo = {}
    email_re = re.compile(r'<(?P<email>.+)>')
    cont_line = re.compile(r'^ (\.$)?', re.M)

    # The Date field ends in a numeric zone such as "+0200".
    tz_offset = int(changelog['Date'][-5:])
    tz_seconds = (abs(tz_offset) // 100 * 60 + abs(tz_offset) % 100) * 60
    if tz_offset < 0:
        tz_seconds = -tz_seconds

    parsed_date = strptime(changelog['Date'][:-6], "%a, %d %b %Y %H:%M:%S")
    date = datetime(*(parsed_date[:6] + (0, FixedOffset(tz_seconds // 60))))

    address = email_re.search(changelog['Maintainer'])
    if address is None:
        raise ValueError('no email address in Maintainer field: '
                + changelog['Maintainer'])
    email = address.group('email')

    ###

    utc_offset = date.utcoffset()

    revinfo['message'] = cont_line.sub('', changelog['Changes']).strip()
    revinfo['timezone'] = utc_offset.seconds + utc_offset.days * 24 * 3600
    revinfo['timestamp'] = timegm(date.utctimetuple())
    revinfo['committer'] = changelog['Maintainer']
    revinfo['revprops'] = { 'branch-nick': '%s/debian' % changelog['Source'] }

    for x in ('message', 'committer'):
        try:
            revinfo[x] = unicode(revinfo[x], 'ascii')
        except UnicodeDecodeError:
            revinfo[x] = to_unicode(revinfo[x])

    ###

    date_compact = compact_date(revinfo['timestamp'])
    md5sum_concat = ''.join(sorted(x['md5sum'] for x in parsed_dsc['Files']))
    unique_hash = sha.new(md5sum_concat).hexdigest()

    revinfo['rev_id'] = '%s:%s-%s-%s' % (ID_PREFIX, email, date_compact, unique_hash)

    return revinfo

#}}}

#{{{ parse_options

def parse_options():
    """Option parser for deb2bzr.

    Returns the (options, args) pair produced by optparse. Exits through
    OptionParser.error() when there are no arguments and neither --update
    nor --dscdir was given.
    """
    p = OptionParser(version='%%prog %s' % str(VERSION),
            usage='%prog [options] [ dsc | srcpkg ] ...')

    p.add_option('-q', '--quiet', action='store_const', dest='verbose',
            const=-1,
            help='only emit output if an error occurs')

    p.add_option('-v', '--verbose', action='count', dest='verbose',
            help='extra verbosity, can be specified more than once')

    p.add_option('-b', '--branch', dest='branch', metavar='BRANCH',
            help='in single-source mode, use BRANCH instead of auto-detected')

    p.add_option('-r', '--root', dest='rootdir', metavar='DIR',
            help='directory to place created branches in (default: $PWD)')

    p.add_option('-d', '--dscdir', dest='dscdir', metavar='DIR',
            help='inspect DIR for source packages, and import all unseen versions')

    p.add_option('-u', '--update', action='store_true', dest='update',
            help='update from mirror all known (or cmdline-specified) branches')

    p.add_option('-m', '--mirror', dest='mirror',
            help='mirror to get new versions from (default: %default)')

    p.add_option('-f', '--force', action='store_true', dest='force',
            help='force potentially dangerous actions to be taken')

    p.set_defaults(verbose=0, rootdir=os.getcwd(),
            mirror='snapshot.debian.net')

    options, args = p.parse_args()

    if not (args or options.dscdir or options.update):
        p.error('one of --update or --dscdir is needed if there are no arguments')

    return options, args

#}}}

#{{{ newer_versions, nondebian_modified, split_cmd, to_unicode

def newer_versions(srcdir, from_version):
    """Return the versions in srcdir/debian/changelog above from_version.

    The changelog is read from the top until an entry for from_version is
    found; the versions seen before it are returned oldest first. If
    from_version never shows up, every version in the changelog is returned.
    """
    newer = []

    clog = open(op.join(srcdir, 'debian/changelog'))
    try:
        for line in clog:
            m = _CHANGELOG_HEADER_RE.match(line)
            if not m:
                continue

            version = m.group('version')
            if version == from_version:
                break
            newer.append(version)
    finally:
        clog.close()

    # the changelog lists the newest entry first
    newer.reverse()
    return newer

def nondebian_modified(diffgz):
    """Return a list of files not under debian/ modified by the given .diff.gz.

    Paths given relative to the unpacked directory, as in patch -p0.
    Raises RuntimeError if lsdiff exits with a non-zero status."""

    # XXX We ain't no shell script, darling.
    command = split_cmd('lsdiff -z -p1 -x debian/* -x .bzr/* --strip=1', diffgz)
    p = subprocess.Popen(command, stdout=subprocess.PIPE)

    # Read before waiting: waiting first blocks forever once lsdiff has
    # filled the pipe buffer.
    lines = p.stdout.readlines()
    retcode = p.wait()

    if retcode != 0:
        raise RuntimeError('lsdiff exited with status %d!'
                % retcode)

    return [ l.rstrip('\r\n') for l in lines ]

def split_cmd(command, *args):
    """Returns: commands.split() + args"""
    return command.split() + list(args)

def to_unicode(string, default='iso-8859-1'):
    """Decode a byte string, trying several charsets in turn.

    The charsets tried are utf-8, the encoding guessed by chardet (if that
    module is available and has a guess), and finally default. If none of
    them works, the error of the last attempt is raised.
    """
    charsets = [ 'utf-8' ]

    if __have_chardet:
        guessed = chardet.detect(string)['encoding']
        # chardet reports None when it has no guess
        if guessed is not None:
            guessed = guessed.lower()
            if guessed not in charsets:
                charsets.append(guessed)

    if default not in charsets:
        charsets.append(default)

    last_error = None

    for charset in charsets:
        try:
            return unicode(string, charset)
        except (UnicodeDecodeError, LookupError):
            # LookupError: a charset name this Python has no codec for
            last_error = sys.exc_info()[1]

    raise last_error

#}}}

#{{{ msg_factory; debug, info, warn, err

def msg_factory(prefix, default_level):
    """Return a function that logs only if its level arg is <= VERBOSE_LEVEL.

    The returned function writes "<prefix>: <msg>" to stderr, without the
    trailing line breaks of msg. VERBOSE_LEVEL is looked up on every call,
    so changes made to it later on are honoured.
    """
    def function(msg, level=default_level):
        if level <= VERBOSE_LEVEL:
            sys.stderr.write('%s: %s\n' % (prefix, msg.rstrip('\r\n')))

    return function

debug = msg_factory('D', 2)
info  = msg_factory('I', 1)
warn  = msg_factory('W', 0)
err   = msg_factory('E', -1)

#}}}

#{{{ FixedOffset

# Taken from http://docs.python.org/lib/tzinfo-examples.txt

from datetime import tzinfo, timedelta, datetime

# A class building tzinfo objects for fixed-offset time zones.

class FixedOffset(tzinfo):
    """Fixed offset in minutes east from UTC."""

    def __init__(self, offset=0, name=None):
        """Build a zone lying offset minutes east of UTC, called name.

        Both arguments are optional so that the class can be instantiated
        without arguments, which pickling and copying tzinfo objects need.
        """
        self.__offset = timedelta(minutes=offset)
        self.__name = name

    def utcoffset(self, dt):
        """Return the fixed offset as a timedelta; dt is ignored."""
        return self.__offset

    def tzname(self, dt):
        """Return the name given at construction time; dt is ignored."""
        return self.__name

    def dst(self, dt):
        """Return a zero timedelta: a fixed offset has no DST."""
        return timedelta(0)

#}}}

if __name__ == '__main__':
    main()

###

#{{{ MIT LICENSE

# (http://www.opensource.org/licenses/mit-license.php)

# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:

# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#}}}