git_remote_helpers: add fastimport library

This commit is contained in:
Sverre Rabbelier
2010-08-22 01:22:14 -05:00
committed by Pat Thoyts
parent a2f32259dd
commit 6bbd536598
10 changed files with 1775 additions and 1 deletions

View File

@@ -0,0 +1,469 @@
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Import command classes."""
import os
# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes
# one extra character. Set this variable to True to work-around it. It only
# happens when renaming a file whose name contains spaces and/or quotes, and
# the symptom is:
# % git-fast-import
# fatal: Missing space after source: R "file 1.txt" file 2.txt
# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584
GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False
# Lists of command names
# Names of the top-level stream commands, as passed to
# ImportCommand.__init__ by the command classes in this module.
COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'feature', 'progress',
    'reset', 'tag']
# Names of the commands nested inside a commit, as passed to
# FileCommand.__init__ by the file command classes in this module.
FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
    'filedeleteall']
# Feature names
MULTIPLE_AUTHORS_FEATURE = "multiple-authors"
COMMIT_PROPERTIES_FEATURE = "commit-properties"
EMPTY_DIRS_FEATURE = "empty-directories"
# All feature names defined above.
FEATURE_NAMES = [
    MULTIPLE_AUTHORS_FEATURE,
    COMMIT_PROPERTIES_FEATURE,
    EMPTY_DIRS_FEATURE,
    ]
# for classes with no meaningful __str__()
def _simplerepr(self):
return "<%s at 0x%x>" % (self.__class__.__name__, id(self))
# classes that define __str__() should use this instead
def _detailrepr(self):
return ("<%s at 0x%x: %s>"
% (self.__class__.__name__, id(self), str(self)))
class ImportCommand(object):
    """Base class for import commands."""

    def __init__(self, name):
        """Remember the command name and start with no hidden fields.

        :param name: the command name, stored as ``self.name``
        """
        self.name = name
        # List of field names not to display
        self._binary = []

    __repr__ = _simplerepr

    def format(self):
        """Format this command as a fastimport dump fragment.

        Returns a (possibly multiline) string that, if seen in a
        fastimport stream, would parse to an equivalent command object.
        Subclasses must override this.
        """
        raise NotImplementedError("abstract method")

    def dump_str(self, names=None, child_lists=None, verbose=False):
        """Dump fields as a string.

        :param names: the list of fields to include or
            None for all public fields
        :param child_lists: dictionary of child command names to
            fields for that child command to include (not used by this
            base implementation)
        :param verbose: if True, prefix each line with the command class and
            display fields as a dictionary; if False, dump just the field
            values with tabs between them
        """
        if names is not None:
            fields = names
        else:
            # Public fields are the instance attributes without a leading '_'
            fields = [key for key in self.__dict__.keys()
                if not key.startswith('_')]
        interesting = {}
        for field in fields:
            value = self.__dict__.get(field)
            # Fields listed in _binary are masked unless they are unset
            if value is not None and field in self._binary:
                value = '(...)'
            interesting[field] = value
        if not verbose:
            return "\t".join([repr(interesting[field]) for field in fields])
        return "%s: %s" % (self.__class__.__name__, interesting)
class _MarkMixin(object):
    """Mixin for fastimport commands with a mark: blob, commit."""

    def __init__(self, mark, location):
        """Store the mark and location and derive ``self.id`` from them.

        :param mark: the mark for the command, or None
        :param location: a pair whose first item is a path and whose second
            item is a number (used with ``%d``), or None
        """
        self.mark = mark
        self.location = location
        if mark is not None:
            self.id = ':%s' % mark
        elif location is not None:
            # Provide a unique id in case the mark is missing
            self.id = '%s@%d' % (os.path.basename(location[0]), location[1])
        else:
            # Neither a mark nor a location is known (CommitCommand allows
            # location=None); fall back to the object identity so the id
            # is still unique instead of failing on location[0].
            self.id = '(unknown)@0x%x' % id(self)

    def __str__(self):
        return self.id

    __repr__ = _detailrepr
class BlobCommand(ImportCommand, _MarkMixin):
    """A 'blob' command carrying data and an optional mark."""

    def __init__(self, mark, data, location):
        """
        :param mark: the mark for this blob, or None
        :param data: the blob content; its length is emitted by format()
        :param location: passed through to _MarkMixin.__init__
        """
        ImportCommand.__init__(self, 'blob')
        _MarkMixin.__init__(self, mark, location)
        self.data = data
        self._binary = ['data']

    # ImportCommand precedes _MarkMixin in the MRO, so without this line
    # ImportCommand.__repr__ would shadow the mixin's detailed repr.
    __repr__ = _detailrepr

    def format(self):
        """Return the 'blob' stream fragment, with a mark line if marked."""
        if self.mark is None:
            mark_line = ""
        else:
            mark_line = "\nmark :%s" % self.mark
        return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data)
class CheckpointCommand(ImportCommand):
    """A 'checkpoint' command; it has no fields of its own."""

    def __init__(self):
        ImportCommand.__init__(self, 'checkpoint')

    def format(self):
        """Return the literal 'checkpoint' line (without a newline)."""
        return "checkpoint"
class CommitCommand(ImportCommand, _MarkMixin):
    """A 'commit' command together with its nested file commands."""

    def __init__(self, ref, mark, author, committer, message, from_,
        merges, file_cmds, location=None, more_authors=None, properties=None):
        """
        :param ref: the ref being committed to
        :param mark: the mark for this commit, or None
        :param author: tuple accepted by format_who_when(), or None
        :param committer: tuple accepted by format_who_when()
        :param message: the commit message (encoded as utf8 by format()),
            or None
        :param from_: the parent reference, or None
        :param merges: iterable of merged references, or None
        :param file_cmds: iterable of file commands, or None
        :param location: passed through to _MarkMixin.__init__
        :param more_authors: extra author tuples, or None
        :param properties: dictionary of property name to value, or None
        """
        ImportCommand.__init__(self, 'commit')
        _MarkMixin.__init__(self, mark, location)
        self.ref = ref
        self.author = author
        self.committer = committer
        self.message = message
        self.from_ = from_
        self.merges = merges
        self.file_cmds = file_cmds
        self.more_authors = more_authors
        self.properties = properties
        self._binary = ['file_cmds']

    # ImportCommand precedes _MarkMixin in the MRO, so without this line
    # ImportCommand.__repr__ would shadow the mixin's detailed repr.
    __repr__ = _detailrepr

    def format(self, use_features=True, include_file_contents=True):
        """Return the 'commit' stream fragment.

        :param use_features: if True, also emit the extra authors and the
            properties
        :param include_file_contents: if False, inline file content is
            left out of the nested 'M' commands
        """
        if self.mark is None:
            mark_line = ""
        else:
            mark_line = "\nmark :%s" % self.mark
        if self.author is None:
            author_section = ""
        else:
            author_section = "\nauthor %s" % format_who_when(self.author)
            if use_features and self.more_authors:
                for author in self.more_authors:
                    author_section += "\nauthor %s" % format_who_when(author)
        committer = "committer %s" % format_who_when(self.committer)
        if self.message is None:
            msg_section = ""
        else:
            msg = self.message.encode('utf8')
            msg_section = "\ndata %d\n%s" % (len(msg), msg)
        if self.from_ is None:
            from_line = ""
        else:
            from_line = "\nfrom %s" % self.from_
        if self.merges is None:
            merge_lines = ""
        else:
            merge_lines = "".join(["\nmerge %s" % (m,)
                for m in self.merges])
        if use_features and self.properties:
            property_lines = []
            for name in sorted(self.properties):
                value = self.properties[name]
                property_lines.append("\n" + format_property(name, value))
            properties_section = "".join(property_lines)
        else:
            properties_section = ""
        if self.file_cmds is None:
            filecommands = ""
        else:
            filecommands = "".join(
                ["\n" + self._format_file_cmd(fc, include_file_contents)
                 for fc in self.file_cmds])
        return "commit %s%s%s\n%s%s%s%s%s%s" % (self.ref, mark_line,
            author_section, committer, msg_section, from_line, merge_lines,
            properties_section, filecommands)

    @staticmethod
    def _format_file_cmd(file_cmd, include_file_contents):
        """Format one nested file command, honouring include_file_contents."""
        # Only FileModifyCommand.format() accepts the flag; the other file
        # commands carry no content and take no arguments.
        if isinstance(file_cmd, FileModifyCommand):
            return file_cmd.format(include_file_contents)
        return file_cmd.format()

    def dump_str(self, names=None, child_lists=None, verbose=False):
        """Dump this commit, then one tab-indented line per selected child.

        :param names: see ImportCommand.dump_str
        :param child_lists: dictionary of file command names to the field
            names to dump for them; file commands whose name is missing are
            skipped, and None dumps no file commands at all
        :param verbose: see ImportCommand.dump_str
        """
        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
        # file_cmds may be None (format() allows it), so guard the loop
        if child_lists is not None and self.file_cmds is not None:
            for f in self.file_cmds:
                try:
                    child_names = child_lists[f.name]
                except KeyError:
                    continue
                result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
        return '\n'.join(result)
class FeatureCommand(ImportCommand):
    """A 'feature' command: a feature name with an optional value."""

    def __init__(self, feature_name, value=None, location=None):
        """
        :param feature_name: the name of the feature
        :param value: the feature value, or None when there is none
        :param location: stored as ``self.location``
        """
        ImportCommand.__init__(self, 'feature')
        self.feature_name = feature_name
        self.value = value
        self.location = location

    def format(self):
        """Return ``feature NAME`` or ``feature NAME=VALUE``."""
        text = "feature %s" % (self.feature_name,)
        if self.value is not None:
            text += "=%s" % self.value
        return text
class ProgressCommand(ImportCommand):
    """A 'progress' command carrying a message."""

    def __init__(self, message):
        # :param message: the text that follows 'progress ' on the line
        ImportCommand.__init__(self, 'progress')
        self.message = message

    def format(self):
        """Return ``progress MESSAGE`` (without a newline)."""
        return "progress %s" % (self.message,)
class ResetCommand(ImportCommand):
    """A 'reset' command for a ref, optionally naming a 'from' commit."""

    def __init__(self, ref, from_):
        """
        :param ref: the ref being reset
        :param from_: the reference to reset to, or None
        """
        ImportCommand.__init__(self, 'reset')
        self.ref = ref
        self.from_ = from_

    def format(self):
        """Return the 'reset' stream fragment."""
        header = "reset %s" % (self.ref,)
        if self.from_ is None:
            return header
        # According to git-fast-import(1), the extra LF is optional here;
        # however, versions of git up to 1.5.4.3 had a bug by which the LF
        # was needed. Always emit it, since it doesn't hurt and maintains
        # compatibility with older versions.
        # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab
        return header + "\nfrom %s\n" % self.from_
class TagCommand(ImportCommand):
    """A 'tag' command."""

    def __init__(self, id, from_, tagger, message):
        """
        :param id: the tag name; also what str() of the command returns
        :param from_: the tagged reference, or None
        :param tagger: tuple accepted by format_who_when(), or None
        :param message: the tag message (encoded as utf8 by format()),
            or None
        """
        ImportCommand.__init__(self, 'tag')
        self.id = id
        self.from_ = from_
        self.tagger = tagger
        self.message = message

    def __str__(self):
        return self.id

    __repr__ = _detailrepr

    def format(self):
        """Return the 'tag' stream fragment, leaving out unset sections."""
        pieces = ["tag %s" % (self.id,)]
        if self.from_ is not None:
            pieces.append("\nfrom %s" % self.from_)
        if self.tagger is not None:
            pieces.append("\ntagger %s" % format_who_when(self.tagger))
        if self.message is not None:
            msg = self.message.encode('utf8')
            pieces.append("\ndata %d\n%s" % (len(msg), msg))
        return "".join(pieces)
class FileCommand(ImportCommand):
    """Base class for file commands."""
    # Adds nothing to ImportCommand; it only gives the file commands in
    # this module a common base class.
    pass
class FileModifyCommand(FileCommand):
    """An 'M' file command: set the mode and content of a path."""

    def __init__(self, path, mode, dataref, data):
        """
        :param path: the path being modified; validated by check_path()
        :param mode: the file mode as a string
        :param dataref: reference to the content, or None for inline data
        :param data: the inline content, or None

        Either dataref or data should be null.
        """
        FileCommand.__init__(self, 'filemodify')
        self.path = check_path(path)
        self.mode = mode
        self.dataref = dataref
        self.data = data
        self._binary = ['data']

    def __str__(self):
        return self.path

    __repr__ = _detailrepr

    def format(self, include_file_contents=True):
        """Return the 'M' stream fragment.

        :param include_file_contents: if False, the data section of an
            inline modification is left out
        """
        if self.dataref is not None:
            dataref = "%s" % (self.dataref,)
            datastr = ""
        elif include_file_contents:
            dataref = "inline"
            datastr = "\ndata %d\n%s" % (len(self.data), self.data)
        else:
            dataref = "inline"
            datastr = ""
        path = format_path(self.path)
        return "M %s %s %s%s" % (self.mode, dataref, path, datastr)

    def is_regular(self):
        """Return true if this is a regular file (mode 644)."""
        return self.mode.endswith("644")

    def is_executable(self):
        """Return true if this is an executable file (mode 755)."""
        return self.mode.endswith("755")

    def is_symlink(self):
        """Return true if this is a symlink (mode 120000)."""
        return self.mode == "120000"

    def is_gitlink(self):
        """Return true if this is a gitlink (mode 160000)."""
        return self.mode == "160000"
class FileDeleteCommand(FileCommand):
    """A 'D' file command: delete a path."""

    def __init__(self, path):
        # check_path() raises ValueError for a None or empty path
        FileCommand.__init__(self, 'filedelete')
        self.path = check_path(path)

    def __str__(self):
        return self.path

    __repr__ = _detailrepr

    def format(self):
        """Return ``D PATH`` with the path run through format_path()."""
        return "D %s" % (format_path(self.path),)
class FileCopyCommand(FileCommand):
    """A 'C' file command: copy one path to another."""

    def __init__(self, src_path, dest_path):
        """Both paths are validated by check_path()."""
        FileCommand.__init__(self, 'filecopy')
        self.src_path = check_path(src_path)
        self.dest_path = check_path(dest_path)

    def __str__(self):
        return "%s -> %s" % (self.src_path, self.dest_path)

    __repr__ = _detailrepr

    def format(self):
        """Return ``C SRC DEST``; only the source is quoted for spaces."""
        source = format_path(self.src_path, quote_spaces=True)
        destination = format_path(self.dest_path)
        return "C %s %s" % (source, destination)
class FileRenameCommand(FileCommand):
    """An 'R' file command: rename one path to another."""

    def __init__(self, old_path, new_path):
        """Both paths are validated by check_path()."""
        FileCommand.__init__(self, 'filerename')
        self.old_path = check_path(old_path)
        self.new_path = check_path(new_path)

    def __str__(self):
        return "%s -> %s" % (self.old_path, self.new_path)

    __repr__ = _detailrepr

    def format(self):
        """Return ``R OLD NEW``; only the old path is quoted for spaces."""
        old = format_path(self.old_path, quote_spaces=True)
        new = format_path(self.new_path)
        return "R %s %s" % (old, new)
class FileDeleteAllCommand(FileCommand):
    """A 'deleteall' file command; it has no fields of its own."""

    def __init__(self):
        FileCommand.__init__(self, 'filedeleteall')

    def format(self):
        """Return the literal 'deleteall' line (without a newline)."""
        return "deleteall"
def check_path(path):
    """Validate a path, handing it back unchanged when it is acceptable.

    :return: the path if all is OK
    :raise ValueError: if the path is None or the empty string
    """
    is_legal = not (path is None or path == '')
    if is_legal:
        return path
    raise ValueError("illegal path '%s'" % path)
def format_path(p, quote_spaces=False):
    """Format a path in utf8, quoting it if necessary.

    A path is wrapped in double quotes when it contains a newline (which
    is written as the two characters backslash-n), when it starts with a
    double quote, or when ``quote_spaces`` is set and it contains a space.

    :param p: the path
    :param quote_spaces: if True, also quote paths that contain a space
    :return: the formatted path, encoded as utf8
    """
    if '\n' in p:
        # Use str.replace rather than re.sub: in a regex replacement
        # template backslash-n is turned back into a real newline, which
        # would leave the path unescaped.
        p = p.replace('\n', '\\n')
        quote = True
    else:
        # startswith() instead of p[0] so an empty path does not raise
        quote = p.startswith('"') or (quote_spaces and ' ' in p)
    if quote:
        if GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE:
            extra = ' '
        else:
            extra = ''
        p = '"%s"%s' % (p, extra)
    return p.encode('utf8')
def format_who_when(fields):
    """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string.

    :param fields: sequence of (name, email, seconds since epoch,
        UTC offset in seconds)
    :return: ``NAME <EMAIL> SECS +HHMM``; the space after the name is
        left out when the name is empty
    """
    name, email, timestamp, offset = (
        fields[0], fields[1], fields[2], fields[3])
    if offset < 0:
        offset_sign = '-'
        offset = abs(offset)
    else:
        offset_sign = '+'
    # Floor division keeps hours and minutes integral under true division
    # as well; plain '/' would turn 19800 into "+0500".
    offset_hours, remainder = divmod(offset, 3600)
    offset_minutes = remainder // 60
    offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes)
    if name == '':
        sep = ''
    else:
        sep = ' '
    try:
        text_type = unicode
    except NameError:
        # No separate unicode type (Python 3): str needs no encoding here
        text_type = None
    if text_type is not None:
        if isinstance(name, text_type):
            name = name.encode('utf8')
        if isinstance(email, text_type):
            email = email.encode('utf8')
    return "%s%s<%s> %d %s" % (name, sep, email, timestamp, offset_str)
def format_property(name, value):
    """Format the name and value (both unicode) of a property as a string.

    :return: ``property NAME`` when value is None, otherwise
        ``property NAME LENGTH VALUE`` where LENGTH is the length of the
        utf8-encoded value
    """
    utf8_name = name.encode('utf8')
    if value is None:
        return "property %s" % (utf8_name,)
    utf8_value = value.encode('utf8')
    return "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value)

View File

@@ -0,0 +1,79 @@
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Date parsing routines.
Each routine returns timestamp,timezone where
* timestamp is seconds since epoch
* timezone is the offset from UTC in seconds.
"""
import time
from git_remote_helpers.fastimport import errors
def parse_raw(s, lineno=0):
    """Parse a date from a raw string.

    The format must be exactly "seconds-since-epoch offset-utc".
    See the spec for details.

    :param s: the string to parse
    :param lineno: line number handed to _parse_tz() for error reporting
    :return: (timestamp as a float, timezone offset in seconds)
    """
    pieces = s.split(' ', 1)
    timestamp_str, timezone_str = pieces
    return float(timestamp_str), _parse_tz(timezone_str, lineno)
def _parse_tz(tz, lineno):
"""Parse a timezone specification in the [+|-]HHMM format.
:return: the timezone offset in seconds.
"""
# from git_repository.py in bzr-git
if len(tz) != 5:
raise errors.InvalidTimezone(lineno, tz)
sign = {'+': +1, '-': -1}[tz[0]]
hours = int(tz[1:3])
minutes = int(tz[3:])
return sign * 60 * (60 * hours + minutes)
def parse_rfc2822(s, lineno=0):
    """Parse a date from a rfc2822 string.

    See the spec for details.

    Not implemented: always raises NotImplementedError.
    """
    # Use a readable message rather than the function object itself
    raise NotImplementedError("parse_rfc2822 is not implemented")
def parse_now(s, lineno=0):
    """Parse a date from a string.

    The format must be exactly "now".
    See the spec for details.

    Neither argument is inspected.

    :return: (current time from time.time(), 0)
    """
    current = time.time()
    return current, 0
# Lookup table of date parsing routines, keyed by date format name
DATE_PARSERS_BY_NAME = {
    'raw': parse_raw,
    'rfc2822': parse_rfc2822,
    'now': parse_now,
    }

View File

@@ -0,0 +1,182 @@
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Exception classes for fastimport"""
try:
    _ExceptionBase = StandardError
except NameError:
    # StandardError does not exist on Python 3; Exception is its parent
    # on Python 2, so it is the closest available base.
    _ExceptionBase = Exception


class FastImportError(_ExceptionBase):
    """The base exception class for all import processing exceptions."""

    # Message template; %(name)s placeholders are filled in from the
    # instance attributes by __str__().
    _fmt = "Unknown Import Error"

    def __str__(self):
        return self._fmt % self.__dict__
class ParsingError(FastImportError):
    """The base exception class for all import parsing exceptions."""

    _fmt = "Unknown Import Parsing Error"

    def __init__(self, filename, lineno):
        """
        :param filename: name of the input being parsed; a false value
            leaves the filename out of the message
        :param lineno: line number the error is reported at
        """
        FastImportError.__init__(self)
        self.filename = filename
        self.lineno = lineno

    def __str__(self):
        """Return ``[FILENAME, ]line LINENO: MESSAGE``."""
        location = "line " + str(self.lineno) + ": "
        if self.filename:
            location = self.filename + ", " + location
        return location + FastImportError.__str__(self)
class MissingBytes(ParsingError):
    """Raised when EOF encountered while expecting to find more bytes."""

    _fmt = ("Unexpected EOF - expected %(expected)d bytes,"
        " found %(found)d")

    def __init__(self, filename, lineno, expected, found):
        # :param filename, lineno: passed on to ParsingError.__init__
        # :param expected: the number of bytes that were wanted
        # :param found: the number of bytes actually read
        ParsingError.__init__(self, filename, lineno)
        self.expected = expected
        self.found = found
class MissingTerminator(ParsingError):
    """Raised when EOF encountered while expecting to find a terminator."""

    _fmt = "Unexpected EOF - expected '%(terminator)s' terminator"

    def __init__(self, filename, lineno, terminator):
        # :param filename, lineno: passed on to ParsingError.__init__
        # :param terminator: the terminator that was not found
        ParsingError.__init__(self, filename, lineno)
        self.terminator = terminator
class InvalidCommand(ParsingError):
    """Raised when an unknown command found."""

    _fmt = ("Invalid command '%(cmd)s'")

    def __init__(self, filename, lineno, cmd):
        # :param filename, lineno: passed on to ParsingError.__init__
        # :param cmd: the offending command text
        ParsingError.__init__(self, filename, lineno)
        self.cmd = cmd
class MissingSection(ParsingError):
    """Raised when a section is required in a command but not present."""

    _fmt = ("Command %(cmd)s is missing section %(section)s")

    def __init__(self, filename, lineno, cmd, section):
        # :param filename, lineno: passed on to ParsingError.__init__
        # :param cmd: the command being parsed
        # :param section: the section that is missing
        ParsingError.__init__(self, filename, lineno)
        self.cmd = cmd
        self.section = section
class BadFormat(ParsingError):
    """Raised when a section is formatted incorrectly."""

    _fmt = ("Bad format for section %(section)s in "
        "command %(cmd)s: found '%(text)s'")

    def __init__(self, filename, lineno, cmd, section, text):
        # :param filename, lineno: passed on to ParsingError.__init__
        # :param cmd: the command being parsed
        # :param section: the badly formatted section
        # :param text: the text that was found
        ParsingError.__init__(self, filename, lineno)
        self.cmd = cmd
        self.section = section
        self.text = text
class InvalidTimezone(ParsingError):
    """Raised when converting a string timezone to a seconds offset."""

    _fmt = "Timezone %(timezone)r could not be converted.%(reason)s"

    def __init__(self, filename, lineno, timezone, reason=None):
        """
        :param filename: passed on to ParsingError.__init__
        :param lineno: passed on to ParsingError.__init__
        :param timezone: the timezone text that could not be converted
        :param reason: optional explanation appended to the message
        """
        ParsingError.__init__(self, filename, lineno)
        self.timezone = timezone
        # The leading space separates the reason from the fixed sentence
        if not reason:
            self.reason = ''
        else:
            self.reason = ' ' + reason
class UnknownDateFormat(FastImportError):
    """Raised when an unknown date format is given."""

    _fmt = ("Unknown date format '%(format)s'")

    def __init__(self, format):
        # :param format: the date format name that was not recognised
        FastImportError.__init__(self)
        self.format = format
class MissingHandler(FastImportError):
    """Raised when a processor can't handle a command."""

    _fmt = ("Missing handler for command %(cmd)s")

    def __init__(self, cmd):
        # :param cmd: the command that has no handler
        FastImportError.__init__(self)
        self.cmd = cmd
class UnknownParameter(FastImportError):
    """Raised when an unknown parameter is passed to a processor."""

    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")

    def __init__(self, param, knowns):
        # :param param: the parameter that was not recognised
        # :param knowns: the known parameters, shown in the message
        FastImportError.__init__(self)
        self.param = param
        self.knowns = knowns
class BadRepositorySize(FastImportError):
    """Raised when the repository has an incorrect number of revisions."""

    _fmt = ("Bad repository size - %(found)d revisions found, "
        "%(expected)d expected")

    def __init__(self, expected, found):
        # :param expected: the number of revisions expected
        # :param found: the number of revisions found
        FastImportError.__init__(self)
        self.expected = expected
        self.found = found
class BadRestart(FastImportError):
    """Raised when the import stream and id-map do not match up."""

    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
        "but matching revision-id is unknown")

    def __init__(self, commit_id):
        # :param commit_id: the commit id shown in the message
        FastImportError.__init__(self)
        self.commit_id = commit_id
class UnknownFeature(FastImportError):
    """Raised when an unknown feature is given in the input stream."""

    _fmt = ("Unknown feature '%(feature)s' - try a later importer or "
        "an earlier data format")

    def __init__(self, feature):
        # :param feature: the feature name that was not recognised
        FastImportError.__init__(self)
        self.feature = feature

View File

@@ -0,0 +1,47 @@
class HeadTracker(object):
    """
    Keep track of the heads in a fastimport stream.
    """

    def __init__(self):
        self.last_ref = None
        # map git ref name (e.g. "refs/heads/master") to id of last
        # commit with that ref
        self.last_ids = {}
        # the set of heads seen so far in the stream, as a mapping
        # from commit id of the head to set of ref names
        self.heads = {}

    def track_heads(self, cmd):
        """Track the repository heads given a CommitCommand.

        :param cmd: the CommitCommand
        :return: the list of parents in terms of commit-ids
        """
        # Get the true set of parents: an explicit 'from' wins, otherwise
        # the last commit seen on the same ref (if any)
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        # merges may be None (CommitCommand.format() allows that), so only
        # extend when there is something to add
        if cmd.merges:
            parents.extend(cmd.merges)
        # Track the heads
        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
        return parents

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        """Record cmd_id as the newest commit on cmd_ref.

        :param cmd_ref: the ref name
        :param cmd_id: the id of the commit
        :param parents: ids that stop being heads because the new commit
            descends from them, or None
        """
        if parents is not None:
            for parent in parents:
                if parent in self.heads:
                    del self.heads[parent]
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref

View File

@@ -0,0 +1,88 @@
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Miscellaneous useful stuff."""
import os
def single_plural(n, single, plural):
    """Pick the noun form that matches a count.

    :return: ``single`` when n equals 1, otherwise ``plural``
    """
    if n != 1:
        return plural
    return single
def invert_dict(d):
    """Invert a dictionary with keys matching each value turned into a list.

    :param d: dictionary with hashable values
    :return: dictionary mapping each value of d to the list of keys that
        had that value
    """
    # Based on recipe from ASPN
    result = {}
    # Iterate the keys directly: unlike iteritems(), this works on both
    # Python 2 and Python 3.
    for key in d:
        result.setdefault(d[key], []).append(key)
    return result
def invert_dictset(d):
    """Invert a dictionary with keys matching a set of values, turned into lists.

    :param d: dictionary whose values are iterables of hashable items
    :return: dictionary mapping each item to the list of keys whose
        iterable contained it
    """
    # Based on recipe from ASPN
    result = {}
    # Iterate the keys directly: unlike iteritems(), this works on both
    # Python 2 and Python 3.
    for key in d:
        for value in d[key]:
            result.setdefault(value, []).append(key)
    return result
def _common_path_and_rest(l1, l2, common=[]):
# From http://code.activestate.com/recipes/208993/
if len(l1) < 1: return (common, l1, l2)
if len(l2) < 1: return (common, l1, l2)
if l1[0] != l2[0]: return (common, l1, l2)
return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]])
def common_path(path1, path2):
    """Find the common bit of 2 paths.

    The comparison is character by character, so the result need not end
    on a directory boundary.

    :return: the longest common leading substring of the two paths
    """
    # os.path.commonprefix does the same character-wise comparison without
    # recursing once per character, so long shared prefixes are safe.
    return os.path.commonprefix([path1, path2])
def common_directory(paths):
    """Find the deepest common directory of a list of paths.

    :return: if no paths are provided, None is returned;
        if there is no common directory, '' is returned;
        otherwise the common directory with a trailing / is returned.
    """
    def get_dir_with_slash(path):
        # Already a directory (or empty): nothing to trim
        if path == '' or path.endswith('/'):
            return path
        parent = os.path.split(path)[0]
        if parent == '':
            return parent
        return parent + '/'

    if not paths:
        return None
    if len(paths) == 1:
        return get_dir_with_slash(paths[0])
    # Fold common_path over the list, starting from the first two entries
    shared = common_path(paths[0], paths[1])
    for candidate in paths[2:]:
        shared = common_path(shared, candidate)
    return get_dir_with_slash(shared)

View File

@@ -0,0 +1,65 @@
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Routines for saving and loading the id-map file."""
import os
def save_id_map(filename, revision_ids):
    """Save the mapping of commit ids to revision ids to a file.

    One "commit-id revision-id" line is written per entry.
    Throws the usual exceptions if the file cannot be opened,
    written to or closed.

    :param filename: name of the file to save the data to
    :param revision_ids: a dictionary of commit ids to revision ids.
    """
    out = open(filename, 'wb')
    try:
        out.writelines("%s %s\n" % entry
            for entry in revision_ids.iteritems())
        out.flush()
    finally:
        out.close()
def load_id_map(filename):
    """Load the mapping of commit ids to revision ids from a file.

    If the file does not exist, an empty result is returned.
    If the file does exist but cannot be opened, read or closed,
    the normal exceptions are thrown.

    NOTE: It is assumed that commit-ids do not have embedded spaces.

    :param filename: name of the file to load the data from
    :result: map, count where:
        map = a dictionary of commit ids to revision ids;
        count = the number of entries read into map
    """
    result = {}
    count = 0
    if os.path.exists(filename):
        f = open(filename)
        try:
            for line in f:
                # Only strip a newline that is really there, so a final
                # line without one keeps its last character.
                if line.endswith('\n'):
                    line = line[:-1]
                if not line:
                    # Skip blank lines instead of failing on parts[1]
                    continue
                parts = line.split(' ', 1)
                result[parts[0]] = parts[1]
                count += 1
        finally:
            f.close()
    return result, count

View File

@@ -0,0 +1,621 @@
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import warnings
"""Parser of import data into command objects.
In order to reuse existing front-ends, the stream format is a subset of
the one used by git-fast-import (as of the 1.5.4 release of git at least).
The grammar is:
stream ::= cmd*;
cmd ::= new_blob
| new_commit
| new_tag
| reset_branch
| checkpoint
| progress
;
new_blob ::= 'blob' lf
mark?
file_content;
file_content ::= data;
new_commit ::= 'commit' sp ref_str lf
mark?
('author' sp name '<' email '>' when lf)?
'committer' sp name '<' email '>' when lf
commit_msg
('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
file_change*
lf?;
commit_msg ::= data;
file_change ::= file_clr
| file_del
| file_rnm
| file_cpy
| file_obm
| file_inm;
file_clr ::= 'deleteall' lf;
file_del ::= 'D' sp path_str lf;
file_rnm ::= 'R' sp path_str sp path_str lf;
file_cpy ::= 'C' sp path_str sp path_str lf;
file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
data;
new_tag ::= 'tag' sp tag_str lf
'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
'tagger' sp name '<' email '>' when lf
tag_msg;
tag_msg ::= data;
reset_branch ::= 'reset' sp ref_str lf
('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
lf?;
checkpoint ::= 'checkpoint' lf
lf?;
progress ::= 'progress' sp not_lf* lf
lf?;
# note: the first idnum in a stream should be 1 and subsequent
# idnums should not have gaps between values as this will cause
# the stream parser to reserve space for the gapped values. An
# idnum can be updated in the future to a new object by issuing
# a new mark directive with the old idnum.
#
mark ::= 'mark' sp idnum lf;
data ::= (delimited_data | exact_data)
lf?;
# note: delim may be any string but must not contain lf.
# data_line may contain any data but must not be exactly
# delim. The lf after the final data_line is included in
# the data.
delimited_data ::= 'data' sp '<<' delim lf
(data_line lf)*
delim lf;
# note: declen indicates the length of binary_data in bytes.
# declen does not include the lf preceding the binary data.
#
exact_data ::= 'data' sp declen lf
binary_data;
# note: quoted strings are C-style quoting supporting \c for
# common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
# is the signed byte value in octal. Note that the only
# characters which must actually be escaped to protect the
# stream formatting is: \, \" and LF. Otherwise these values
# are UTF8.
#
ref_str ::= ref;
sha1exp_str ::= sha1exp;
tag_str ::= tag;
path_str ::= path | '"' quoted(path) '"' ;
mode ::= '100644' | '644'
| '100755' | '755'
| '120000'
;
declen ::= # unsigned 32 bit value, ascii base10 notation;
bigint ::= # unsigned integer value, ascii base10 notation;
binary_data ::= # file content, not interpreted;
when ::= raw_when | rfc2822_when;
raw_when ::= ts sp tz;
rfc2822_when ::= # Valid RFC 2822 date and time;
sp ::= # ASCII space character;
lf ::= # ASCII newline (LF) character;
# note: a colon (':') must precede the numerical value assigned to
# an idnum. This is to distinguish it from a ref or tag name as
# GIT does not permit ':' in ref or tag strings.
#
idnum ::= ':' bigint;
path ::= # GIT style file path, e.g. \"a/b/c\";
ref ::= # GIT ref name, e.g. \"refs/heads/MOZ_GECKO_EXPERIMENT\";
tag ::= # GIT tag name, e.g. \"FIREFOX_1_5\";
sha1exp ::= # Any valid GIT SHA1 expression;
hexsha1 ::= # SHA1 in hexadecimal format;
# note: name and email are UTF8 strings, however name must not
# contain '<' or lf and email must not contain any of the
# following: '<', '>', lf.
#
name ::= # valid GIT author/committer name;
email ::= # valid GIT author/committer email;
ts ::= # time since the epoch in seconds, ascii base10 notation;
tz ::= # GIT style timezone;
# note: comments may appear anywhere in the input, except
# within a data command. Any form of the data command
# always escapes the related input from comment processing.
#
# In case it is not clear, the '#' that starts the comment
# must be the first character on that line (an lf must have
# preceded it).
#
comment ::= '#' not_lf* lf;
not_lf ::= # Any byte that is not ASCII newline (LF);
"""
import re
import sys
from git_remote_helpers.fastimport import (
commands,
dates,
errors
)
## Stream parsing ##
class LineBasedParser(object):
    """A reader over a file-like object that keeps track of line numbers."""

    def __init__(self, input, filename=None):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        :param filename: name to report in errors; defaults to
            ``input.name`` when the input has one, else "(unknown)"
        """
        self.input = input
        if filename is None:
            try:
                self.filename = input.name
            except AttributeError:
                self.filename = "(unknown)"
        else:
            self.filename = filename
        self.lineno = 0
        # Lines pushed back onto the input stream
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information."""
        raise exception(self.filename, self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF."""
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # A final line may lack its newline; only strip one that is there
        if line.endswith("\n"):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :return: a string
        """
        result = self.input.read(count)
        found = len(result)
        self.lineno += result.count("\n")
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return result

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.

        :return: the bytes read up to but excluding the terminator.
        """
        lines = []
        term = terminator + '\n'
        while True:
            line = self.input.readline()
            if line == '':
                # EOF: readline() would return '' forever, so stop here
                self.abort(errors.MissingTerminator, terminator)
            # Keep lineno in step with the lines consumed, as read_bytes does
            self.lineno += 1
            if line == term:
                break
            lines.append(line)
        return ''.join(lines)
# Regular expressions used for parsing. (Note: The spec states that the name
# part should be non-empty but git-fast-export doesn't always do that so
# the first group is [^<]*, which also matches an empty name.) Also
# git-fast-import code says the space before the email is optional.
_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')
_WHO_RE = re.compile(r'([^<]*)<(.*)>')
class ImportParser(LineBasedParser):
def __init__(self, input, filename=None):
"""A Parser of import commands.
:param input: the file-like object to read from
:param filename: optional name for the input; handed unchanged to
LineBasedParser.__init__
"""
LineBasedParser.__init__(self, input, filename)
# We auto-detect the date format when a date is first encountered
# (see _who_when), so no date parser is selected yet.
self.date_parser = None
def warning(self, msg):
    """Write a warning about the current input line to stderr.

    :param msg: the warning text; it is prefixed with the current line
        number and followed by a newline
    """
    text = "warning line %d: %s\n" % (self.lineno, msg)
    sys.stderr.write(text)
def parse(self):
    """Generate the parsed commands found in the input stream.

    Blank lines and lines starting with '#' are skipped.  Iteration ends
    at EOF or at a line starting with 'done'.  A line that matches no
    known command is reported through abort() as InvalidCommand.
    """
    while True:
        line = self.next_line()
        if line is None:
            break
        if not line or line.startswith('#'):
            continue
        # Commands are tested in order of likelihood; the slice offsets
        # are the lengths of the prefixes being stripped.
        if line.startswith('commit '):
            yield self._parse_commit(line[7:])
        elif line.startswith('blob'):
            yield self._parse_blob()
        elif line.startswith('done'):
            break
        elif line.startswith('progress '):
            yield commands.ProgressCommand(line[9:])
        elif line.startswith('reset '):
            yield self._parse_reset(line[6:])
        elif line.startswith('tag '):
            yield self._parse_tag(line[4:])
        elif line.startswith('checkpoint'):
            yield commands.CheckpointCommand()
        elif line.startswith('feature'):
            # The argument starts after 'feature ' (8 characters).
            yield self._parse_feature(line[8:])
        else:
            self.abort(errors.InvalidCommand, line)
def iter_commands(self):
    """Deprecated alias of parse(): warn, then delegate to it."""
    notice = "iter_commands() deprecated: use parse()"
    # stacklevel=2 attributes the warning to the caller of this method.
    warnings.warn(notice, DeprecationWarning, stacklevel=2)
    return self.parse()
def iter_file_commands(self):
    """Generate the file commands that follow a commit header.

    Blank lines and '#' comment lines are skipped.  The first line that
    is not a file command is silently pushed back onto the line buffer
    and iteration ends; EOF ends iteration as well.
    """
    while True:
        line = self.next_line()
        if line is None:
            break
        if not line or line.startswith('#'):
            continue
        # File commands are tested in order of likelihood.
        if line.startswith('M '):
            yield self._parse_file_modify(line[2:])
        elif line.startswith('D '):
            doomed = self._path(line[2:])
            yield commands.FileDeleteCommand(doomed)
        elif line.startswith('R '):
            before, after = self._path_pair(line[2:])
            yield commands.FileRenameCommand(before, after)
        elif line.startswith('C '):
            origin, target = self._path_pair(line[2:])
            yield commands.FileCopyCommand(origin, target)
        elif line.startswith('deleteall'):
            yield commands.FileDeleteAllCommand()
        else:
            # Not ours: leave it for whoever reads next.
            self.push_line(line)
            break
def _parse_blob(self):
    """Parse a blob command: an optional mark followed by a data section."""
    # Record the position before any further lines are consumed.
    where = (self.filename, self.lineno)
    mark = self._get_mark_if_any()
    payload = self._get_data('blob')
    return commands.BlobCommand(mark, payload, where)
def _parse_commit(self, ref):
    """Parse a commit command.

    Sections are read in this order: optional mark, optional author(s),
    committer, message, optional from, merges, properties and finally
    the file commands.

    :param ref: the text that followed 'commit ' on the command line
    """
    where = (self.filename, self.lineno)
    mark = self._get_mark_if_any()
    author = self._get_user_info('commit', 'author', False)
    # Keep collecting author lines until one is missing (None).
    more_authors = list(
        iter(lambda: self._get_user_info('commit', 'author', False), None))
    committer = self._get_user_info('commit', 'committer')
    raw_message = self._get_data('commit', 'message')
    try:
        message = raw_message.decode('utf_8')
    except UnicodeDecodeError:
        self.warning(
            "commit message not in utf8 - replacing unknown characters")
        message = raw_message.decode('utf_8', 'replace')
    from_ = self._get_from()
    merges = []
    for merge in iter(self._get_merge, None):
        # while the spec suggests it's illegal, git-fast-export
        # outputs multiple merges on the one line, e.g.
        # merge :x :y :z
        merges.extend(merge.split(" "))
    properties = {}
    for name, value in iter(self._get_property, None):
        properties[name] = value
    file_cmds = list(self.iter_file_commands())
    return commands.CommitCommand(ref, mark, author, committer, message,
                                  from_, merges, file_cmds, where,
                                  more_authors=more_authors,
                                  properties=properties)
def _parse_feature(self, info):
    """Parse a feature command.

    :param info: 'name' or 'name=value'; a value, when present, is run
        through _path() so a quoted value is unquoted
    """
    name = info
    value = None
    if "=" in info:
        name, raw_value = info.split("=", 1)
        value = self._path(raw_value)
    where = (self.filename, self.lineno)
    return commands.FeatureCommand(name, value, location=where)
def _parse_file_modify(self, info):
    """Parse a filemodify command within a commit.

    :param info: a string in the format "mode dataref path"
      (where dataref might be the hard-coded literal 'inline', in which
      case the content follows as a data section).
    """
    fields = info.split(' ', 2)
    path = self._path(fields[2])
    mode = fields[0]
    ref = fields[1]
    if ref != 'inline':
        return commands.FileModifyCommand(path, mode, ref, None)
    # Inline content: there is no dataref, the bytes come next.
    content = self._get_data('filemodify')
    return commands.FileModifyCommand(path, mode, None, content)
def _parse_reset(self, ref):
    """Parse a reset command: the ref plus an optional from section.

    :param ref: the text that followed 'reset ' on the command line
    """
    origin = self._get_from()
    return commands.ResetCommand(ref, origin)
def _parse_tag(self, name):
    """Parse a tag command.

    Reads the mandatory from section, the tagger (a tagger line without
    a date is accepted) and the message.  A message that is not valid
    UTF-8 is decoded with replacement characters after a warning, the
    same way _parse_commit treats commit messages, instead of letting
    UnicodeDecodeError escape.

    :param name: the text that followed 'tag ' on the command line
    """
    from_ = self._get_from('tag')
    tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
    raw_message = self._get_data('tag', 'message')
    try:
        message = raw_message.decode('utf_8')
    except UnicodeDecodeError:
        self.warning(
            "tag message not in utf8 - replacing unknown characters")
        message = raw_message.decode('utf_8', 'replace')
    return commands.TagCommand(name, from_, tagger, message)
def _get_mark_if_any(self):
    """Parse an optional mark section.

    :return: the mark id (the text after 'mark :'), or None when the
        next line is not a mark.  A non-mark line is pushed back; at EOF
        there is nothing to push back and None is returned, matching the
        other _get_* helpers instead of failing on None.
    """
    line = self.next_line()
    if line is None:
        # EOF: next_line() returned no line at all.
        return None
    if line.startswith('mark :'):
        return line[len('mark :'):]
    self.push_line(line)
    return None
def _get_from(self, required_for=None):
    """Parse a from section.

    :param required_for: name of the command that requires the section;
        when set, a line that is not a from line is reported through
        abort() as MissingSection instead of being pushed back
    :return: the text after 'from ', or None at EOF or when an optional
        section is absent
    """
    line = self.next_line()
    if line is None:
        return None
    if line.startswith('from '):
        return line[5:]
    if required_for:
        self.abort(errors.MissingSection, required_for, 'from')
    else:
        self.push_line(line)
        return None
def _get_merge(self):
    """Parse an optional merge section.

    :return: the text after 'merge ', or None at EOF or when the next
        line is not a merge line (that line is pushed back)
    """
    line = self.next_line()
    if line is None:
        return None
    if not line.startswith('merge '):
        self.push_line(line)
        return None
    return line[6:]
def _get_property(self):
    """Parse an optional property section.

    :return: whatever _name_value() makes of the text after 'property ',
        or None at EOF or when the next line is not a property line
        (that line is pushed back)
    """
    line = self.next_line()
    if line is None:
        return None
    if not line.startswith('property '):
        self.push_line(line)
        return None
    return self._name_value(line[9:])
def _get_user_info(self, cmd, section, required=True,
                   accept_just_who=False):
    """Parse a user section such as author, committer or tagger.

    :param cmd: name of the command being parsed (used in error reports)
    :param section: the keyword expected at the start of the line
    :param required: when true, a missing section (including EOF) is
        reported through abort() as MissingSection
    :param accept_just_who: forwarded to _who_when()
    :return: the result of _who_when(), or None when an optional section
        is absent.  A non-matching line is pushed back; at EOF there is
        no line to push back, so None is returned directly rather than
        failing on None.
    """
    line = self.next_line()
    prefix = section + ' '
    if line is not None and line.startswith(prefix):
        return self._who_when(line[len(prefix):], cmd, section,
                              accept_just_who=accept_just_who)
    if required:
        self.abort(errors.MissingSection, cmd, section)
    elif line is not None:
        self.push_line(line)
    return None
def _get_data(self, required_for, section='data'):
    """Parse a data section.

    Two forms are handled: 'data <<TERM', read up to a line equal to
    TERM, and 'data N', read as exactly N bytes optionally followed by a
    single newline.

    :param required_for: name of the command that needs the data (used
        in error reports)
    :param section: section name used in error reports
    :return: the data that was read.  A missing data line (including
        EOF) is reported through abort() as MissingSection.
    """
    line = self.next_line()
    if line is None or not line.startswith('data '):
        self.abort(errors.MissingSection, required_for, section)
        return None
    rest = line[len('data '):]
    if rest.startswith('<<'):
        return self.read_until(rest[2:])
    payload = self.read_bytes(int(rest))
    # optional LF after data: peek at the following line.
    trailer = self.input.readline()
    if trailer:
        self.lineno += 1
        if trailer != "\n":
            # A real line, not the optional LF: hand it back.  Only strip
            # an actual newline so an unterminated last line stays whole.
            if trailer.endswith("\n"):
                trailer = trailer[:-1]
            self.push_line(trailer)
    # At EOF (empty read) there is nothing to consume or push back.
    return payload
def _who_when(self, s, cmd, section, accept_just_who=False):
    """Parse who and when information from a string.

    :param s: the text after the section keyword
    :param cmd: command name, used when reporting a bad format
    :param section: section name, used when reporting a bad format
    :param accept_just_who: allow a string that has a name and email
        but no date; the 'now' date parser then supplies the time
    :return: a tuple of (name,email,timestamp,timezone). name may be
        the empty string if only an email address was given.
    """
    match = _WHO_AND_WHEN_RE.search(s)
    if match:
        datestr = match.group(3)
        if self.date_parser is None:
            # auto-detect the date format on first use; exactly one
            # space means two fields, which is taken as the raw format
            if datestr.count(' ') == 1:
                style = 'raw'
            elif datestr == 'now':
                style = 'now'
            else:
                style = 'rfc2822'
            self.date_parser = dates.DATE_PARSERS_BY_NAME[style]
        when = self.date_parser(datestr, self.lineno)
    else:
        match = _WHO_RE.search(s)
        if accept_just_who and match:
            # HACK around missing time
            # TODO: output a warning here
            when = dates.DATE_PARSERS_BY_NAME['now']('now')
        else:
            self.abort(errors.BadFormat, cmd, section, s)
    # Do not attempt to decode name or email address; they are just
    # bytes. (Everything will work out better if they are in UTF-8,
    # but that's not guaranteed.)
    who = match.group(1).rstrip()
    address = match.group(2)
    return (who, address, when[0], when[1])
def _name_value(self, s):
    """Parse a (name,value) tuple from 'name value-length value'.

    A bare name yields a value of None.  When the declared length
    exceeds what is on the line, the value continues past the line's
    newline and the remainder is fetched with read_bytes().
    """
    parts = s.split(' ', 2)
    name = parts[0]
    if len(parts) == 1:
        return (name, None)
    size = int(parts[1])
    value = parts[2]
    missing = size - len(value)
    if missing == 1:
        # Only the newline that ended the line is missing.
        value += "\n"
    elif missing > 0:
        # The newline counts as one of the missing characters.
        remainder = self.read_bytes(missing - 1)
        value += "\n" + remainder
    return (name, value.decode('utf8'))
def _path(self, s):
    """Parse a path.

    A path starting with a double quote must also end with one (anything
    else is reported through abort() as BadFormat) and is unquoted; any
    other path is returned untouched.
    """
    if not s.startswith('"'):
        # Do *not* decode the path to a Unicode string: filenames on
        # Unix are just bytes. Git and Mercurial, at least, inherit
        # this stance. git-fast-import(1) merely says "It is
        # recommended that <path> always be encoded using UTF-8.", which
        # is good advice ... but not something we can count on here.
        return s
    if s[-1] == '"':
        return _unquote_c_string(s[1:-1])
    self.abort(errors.BadFormat, '?', '?', s)
    return s
def _path_pair(self, s):
    """Parse two paths separated by a space.

    Both halves are passed through _unquote_c_string().  A second path
    with only one of its two surrounding quotes, or a string that does
    not split into two parts, is reported through abort() as BadFormat.
    """
    # TODO: handle a space in the first path
    if s.startswith('"'):
        # Quoted first path: it ends at the quote followed by a space.
        parts = s[1:].split('" ', 1)
    else:
        parts = s.split(' ', 1)
    if len(parts) != 2:
        self.abort(errors.BadFormat, '?', '?', s)
    else:
        second = parts[1]
        opens = second.startswith('"')
        closes = second.endswith('"')
        if opens and closes:
            parts[1] = second[1:-1]
        elif opens or closes:
            self.abort(errors.BadFormat, '?', '?', s)
    return map(_unquote_c_string, parts)
def _mode(self, s):
    """Parse a file mode into executable and symlink flags.

    An unrecognised mode is reported through abort() as BadFormat.

    :return (is_executable, is_symlink)
    """
    # Note: Output from git-fast-export slightly different to spec,
    # hence several spellings per mode.
    known_modes = (
        (('644', '100644', '0100644'), (False, False)),
        (('755', '100755', '0100755'), (True, False)),
        (('120000', '0120000'), (False, True)),
    )
    for spellings, flags in known_modes:
        if s in spellings:
            return flags
    self.abort(errors.BadFormat, 'filemodify', 'mode', s)
def _unquote_c_string(s):
"""replace C-style escape sequences (\n, \", etc.) with real chars."""
# HACK: Python strings are close enough: the escapes are interpreted by
# Python's own 'string_escape' codec rather than by a C-string parser.
# NOTE(review): str.decode with 'string_escape' looks Python 2 only --
# confirm before running this module under Python 3.
return s.decode('string_escape', 'replace')

View File

@@ -0,0 +1,222 @@
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Processor of import commands.
This module provides core processing functionality including an abstract class
for basing real processors on. See the processors package for examples.
"""
import sys
import time
import logging
from git_remote_helpers.fastimport import errors
log = logging.getLogger(__name__)
class ImportProcessor(object):
    """Base class for import processors.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    # Names of the parameters validate_parameters() accepts.
    known_params = []

    def __init__(self, params=None, verbose=False, outf=None):
        """Create a processor.

        :param params: optional dict of parameters; every key must be
            listed in known_params
        :param verbose: stored on the instance as self.verbose
        :param outf: output file object; defaults to sys.stdout
        """
        if outf is None:
            self.outf = sys.stdout
        else:
            self.outf = outf
        self.verbose = verbose
        if params is None:
            self.params = {}
        else:
            self.params = params
        self.validate_parameters()

        # Handlers can set this to request exiting cleanly without
        # iterating through the remaining commands
        self.finished = False

    def validate_parameters(self):
        """Validate that the parameters are correctly specified.

        Raises UnknownParameter for the first key of self.params that is
        not in known_params.
        """
        for p in self.params:
            if p not in self.known_params:
                raise errors.UnknownParameter(p, self.known_params)

    def _find_handler(self, attr):
        """Return the plain function implementing ``attr``, or None.

        The classes of the instance are searched in method resolution
        order, so a handler defined on an intermediate base class is
        found as well.  The abstract stubs declared on ImportProcessor
        itself only count when ImportProcessor is used directly;
        otherwise a subclass that implements nothing still yields None.
        """
        cls = self.__class__
        for klass in cls.__mro__:
            if klass is ImportProcessor and cls is not ImportProcessor:
                continue
            if attr in klass.__dict__:
                return klass.__dict__[attr]
        return None

    def process(self, commands):
        """Process a stream of fast-import commands from a parser.

        Each command is dispatched to the method named after it
        (``<cmd.name>_handler``), wrapped in pre_handler()/post_handler().
        MissingHandler is raised when no class of the processor provides
        that method.  Processing stops early once self.finished is set.

        :param commands: a sequence of commands.ImportCommand objects
        """
        self.pre_process()
        for cmd in commands:
            handler = self._find_handler(cmd.name + "_handler")
            if handler is None:
                raise errors.MissingHandler(cmd.name)
            self.pre_handler(cmd)
            handler(self, cmd)
            self.post_handler(cmd)
            if self.finished:
                break
        self.post_process()

    def pre_process(self):
        """Hook for logic at start of processing.

        Called just before process() starts iterating over its sequence
        of commands.
        """
        pass

    def post_process(self):
        """Hook for logic at end of successful processing.

        Called after process() finishes successfully iterating over its
        sequence of commands (i.e. not called if an exception is raised
        while processing commands).
        """
        pass

    def pre_handler(self, cmd):
        """Hook for logic before each handler starts."""
        pass

    def post_handler(self, cmd):
        """Hook for logic after each handler finishes."""
        pass

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        raise NotImplementedError(self.progress_handler)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        raise NotImplementedError(self.blob_handler)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        raise NotImplementedError(self.checkpoint_handler)

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        raise NotImplementedError(self.commit_handler)

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        raise NotImplementedError(self.reset_handler)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        raise NotImplementedError(self.tag_handler)

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        raise NotImplementedError(self.feature_handler)
class CommitHandler(object):
    """Base class for commit handling.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    def __init__(self, command):
        """Remember the commit command whose file commands get processed."""
        self.command = command

    def _find_handler(self, attr):
        """Return the plain function implementing ``attr``, or None.

        The classes of the instance are searched in method resolution
        order, so a handler defined on an intermediate base class is
        found as well.  The abstract stubs declared on CommitHandler
        itself only count when CommitHandler is used directly.
        """
        cls = self.__class__
        for klass in cls.__mro__:
            if klass is CommitHandler and cls is not CommitHandler:
                continue
            if attr in klass.__dict__:
                return klass.__dict__[attr]
        return None

    def process(self):
        """Dispatch every file command of the commit to its handler.

        The handler name is the command name minus its first four
        characters plus '_handler' (e.g. 'filemodify' is handled by
        modify_handler).  MissingHandler is raised when no class of the
        handler provides that method.
        """
        self.pre_process_files()
        for fc in self.command.file_cmds:
            handler = self._find_handler(fc.name[4:] + "_handler")
            if handler is None:
                raise errors.MissingHandler(fc.name)
            handler(self, fc)
        self.post_process_files()

    def _log(self, level, msg, *args):
        """Log msg at level, with the id of the command appended."""
        log.log(level, msg + " (%s)", *(args + (self.command.id,)))

    # Logging methods: unused in this library, but used by
    # bzr-fastimport.  Could be useful for other subclasses.

    def note(self, msg, *args):
        """log.info() with context about the command"""
        self._log(logging.INFO, msg, *args)

    def warning(self, msg, *args):
        """log.warning() with context about the command"""
        self._log(logging.WARNING, msg, *args)

    def debug(self, msg, *args):
        """log.debug() with context about the command"""
        self._log(logging.DEBUG, msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        pass

    def post_process_files(self):
        """Save the revision."""
        pass

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        raise NotImplementedError(self.modify_handler)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        raise NotImplementedError(self.delete_handler)

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        raise NotImplementedError(self.deleteall_handler)
def parseMany(filenames, parser_factory, processor):
    """Parse several input files and feed every result to 'processor'.

    :param filenames: names of the files to read; the name "-" stands
        for stdin
    :param parser_factory: callable that takes one input file and
        returns an object with a parse() method, e.g. the ImportParser
        class object itself
    :param processor: object whose process() is called once per file
        with the result of parse()

    Each named file is opened in binary mode, parsed and closed in turn,
    even when processing fails; stdin is never closed.
    """
    for filename in filenames:
        use_stdin = (filename == "-")
        if use_stdin:
            infile = sys.stdin
        else:
            infile = open(filename, "rb")
        try:
            parser = parser_factory(infile)
            processor.process(parser.parse())
        finally:
            if not use_stdin:
                infile.close()

View File

@@ -13,5 +13,6 @@ setup(
author_email = 'git@vger.kernel.org',
url = 'http://www.git-scm.com/',
package_dir = {'git_remote_helpers': ''},
packages = ['git_remote_helpers', 'git_remote_helpers.git'],
packages = ['git_remote_helpers', 'git_remote_helpers.git',
'git_remote_helpers.fastimport'],
)