mirror of
https://github.com/git/git.git
synced 2026-02-13 11:21:10 +00:00
git_remote_helpers: add fastimport library
This commit is contained in:
committed by
Pat Thoyts
parent
a2f32259dd
commit
6bbd536598
0
git_remote_helpers/fastimport/__init__.py
Normal file
0
git_remote_helpers/fastimport/__init__.py
Normal file
469
git_remote_helpers/fastimport/commands.py
Normal file
469
git_remote_helpers/fastimport/commands.py
Normal file
@@ -0,0 +1,469 @@
|
||||
# Copyright (C) 2008 Canonical Ltd
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
"""Import command classes."""
|
||||
|
||||
import os
|
||||
|
||||
# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes
# one extra character. Set this variable to True to work around it: when it is
# True, format_path() appends one space after the closing quote of every path
# it quotes. The bug only happens when renaming a file whose name contains
# spaces and/or quotes, and the symptom is:
#   % git-fast-import
#   fatal: Missing space after source: R "file 1.txt" file 2.txt
# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584
GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False
|
||||
|
||||
|
||||
# Lists of command names
|
||||
# Values passed as ``name`` by the top-level command classes in this module.
COMMAND_NAMES = [
    'blob',
    'checkpoint',
    'commit',
    'feature',
    'progress',
    'reset',
    'tag',
]
# Values passed as ``name`` by the FileCommand subclasses in this module.
FILE_COMMAND_NAMES = [
    'filemodify',
    'filedelete',
    'filecopy',
    'filerename',
    'filedeleteall',
]
|
||||
|
||||
|
||||
# Feature names
|
||||
MULTIPLE_AUTHORS_FEATURE = "multiple-authors"
COMMIT_PROPERTIES_FEATURE = "commit-properties"
EMPTY_DIRS_FEATURE = "empty-directories"
# All feature names declared above, in declaration order.
FEATURE_NAMES = [MULTIPLE_AUTHORS_FEATURE, COMMIT_PROPERTIES_FEATURE,
                 EMPTY_DIRS_FEATURE]
|
||||
|
||||
|
||||
# for classes with no meaningful __str__()
|
||||
def _simplerepr(self):
    """Return ``<ClassName at 0xADDRESS>`` for the given object.

    Meant to be assigned as ``__repr__`` on classes without a ``__str__``.
    """
    class_name = self.__class__.__name__
    address = id(self)
    return "<%s at 0x%x>" % (class_name, address)
|
||||
|
||||
# classes that define __str__() should use this instead
|
||||
def _detailrepr(self):
    """Return ``<ClassName at 0xADDRESS: str(self)>`` for the given object.

    Meant to be assigned as ``__repr__`` on classes that define ``__str__``.
    """
    class_name = self.__class__.__name__
    detail = str(self)
    return "<%s at 0x%x: %s>" % (class_name, id(self), detail)
|
||||
|
||||
|
||||
class ImportCommand(object):
    """Base class for import commands."""

    def __init__(self, name):
        """Remember the command name and start with no masked fields.

        :param name: the name of this command, e.g. 'blob'
        """
        self.name = name
        # Names of the fields whose values dump_str() replaces by '(...)'
        self._binary = []

    __repr__ = _simplerepr

    def format(self):
        """Format this command as a fastimport dump fragment.

        Returns a (possibly multiline) string that, if seen in a
        fastimport stream, would parse to an equivalent command object.
        Subclasses must override this; the base version always raises
        NotImplementedError.
        """
        raise NotImplementedError("abstract method")

    def dump_str(self, names=None, child_lists=None, verbose=False):
        """Dump fields as a string.

        :param names: the list of fields to include or
            None for all public fields (those not starting with '_')
        :param child_lists: dictionary of child command names to
            fields for that child command to include; not used by this
            base implementation
        :param verbose: if True, prefix each line with the command class and
            display fields as a dictionary; if False, dump just the field
            values with tabs between them
        :return: the dump as a single string
        """
        if names is None:
            fields = [key for key in self.__dict__
                      if not key.startswith('_')]
        else:
            fields = names

        interesting = {}
        for field in fields:
            # Fields that are not set on the instance are dumped as None
            value = self.__dict__.get(field)
            if value is not None and field in self._binary:
                value = '(...)'
            interesting[field] = value

        if not verbose:
            return "\t".join([repr(interesting[key]) for key in fields])
        return "%s: %s" % (self.__class__.__name__, interesting)
|
||||
|
||||
|
||||
class _MarkMixin(object):
    """Mixin for fastimport commands with a mark: blob, commit.

    Sets ``mark``, ``location`` and ``id`` on the instance; ``str()`` of
    the instance is its ``id``.
    """

    def __init__(self, mark, location):
        """Record the mark and location and derive an id from them.

        :param mark: the mark of the command, or None if it has none
        :param location: where the command came from, or None if unknown.
            When given it is indexed as ``location[0]`` (a path, of which
            only the basename is used) and ``location[1]`` (an integer).
        """
        self.mark = mark
        self.location = location

        # Provide a unique id in case the mark is missing
        if mark is not None:
            self.id = ':%s' % mark
        elif location is not None:
            self.id = '%s@%d' % (os.path.basename(location[0]), location[1])
        else:
            # Neither a mark nor a location is available (CommitCommand
            # defaults location to None).  Fall back to the object identity
            # instead of failing on ``location[0]``.
            self.id = '@%d' % id(self)

    def __str__(self):
        return self.id

    __repr__ = _detailrepr
|
||||
|
||||
|
||||
class BlobCommand(ImportCommand, _MarkMixin):
    """A 'blob' command: some data with an optional mark."""

    def __init__(self, mark, data, location):
        """
        :param mark: the mark of the blob, or None
        :param data: the blob contents
        :param location: passed on to _MarkMixin together with the mark
        """
        ImportCommand.__init__(self, 'blob')
        _MarkMixin.__init__(self, mark, location)
        self.data = data
        # Keep the blob contents out of dump_str() output
        self._binary = ['data']

    def format(self):
        """Return the fastimport text of this blob, data included."""
        header = "blob"
        if self.mark is not None:
            header += "\nmark :%s" % self.mark
        return "%s\ndata %d\n%s" % (header, len(self.data), self.data)
|
||||
|
||||
|
||||
class CheckpointCommand(ImportCommand):
    """A 'checkpoint' command; it has no fields besides its name."""

    def __init__(self):
        ImportCommand.__init__(self, 'checkpoint')

    def format(self):
        """Return the fastimport text of this command."""
        text = "checkpoint"
        return text
|
||||
|
||||
|
||||
class CommitCommand(ImportCommand, _MarkMixin):
    """A 'commit' command together with the file commands it contains."""

    def __init__(self, ref, mark, author, committer, message, from_,
                 merges, file_cmds, location=None, more_authors=None,
                 properties=None):
        """
        :param ref: the ref being committed to
        :param mark: the mark of the commit, or None
        :param author: value accepted by format_who_when(), or None
        :param committer: value accepted by format_who_when()
        :param message: the commit message (encoded as utf8 by format()),
            or None
        :param from_: the 'from' reference, or None
        :param merges: iterable of 'merge' references, or None
        :param file_cmds: iterable of file commands, or None
        :param location: passed on to _MarkMixin together with the mark
        :param more_authors: further authors, each accepted by
            format_who_when(), or None
        :param properties: mapping of property name to value, or None
        """
        ImportCommand.__init__(self, 'commit')
        _MarkMixin.__init__(self, mark, location)
        self.ref = ref
        self.author = author
        self.committer = committer
        self.message = message
        self.from_ = from_
        self.merges = merges
        self.file_cmds = file_cmds
        self.more_authors = more_authors
        self.properties = properties
        # Keep the file commands out of dump_str() output
        self._binary = ['file_cmds']

    def format(self, use_features=True, include_file_contents=True):
        """Return the fastimport text of this commit.

        :param use_features: if True, also emit the extra authors and the
            properties; if False, leave both out
        :param include_file_contents: accepted for interface compatibility
            but not used here; every file command is formatted with its
            own ``format()`` defaults.
            NOTE(review): FileModifyCommand.format() takes a parameter of
            the same name - confirm whether it was meant to be forwarded.
        """
        if self.mark is None:
            mark_line = ""
        else:
            mark_line = "\nmark :%s" % self.mark
        if self.author is None:
            author_section = ""
        else:
            author_section = "\nauthor %s" % format_who_when(self.author)
            if use_features and self.more_authors:
                for author in self.more_authors:
                    author_section += "\nauthor %s" % format_who_when(author)
        committer = "committer %s" % format_who_when(self.committer)
        if self.message is None:
            msg_section = ""
        else:
            # The data length is the length of the encoded message
            msg = self.message.encode('utf8')
            msg_section = "\ndata %d\n%s" % (len(msg), msg)
        if self.from_ is None:
            from_line = ""
        else:
            from_line = "\nfrom %s" % self.from_
        if self.merges is None:
            merge_lines = ""
        else:
            merge_lines = "".join(["\nmerge %s" % (m,)
                                   for m in self.merges])
        if use_features and self.properties:
            # Sorted by name so that the output is deterministic
            properties_section = "".join(
                ["\n" + format_property(name, self.properties[name])
                 for name in sorted(self.properties)])
        else:
            properties_section = ""
        if self.file_cmds is None:
            filecommands = ""
        else:
            filecommands = "".join(
                ["\n" + fc.format() for fc in self.file_cmds])
        return "commit %s%s%s\n%s%s%s%s%s%s" % (
            self.ref, mark_line, author_section, committer, msg_section,
            from_line, merge_lines, properties_section, filecommands)

    def dump_str(self, names=None, child_lists=None, verbose=False):
        """Dump this commit, then one tab-indented line per file command.

        :param names: fields of the commit to include, or None for all
            public fields
        :param child_lists: dictionary of file command names to the fields
            to include for that command; file commands whose name is
            missing from it are left out, and None leaves all of them out
        :param verbose: passed on to every dump_str() call
        """
        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
        # Nothing to add without a selection of children or without
        # children; format() accepts file_cmds being None, so do the same.
        if child_lists is None or self.file_cmds is None:
            return '\n'.join(result)
        for f in self.file_cmds:
            try:
                child_names = child_lists[f.name]
            except KeyError:
                continue
            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
        return '\n'.join(result)
|
||||
|
||||
|
||||
class FeatureCommand(ImportCommand):
    """A 'feature' command: a feature name with an optional value."""

    def __init__(self, feature_name, value=None, location=None):
        """
        :param feature_name: the name of the feature
        :param value: the value of the feature, or None if it has none
        :param location: stored as given; not used by this class itself
        """
        ImportCommand.__init__(self, 'feature')
        self.feature_name = feature_name
        self.value = value
        self.location = location

    def format(self):
        """Return ``feature NAME``, or ``feature NAME=VALUE`` with a value."""
        text = "feature %s" % (self.feature_name,)
        if self.value is not None:
            text += "=%s" % self.value
        return text
|
||||
|
||||
|
||||
class ProgressCommand(ImportCommand):
    """A 'progress' command carrying a message."""

    def __init__(self, message):
        """
        :param message: the text that follows the 'progress' keyword
        """
        ImportCommand.__init__(self, 'progress')
        self.message = message

    def format(self):
        """Return ``progress MESSAGE``."""
        line = "progress %s" % (self.message,)
        return line
|
||||
|
||||
|
||||
class ResetCommand(ImportCommand):
    """A 'reset' command for a ref, with an optional 'from' line."""

    def __init__(self, ref, from_):
        """
        :param ref: the ref to reset
        :param from_: the 'from' reference, or None to leave the line out
        """
        ImportCommand.__init__(self, 'reset')
        self.ref = ref
        self.from_ = from_

    def format(self):
        """Return the fastimport text of this command."""
        text = "reset %s" % (self.ref,)
        if self.from_ is not None:
            # According to git-fast-import(1), the extra LF is optional here;
            # however, versions of git up to 1.5.4.3 had a bug by which the LF
            # was needed. Always emit it, since it doesn't hurt and maintains
            # compatibility with older versions.
            # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab
            text += "\nfrom %s\n" % self.from_
        return text
|
||||
|
||||
|
||||
class TagCommand(ImportCommand):
    """A 'tag' command; ``str()`` of the command is the tag id."""

    def __init__(self, id, from_, tagger, message):
        """
        :param id: the name that follows the 'tag' keyword
        :param from_: the 'from' reference, or None
        :param tagger: value accepted by format_who_when(), or None
        :param message: the tag message (encoded as utf8 by format()),
            or None
        """
        ImportCommand.__init__(self, 'tag')
        self.id = id
        self.from_ = from_
        self.tagger = tagger
        self.message = message

    def __str__(self):
        return self.id

    __repr__ = _detailrepr

    def format(self):
        """Return the fastimport text of this tag; None parts are left out."""
        pieces = ["tag %s" % (self.id,)]
        if self.from_ is not None:
            pieces.append("\nfrom %s" % self.from_)
        if self.tagger is not None:
            pieces.append("\ntagger %s" % format_who_when(self.tagger))
        if self.message is not None:
            # The data length is the length of the encoded message
            msg = self.message.encode('utf8')
            pieces.append("\ndata %d\n%s" % (len(msg), msg))
        return "".join(pieces)
|
||||
|
||||
|
||||
class FileCommand(ImportCommand):
    """Base class for file commands.

    Adds nothing to ImportCommand; it only gives the file commands a
    common ancestor.
    """
|
||||
|
||||
|
||||
class FileModifyCommand(FileCommand):
    """An 'M' (filemodify) command; ``str()`` of the command is its path."""

    def __init__(self, path, mode, dataref, data):
        """
        Either dataref or data should be null.

        :param path: the path being modified; checked with check_path()
        :param mode: the file mode, as a string such as "644" or "120000"
        :param dataref: reference to the file contents, or None if the
            contents are given inline through ``data``
        :param data: the inline file contents, or None
        """
        FileCommand.__init__(self, 'filemodify')
        self.path = check_path(path)
        self.mode = mode
        self.dataref = dataref
        self.data = data
        # Keep the file contents out of dump_str() output
        self._binary = ['data']

    def __str__(self):
        return self.path

    __repr__ = _detailrepr

    def format(self, include_file_contents=True):
        """Return the fastimport text of this command.

        :param include_file_contents: if False, leave out the data section
            of a command whose contents are inline
        """
        if self.dataref is not None:
            dataref = "%s" % (self.dataref,)
            datastr = ""
        else:
            dataref = "inline"
            if include_file_contents:
                datastr = "\ndata %d\n%s" % (len(self.data), self.data)
            else:
                datastr = ""
        return "M %s %s %s%s" % (self.mode, dataref,
                                 format_path(self.path), datastr)

    def is_regular(self):
        """Return true if this is a regular file (mode 644)."""
        regular = self.mode.endswith("644")
        return regular

    def is_executable(self):
        """Return true if this is an executable file (mode 755)."""
        executable = self.mode.endswith("755")
        return executable

    def is_symlink(self):
        """Return true if this is a symlink (mode 120000)."""
        return "120000" == self.mode

    def is_gitlink(self):
        """Return true if this is a gitlink (mode 160000)."""
        return "160000" == self.mode
|
||||
|
||||
|
||||
class FileDeleteCommand(FileCommand):
    """A 'D' (filedelete) command; ``str()`` of the command is its path."""

    def __init__(self, path):
        """
        :param path: the path to delete; checked with check_path()
        """
        FileCommand.__init__(self, 'filedelete')
        self.path = check_path(path)

    def __str__(self):
        return self.path

    __repr__ = _detailrepr

    def format(self):
        """Return ``D PATH`` with the path formatted by format_path()."""
        formatted = format_path(self.path)
        return "D %s" % (formatted,)
|
||||
|
||||
|
||||
class FileCopyCommand(FileCommand):
    """A 'C' (filecopy) command from a source to a destination path."""

    def __init__(self, src_path, dest_path):
        """
        :param src_path: the path to copy from; checked with check_path()
        :param dest_path: the path to copy to; checked with check_path()
        """
        FileCommand.__init__(self, 'filecopy')
        self.src_path = check_path(src_path)
        self.dest_path = check_path(dest_path)

    def __str__(self):
        return "%s -> %s" % (self.src_path, self.dest_path)

    __repr__ = _detailrepr

    def format(self):
        """Return ``C SRC DEST``; only the source is quoted for spaces."""
        src = format_path(self.src_path, quote_spaces=True)
        dest = format_path(self.dest_path)
        return "C %s %s" % (src, dest)
|
||||
|
||||
|
||||
class FileRenameCommand(FileCommand):
    """An 'R' (filerename) command from an old to a new path."""

    def __init__(self, old_path, new_path):
        """
        :param old_path: the current path; checked with check_path()
        :param new_path: the path to rename to; checked with check_path()
        """
        FileCommand.__init__(self, 'filerename')
        self.old_path = check_path(old_path)
        self.new_path = check_path(new_path)

    def __str__(self):
        return "%s -> %s" % (self.old_path, self.new_path)

    __repr__ = _detailrepr

    def format(self):
        """Return ``R OLD NEW``; only the old path is quoted for spaces."""
        old = format_path(self.old_path, quote_spaces=True)
        new = format_path(self.new_path)
        return "R %s %s" % (old, new)
|
||||
|
||||
|
||||
class FileDeleteAllCommand(FileCommand):
    """A 'deleteall' command; it has no fields besides its name."""

    def __init__(self):
        FileCommand.__init__(self, 'filedeleteall')

    def format(self):
        """Return the fastimport text of this command."""
        text = "deleteall"
        return text
|
||||
|
||||
|
||||
def check_path(path):
    """Check that a path is legal, i.e. neither None nor empty.

    :param path: the path to check
    :return: the path if all is OK
    :raise ValueError: if the path is illegal
    """
    if path is not None and path != '':
        return path
    raise ValueError("illegal path '%s'" % path)
|
||||
|
||||
|
||||
def format_path(p, quote_spaces=False):
    """Format a path in utf8, quoting it if necessary.

    A path is wrapped in double quotes when it contains a newline, starts
    with a double quote, or (only if ``quote_spaces`` is true) contains a
    space.  A newline is written as the two characters backslash and 'n'.

    :param p: the path to format
    :param quote_spaces: if True, also quote paths that contain a space
    :return: the formatted path, encoded as utf8
    """
    if '\n' in p:
        # str.replace() is used on purpose: re.sub() would process the
        # backslash escape in the replacement and put the newline back.
        p = p.replace('\n', '\\n')
        quote = True
    else:
        # Slicing instead of indexing keeps an empty path from raising
        quote = p[:1] == '"' or (quote_spaces and ' ' in p)
    if quote:
        # NOTE(review): double quotes and backslashes inside the path are
        # not escaped here - confirm whether consumers need them escaped.
        if GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE:
            extra = ' '
        else:
            extra = ''
        p = '"%s"%s' % (p, extra)
    return p.encode('utf8')
|
||||
|
||||
|
||||
def format_who_when(fields):
    """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string.

    The result looks like ``NAME <EMAIL> SECS +HHMM``; the space after the
    name is left out when the name is empty.

    :param fields: sequence indexed as (name, email, secs-since-epoch,
        utc-offset-secs)
    :return: the formatted string
    """
    try:
        text_type = unicode
    except NameError:
        # No separate unicode type (Python 3): nothing needs encoding, and
        # isinstance() against an empty tuple is always False.
        text_type = ()

    offset = fields[3]
    if offset < 0:
        offset_sign = '-'
        offset = abs(offset)
    else:
        offset_sign = '+'
    # Floor division keeps the hour/minute split right whatever '/' means
    # for the offset (float offsets, true division).
    offset_hours, remainder = divmod(offset, 3600)
    offset_minutes = remainder // 60
    offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes)

    name = fields[0]
    if name == '':
        sep = ''
    else:
        sep = ' '
    if isinstance(name, text_type):
        name = name.encode('utf8')
    email = fields[1]
    if isinstance(email, text_type):
        email = email.encode('utf8')
    return "%s%s<%s> %d %s" % (name, sep, email, fields[2], offset_str)
|
||||
|
||||
|
||||
def format_property(name, value):
    """Format the name and value (both unicode) of a property as a string.

    :param name: the property name
    :param value: the property value, or None for a property without value
    :return: ``property NAME`` when value is None, otherwise
        ``property NAME LENGTH VALUE`` where LENGTH is the length of the
        utf8-encoded value
    """
    utf8_name = name.encode('utf8')
    if value is None:
        return "property %s" % (utf8_name,)
    utf8_value = value.encode('utf8')
    return "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value)
|
||||
79
git_remote_helpers/fastimport/dates.py
Normal file
79
git_remote_helpers/fastimport/dates.py
Normal file
@@ -0,0 +1,79 @@
|
||||
# Copyright (C) 2008 Canonical Ltd
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
"""Date parsing routines.
|
||||
|
||||
Each routine returns timestamp,timezone where
|
||||
|
||||
* timestamp is seconds since epoch
|
||||
* timezone is the offset from UTC in seconds.
|
||||
"""
|
||||
|
||||
|
||||
import time
|
||||
|
||||
from git_remote_helpers.fastimport import errors
|
||||
|
||||
|
||||
def parse_raw(s, lineno=0):
    """Parse a date from a raw string.

    The format must be exactly "seconds-since-epoch offset-utc".
    See the spec for details.

    :param s: the string to parse; it is split at its first space
    :param lineno: line number handed to the timezone parser
    :return: (timestamp, timezone) with the timestamp as a float and the
        timezone as an offset in seconds
    """
    pieces = s.split(' ', 1)
    timestamp_str, timezone_str = pieces
    return float(timestamp_str), _parse_tz(timezone_str, lineno)
|
||||
|
||||
|
||||
def _parse_tz(tz, lineno):
    """Parse a timezone specification in the [+|-]HHMM format.

    :param tz: the timezone specification, exactly five characters long
    :param lineno: line number reported if the specification is invalid
    :return: the timezone offset in seconds.
    :raise errors.InvalidTimezone: if tz is not five characters long
    :raise KeyError: if tz does not start with '+' or '-'
    :raise ValueError: if the hours or minutes are not integers
    """
    # from git_repository.py in bzr-git
    if len(tz) != 5:
        # InvalidTimezone takes (filename, lineno, timezone); the name of
        # the file is not known here, so None is passed for it.
        raise errors.InvalidTimezone(None, lineno, tz)
    sign = {'+': +1, '-': -1}[tz[0]]
    hours = int(tz[1:3])
    minutes = int(tz[3:])
    return sign * 60 * (60 * hours + minutes)
|
||||
|
||||
|
||||
def parse_rfc2822(s, lineno=0):
    """Parse a date from a rfc2822 string.

    See the spec for details.

    Not implemented yet: every call raises NotImplementedError, with this
    function as the argument of the exception.
    """
    not_implemented = NotImplementedError(parse_rfc2822)
    raise not_implemented
|
||||
|
||||
|
||||
def parse_now(s, lineno=0):
    """Parse a date from a string.

    The format must be exactly "now".
    See the spec for details.

    Neither argument is inspected here.

    :return: the current time from time.time() and a timezone offset of 0
    """
    current = time.time()
    return current, 0
|
||||
|
||||
|
||||
# Lookup table of date parsing routines
|
||||
# Maps the name of a date format to the function that parses it
DATE_PARSERS_BY_NAME = dict(
    raw=parse_raw,
    rfc2822=parse_rfc2822,
    now=parse_now,
)
|
||||
182
git_remote_helpers/fastimport/errors.py
Normal file
182
git_remote_helpers/fastimport/errors.py
Normal file
@@ -0,0 +1,182 @@
|
||||
# Copyright (C) 2008 Canonical Ltd
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
"""Exception classes for fastimport"""
|
||||
|
||||
|
||||
try:
    _FastImportErrorBase = StandardError
except NameError:
    # Interpreters without StandardError (Python 3) use Exception instead
    _FastImportErrorBase = Exception


class FastImportError(_FastImportErrorBase):
    """The base exception class for all import processing exceptions.

    Subclasses set ``_fmt`` to a %-style template with named fields; the
    message is that template filled in from the instance attributes.
    """

    _fmt = "Unknown Import Error"

    def __str__(self):
        return self._fmt % self.__dict__
|
||||
|
||||
class ParsingError(FastImportError):
    """The base exception class for all import parsing exceptions.

    The message is prefixed with the file name (when there is one) and
    the line number.
    """

    _fmt = "Unknown Import Parsing Error"

    def __init__(self, filename, lineno):
        """
        :param filename: name of the file being parsed; a false value
            leaves the file name out of the message
        :param lineno: number of the line the error refers to
        """
        FastImportError.__init__(self)
        self.filename = filename
        self.lineno = lineno

    def __str__(self):
        prefix = ""
        if self.filename:
            prefix = self.filename + ", "
        message = FastImportError.__str__(self)
        return prefix + "line " + str(self.lineno) + ": " + message
|
||||
|
||||
|
||||
class MissingBytes(ParsingError):
    """Raised when EOF encountered while expecting to find more bytes."""

    _fmt = "Unexpected EOF - expected %(expected)d bytes, found %(found)d"

    def __init__(self, filename, lineno, expected, found):
        """
        :param expected: the number of bytes that were expected
        :param found: the number of bytes that were found
        """
        ParsingError.__init__(self, filename, lineno)
        self.found = found
        self.expected = expected
|
||||
|
||||
|
||||
class MissingTerminator(ParsingError):
    """Raised when EOF encountered while expecting to find a terminator."""

    _fmt = "Unexpected EOF - expected '%(terminator)s' terminator"

    def __init__(self, filename, lineno, terminator):
        """
        :param terminator: the terminator that was expected
        """
        self.terminator = terminator
        ParsingError.__init__(self, filename, lineno)
|
||||
|
||||
|
||||
class InvalidCommand(ParsingError):
    """Raised when an unknown command found."""

    _fmt = "Invalid command '%(cmd)s'"

    def __init__(self, filename, lineno, cmd):
        """
        :param cmd: the command that is not known
        """
        self.cmd = cmd
        ParsingError.__init__(self, filename, lineno)
|
||||
|
||||
|
||||
class MissingSection(ParsingError):
    """Raised when a section is required in a command but not present."""

    _fmt = "Command %(cmd)s is missing section %(section)s"

    def __init__(self, filename, lineno, cmd, section):
        """
        :param cmd: the command that lacks the section
        :param section: the section that is missing
        """
        ParsingError.__init__(self, filename, lineno)
        self.section = section
        self.cmd = cmd
|
||||
|
||||
|
||||
class BadFormat(ParsingError):
    """Raised when a section is formatted incorrectly."""

    _fmt = ("Bad format for section %(section)s "
            "in command %(cmd)s: found '%(text)s'")

    def __init__(self, filename, lineno, cmd, section, text):
        """
        :param cmd: the command that contains the section
        :param section: the section that is badly formatted
        :param text: the text that was found
        """
        ParsingError.__init__(self, filename, lineno)
        self.text = text
        self.section = section
        self.cmd = cmd
|
||||
|
||||
|
||||
class InvalidTimezone(ParsingError):
    """Raised when converting a string timezone to a seconds offset."""

    _fmt = "Timezone %(timezone)r could not be converted.%(reason)s"

    def __init__(self, filename, lineno, timezone, reason=None):
        """
        :param timezone: the timezone that could not be converted
        :param reason: optional explanation; when given it is added to the
            message after a space
        """
        ParsingError.__init__(self, filename, lineno)
        self.timezone = timezone
        self.reason = ''
        if reason:
            self.reason = ' ' + reason
|
||||
|
||||
|
||||
class UnknownDateFormat(FastImportError):
    """Raised when an unknown date format is given."""

    _fmt = "Unknown date format '%(format)s'"

    def __init__(self, format):
        """
        :param format: the date format that is not known
        """
        self.format = format
        FastImportError.__init__(self)
|
||||
|
||||
|
||||
class MissingHandler(FastImportError):
    """Raised when a processor can't handle a command."""

    _fmt = "Missing handler for command %(cmd)s"

    def __init__(self, cmd):
        """
        :param cmd: the command that has no handler
        """
        self.cmd = cmd
        FastImportError.__init__(self)
|
||||
|
||||
|
||||
class UnknownParameter(FastImportError):
    """Raised when an unknown parameter is passed to a processor."""

    _fmt = "Unknown parameter - '%(param)s' not in %(knowns)s"

    def __init__(self, param, knowns):
        """
        :param param: the parameter that is not known
        :param knowns: the known parameters, shown in the message
        """
        FastImportError.__init__(self)
        self.knowns = knowns
        self.param = param
|
||||
|
||||
|
||||
class BadRepositorySize(FastImportError):
    """Raised when the repository has an incorrect number of revisions."""

    _fmt = ("Bad repository size - "
            "%(found)d revisions found, %(expected)d expected")

    def __init__(self, expected, found):
        """
        :param expected: the number of revisions that were expected
        :param found: the number of revisions that were found
        """
        FastImportError.__init__(self)
        self.found = found
        self.expected = expected
|
||||
|
||||
|
||||
class BadRestart(FastImportError):
    """Raised when the import stream and id-map do not match up."""

    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s"
            " but matching revision-id is unknown")

    def __init__(self, commit_id):
        """
        :param commit_id: id of the commit that was to be skipped
        """
        self.commit_id = commit_id
        FastImportError.__init__(self)
|
||||
|
||||
|
||||
class UnknownFeature(FastImportError):
    """Raised when an unknown feature is given in the input stream."""

    _fmt = ("Unknown feature '%(feature)s' - try a later importer"
            " or an earlier data format")

    def __init__(self, feature):
        """
        :param feature: the feature that is not known
        """
        self.feature = feature
        FastImportError.__init__(self)
|
||||
47
git_remote_helpers/fastimport/head_tracker.py
Normal file
47
git_remote_helpers/fastimport/head_tracker.py
Normal file
@@ -0,0 +1,47 @@
|
||||
|
||||
|
||||
class HeadTracker(object):
    """
    Keep track of the heads in a fastimport stream.
    """

    def __init__(self):
        # the ref given to the most recent track_heads_for_ref() call
        self.last_ref = None

        # map git ref name (e.g. "refs/heads/master") to id of last
        # commit with that ref
        self.last_ids = {}

        # the set of heads seen so far in the stream, as a mapping
        # from commit id of the head to set of ref names
        self.heads = {}

    def track_heads(self, cmd):
        """Track the repository heads given a CommitCommand.

        The first parent is ``cmd.from_`` or, without one, the last commit
        seen on ``cmd.ref`` (if any); the merges follow it.

        :param cmd: the CommitCommand
        :return: the list of parents in terms of commit-ids
        """
        # Get the true set of parents
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        # A commit may have no merges at all, which shows up as None
        if cmd.merges is not None:
            parents.extend(cmd.merges)

        # Track the heads
        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
        return parents

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        """Record cmd_id as the newest commit, and a head, of cmd_ref.

        :param cmd_ref: the ref name
        :param cmd_id: the id of the commit
        :param parents: ids of commits that stop being heads, or None
        """
        if parents is not None:
            for parent in parents:
                # a parent that is not a head is left alone
                self.heads.pop(parent, None)
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref
|
||||
|
||||
88
git_remote_helpers/fastimport/helpers.py
Normal file
88
git_remote_helpers/fastimport/helpers.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# Copyright (C) 2008 Canonical Ltd
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
"""Miscellaneous useful stuff."""
|
||||
|
||||
import os
|
||||
|
||||
def single_plural(n, single, plural):
    """Return a single or plural form of a noun based on number.

    :param n: the number of things
    :param single: returned when n equals 1
    :param plural: returned for every other n
    """
    if n != 1:
        return plural
    return single
|
||||
|
||||
|
||||
def invert_dict(d):
    """Invert a dictionary with keys matching each value turned into a list.

    :param d: the dictionary to invert; its values must be hashable
    :return: a dictionary mapping each value of d to the list of the keys
        that had that value
    """
    # Based on recipe from ASPN
    result = {}
    # items() rather than iteritems(), which only Python 2 dictionaries have
    for k, v in d.items():
        result.setdefault(v, []).append(k)
    return result
|
||||
|
||||
|
||||
def invert_dictset(d):
    """Invert a dictionary with keys matching a set of values, turned into lists.

    :param d: a dictionary whose values are iterables of hashable items
    :return: a dictionary mapping each item to the list of the keys of d
        whose value contained that item
    """
    # Based on recipe from ASPN
    result = {}
    # items() rather than iteritems(), which only Python 2 dictionaries have
    for k, c in d.items():
        for v in c:
            result.setdefault(v, []).append(k)
    return result
|
||||
|
||||
|
||||
def _common_path_and_rest(l1, l2, common=None):
    """Split two sequences into their common prefix and what follows it.

    Works without recursion, so the length of the common prefix is not
    limited by the interpreter's recursion limit.

    :param l1: first sequence (a string or a list)
    :param l2: second sequence
    :param common: list of elements already known to be common; the
        prefix found is appended to a copy of it.  None stands for a
        fresh empty list.
    :return: (common, rest of l1, rest of l2), with common as a list of
        elements
    """
    # From http://code.activestate.com/recipes/208993/
    if common is None:
        # a new list per call, so callers never share the result
        common = []
    limit = min(len(l1), len(l2))
    matched = 0
    while matched < limit and l1[matched] == l2[matched]:
        matched += 1
    if matched == 0:
        return (common, l1, l2)
    return (common + list(l1[:matched]), l1[matched:], l2[matched:])
|
||||
|
||||
|
||||
def common_path(path1, path2):
    """Find the common bit of 2 paths.

    :return: the leading part the two paths share, compared element by
        element and joined back into a string
    """
    shared = _common_path_and_rest(path1, path2)[0]
    return ''.join(shared)
|
||||
|
||||
|
||||
def common_directory(paths):
    """Find the deepest common directory of a list of paths.

    :param paths: the list of paths
    :return: if no paths are provided, None is returned;
        if there is no common directory, '' is returned;
        otherwise the common directory with a trailing / is returned.
    """
    def get_dir_with_slash(path):
        # An empty path, or one that already ends with a slash, is kept
        if path == '' or path.endswith('/'):
            return path
        dirname = os.path.split(path)[0]
        if dirname == '':
            return dirname
        return dirname + '/'

    if not paths:
        return None
    # Narrow the common part down one path at a time
    common = paths[0]
    for path in paths[1:]:
        common = common_path(common, path)
    return get_dir_with_slash(common)
|
||||
65
git_remote_helpers/fastimport/idmapfile.py
Normal file
65
git_remote_helpers/fastimport/idmapfile.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# Copyright (C) 2008 Canonical Ltd
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
"""Routines for saving and loading the id-map file."""
|
||||
|
||||
import os
|
||||
|
||||
|
||||
def save_id_map(filename, revision_ids):
    """Save the mapping of commit ids to revision ids to a file.

    One line is written per entry: the commit id, a space, the revision
    id.  Throws the usual exceptions if the file cannot be opened,
    written to or closed.

    :param filename: name of the file to save the data to
    :param revision_ids: a dictionary of commit ids to revision ids.
    """
    out = open(filename, 'wb')
    try:
        for entry in revision_ids.items():
            out.write("%s %s\n" % entry)
        out.flush()
    finally:
        # close the file even when writing failed
        out.close()
|
||||
|
||||
|
||||
def load_id_map(filename):
    """Load the mapping of commit ids to revision ids from a file.

    If the file does not exist, an empty result is returned.
    If the file does exist but cannot be opened, read or closed,
    the normal exceptions are thrown.

    NOTE: It is assumed that commit-ids do not have embedded spaces.

    :param filename: name of the file to load the data from
    :result: map, count where:
        map = a dictionary of commit ids to revision ids;
        count = the number of lines read, which is the number of keys in
        map unless a commit id occurs more than once
    :raise IndexError: if a line contains no space
    """
    result = {}
    count = 0
    if os.path.exists(filename):
        f = open(filename)
        try:
            for line in f:
                # Remove only the line terminator: dropping the last
                # character unconditionally would shorten the revision id
                # on a final line that has no trailing newline.
                parts = line.rstrip('\n').split(' ', 1)
                result[parts[0]] = parts[1]
                count += 1
        finally:
            f.close()
    return result, count
|
||||
621
git_remote_helpers/fastimport/parser.py
Normal file
621
git_remote_helpers/fastimport/parser.py
Normal file
@@ -0,0 +1,621 @@
|
||||
# Copyright (C) 2008 Canonical Ltd
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import warnings
|
||||
|
||||
"""Parser of import data into command objects.
|
||||
|
||||
In order to reuse existing front-ends, the stream format is a subset of
|
||||
the one used by git-fast-import (as of the 1.5.4 release of git at least).
|
||||
The grammar is:
|
||||
|
||||
stream ::= cmd*;
|
||||
|
||||
cmd ::= new_blob
|
||||
| new_commit
|
||||
| new_tag
|
||||
| reset_branch
|
||||
| checkpoint
|
||||
| progress
|
||||
;
|
||||
|
||||
new_blob ::= 'blob' lf
|
||||
mark?
|
||||
file_content;
|
||||
file_content ::= data;
|
||||
|
||||
new_commit ::= 'commit' sp ref_str lf
|
||||
mark?
|
||||
('author' sp name '<' email '>' when lf)?
|
||||
'committer' sp name '<' email '>' when lf
|
||||
commit_msg
|
||||
('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
|
||||
('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
|
||||
file_change*
|
||||
lf?;
|
||||
commit_msg ::= data;
|
||||
|
||||
file_change ::= file_clr
|
||||
| file_del
|
||||
| file_rnm
|
||||
| file_cpy
|
||||
| file_obm
|
||||
| file_inm;
|
||||
file_clr ::= 'deleteall' lf;
|
||||
file_del ::= 'D' sp path_str lf;
|
||||
file_rnm ::= 'R' sp path_str sp path_str lf;
|
||||
file_cpy ::= 'C' sp path_str sp path_str lf;
|
||||
file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
|
||||
file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
|
||||
data;
|
||||
|
||||
new_tag ::= 'tag' sp tag_str lf
|
||||
'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
|
||||
'tagger' sp name '<' email '>' when lf
|
||||
tag_msg;
|
||||
tag_msg ::= data;
|
||||
|
||||
reset_branch ::= 'reset' sp ref_str lf
|
||||
('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
|
||||
lf?;
|
||||
|
||||
checkpoint ::= 'checkpoint' lf
|
||||
lf?;
|
||||
|
||||
progress ::= 'progress' sp not_lf* lf
|
||||
lf?;
|
||||
|
||||
# note: the first idnum in a stream should be 1 and subsequent
|
||||
# idnums should not have gaps between values as this will cause
|
||||
# the stream parser to reserve space for the gapped values. An
|
||||
# idnum can be updated in the future to a new object by issuing
|
||||
# a new mark directive with the old idnum.
|
||||
#
|
||||
mark ::= 'mark' sp idnum lf;
|
||||
data ::= (delimited_data | exact_data)
|
||||
lf?;
|
||||
|
||||
# note: delim may be any string but must not contain lf.
|
||||
# data_line may contain any data but must not be exactly
|
||||
# delim. The lf after the final data_line is included in
|
||||
# the data.
|
||||
delimited_data ::= 'data' sp '<<' delim lf
|
||||
(data_line lf)*
|
||||
delim lf;
|
||||
|
||||
# note: declen indicates the length of binary_data in bytes.
|
||||
# declen does not include the lf preceding the binary data.
|
||||
#
|
||||
exact_data ::= 'data' sp declen lf
|
||||
binary_data;
|
||||
|
||||
# note: quoted strings are C-style quoting supporting \c for
|
||||
# common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
|
||||
# is the signed byte value in octal. Note that the only
|
||||
# characters which must actually be escaped to protect the
|
||||
# stream formatting is: \, \" and LF. Otherwise these values
|
||||
# are UTF8.
|
||||
#
|
||||
ref_str ::= ref;
|
||||
sha1exp_str ::= sha1exp;
|
||||
tag_str ::= tag;
|
||||
path_str ::= path | '"' quoted(path) '"' ;
|
||||
mode ::= '100644' | '644'
|
||||
| '100755' | '755'
|
||||
| '120000'
|
||||
;
|
||||
|
||||
declen ::= # unsigned 32 bit value, ascii base10 notation;
|
||||
bigint ::= # unsigned integer value, ascii base10 notation;
|
||||
binary_data ::= # file content, not interpreted;
|
||||
|
||||
when ::= raw_when | rfc2822_when;
|
||||
raw_when ::= ts sp tz;
|
||||
rfc2822_when ::= # Valid RFC 2822 date and time;
|
||||
|
||||
sp ::= # ASCII space character;
|
||||
lf ::= # ASCII newline (LF) character;
|
||||
|
||||
# note: a colon (':') must precede the numerical value assigned to
|
||||
# an idnum. This is to distinguish it from a ref or tag name as
|
||||
# GIT does not permit ':' in ref or tag strings.
|
||||
#
|
||||
idnum ::= ':' bigint;
|
||||
path ::= # GIT style file path, e.g. \"a/b/c\";
|
||||
ref ::= # GIT ref name, e.g. \"refs/heads/MOZ_GECKO_EXPERIMENT\";
|
||||
tag ::= # GIT tag name, e.g. \"FIREFOX_1_5\";
|
||||
sha1exp ::= # Any valid GIT SHA1 expression;
|
||||
hexsha1 ::= # SHA1 in hexadecimal format;
|
||||
|
||||
# note: name and email are UTF8 strings, however name must not
|
||||
# contain '<' or lf and email must not contain any of the
|
||||
# following: '<', '>', lf.
|
||||
#
|
||||
name ::= # valid GIT author/committer name;
|
||||
email ::= # valid GIT author/committer email;
|
||||
ts ::= # time since the epoch in seconds, ascii base10 notation;
|
||||
tz ::= # GIT style timezone;
|
||||
|
||||
# note: comments may appear anywhere in the input, except
|
||||
# within a data command. Any form of the data command
|
||||
# always escapes the related input from comment processing.
|
||||
#
|
||||
# In case it is not clear, the '#' that starts the comment
|
||||
# must be the first character on that line (an lf must have
|
||||
# preceded it).
|
||||
#
|
||||
comment ::= '#' not_lf* lf;
|
||||
not_lf ::= # Any byte that is not ASCII newline (LF);
|
||||
"""
|
||||
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
from git_remote_helpers.fastimport import (
|
||||
commands,
|
||||
dates,
|
||||
errors
|
||||
)
|
||||
|
||||
|
||||
## Stream parsing ##
|
||||
|
||||
class LineBasedParser(object):
    """Line-oriented reader that tracks line numbers and supports push-back.

    Lines handed back through push_line() are returned again by readline()
    and next_line() before any more data is read from the input.
    read_bytes() and read_until() bypass that push-back buffer and read
    from the input directly.
    """

    def __init__(self, input, filename=None):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        :param filename: name used when reporting errors; defaults to
          input.name, or "(unknown)" if the input has no name attribute
        """
        self.input = input
        if filename is None:
            try:
                self.filename = input.name
            except AttributeError:
                self.filename = "(unknown)"
        else:
            self.filename = filename
        self.lineno = 0
        # Lines pushed back onto the input stream
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information.

        :param exception: the exception class to raise; it is called with
          the filename, the current line number and then *args
        """
        raise exception(self.filename, self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF."""
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # Only strip a real terminator: the last line of the input may not
        # end with one, and must not lose its final character.
        if line.endswith("\n"):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :return: a string
        """
        result = self.input.read(count)
        found = len(result)
        # Keep the line counter in step with any newlines inside the data.
        self.lineno += result.count("\n")
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return result

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.

        :param terminator: the delimiter line, without its newline
        :return: the bytes read up to but excluding the terminator.
        """
        lines = []
        term = terminator + '\n'
        while True:
            line = self.input.readline()
            if not line:
                # EOF before the terminator.  readline() keeps returning ''
                # from here on, so without this check the loop never ends.
                self.abort(errors.MissingTerminator, terminator)
            self.lineno += 1
            if line == term:
                break
            lines.append(line)
        return ''.join(lines)
|
||||
|
||||
|
||||
# Regular expression used for parsing. (Note: The spec states that the name
|
||||
# part should be non-empty but git-fast-export doesn't always do that so
|
||||
# the first bit is [^<]*, not [^<]+.)  Also git-fast-import code says the
|
||||
# space before the email is optional.
|
||||
_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')
|
||||
_WHO_RE = re.compile(r'([^<]*)<(.*)>')
|
||||
|
||||
|
||||
class ImportParser(LineBasedParser):
    """Parser that turns a fast-import stream into command objects."""

    def __init__(self, input, filename=None):
        """A Parser of import commands.

        :param input: the file-like object to read from
        :param filename: name to report in error locations; passed
          through unchanged to LineBasedParser
        """
        LineBasedParser.__init__(self, input, filename)

        # We auto-detect the date format when a date is first encountered
        self.date_parser = None

    def warning(self, msg):
        """Write a warning, tagged with the current line number, to stderr."""
        sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg))

    def parse(self):
        """Parse the input stream, yielding a sequence of ImportCommand
        objects.  Iteration terminates on EOF or on a 'done' line.  Raises
        InvalidCommand on parse error."""
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                # Blank lines and comments carry no command.
                continue
            # Search for commands in order of likelihood
            elif line.startswith('commit '):
                yield self._parse_commit(line[len('commit '):])
            elif line.startswith('blob'):
                yield self._parse_blob()
            elif line.startswith('done'):
                break
            elif line.startswith('progress '):
                yield commands.ProgressCommand(line[len('progress '):])
            elif line.startswith('reset '):
                yield self._parse_reset(line[len('reset '):])
            elif line.startswith('tag '):
                yield self._parse_tag(line[len('tag '):])
            elif line.startswith('checkpoint'):
                yield commands.CheckpointCommand()
            elif line.startswith('feature '):
                # The prefix tested must be the prefix sliced off, or the
                # first character of the feature name could be dropped.
                yield self._parse_feature(line[len('feature '):])
            else:
                self.abort(errors.InvalidCommand, line)

    def iter_commands(self):
        """Deprecated alias for parse()."""
        warnings.warn("iter_commands() deprecated: use parse()",
            DeprecationWarning, stacklevel=2)
        return self.parse()

    def iter_file_commands(self):
        """Iterator returning FileCommand objects.

        If an invalid file command is found, the line is silently
        pushed back and iteration ends.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for file commands in order of likelihood
            elif line.startswith('M '):
                yield self._parse_file_modify(line[2:])
            elif line.startswith('D '):
                path = self._path(line[2:])
                yield commands.FileDeleteCommand(path)
            elif line.startswith('R '):
                old, new = self._path_pair(line[2:])
                yield commands.FileRenameCommand(old, new)
            elif line.startswith('C '):
                src, dest = self._path_pair(line[2:])
                yield commands.FileCopyCommand(src, dest)
            elif line.startswith('deleteall'):
                yield commands.FileDeleteAllCommand()
            else:
                # Not a file command: leave it for the caller to parse.
                self.push_line(line)
                break

    def _decode_message(self, message, cmd):
        """Decode a message from UTF-8, replacing undecodable bytes.

        :param message: the raw message as read from the stream
        :param cmd: name of the command the message belongs to; used only
          in the warning emitted when bytes have to be replaced
        """
        try:
            return message.decode('utf_8')
        except UnicodeDecodeError:
            self.warning(
                "%s message not in utf8 - replacing unknown characters" % cmd)
            return message.decode('utf_8', 'replace')

    def _parse_blob(self):
        """Parse a blob command."""
        location = (self.filename, self.lineno)
        mark = self._get_mark_if_any()
        data = self._get_data('blob')
        return commands.BlobCommand(mark, data, location)

    def _parse_commit(self, ref):
        """Parse a commit command.

        :param ref: the ref named on the 'commit' line
        """
        location = (self.filename, self.lineno)
        mark = self._get_mark_if_any()
        author = self._get_user_info('commit', 'author', False)
        # Any further 'author' lines are collected separately.
        more_authors = []
        while True:
            another_author = self._get_user_info('commit', 'author', False)
            if another_author is None:
                break
            more_authors.append(another_author)
        committer = self._get_user_info('commit', 'committer')
        message = self._decode_message(
            self._get_data('commit', 'message'), 'commit')
        from_ = self._get_from()
        merges = []
        while True:
            merge = self._get_merge()
            if merge is None:
                break
            # while the spec suggests it's illegal, git-fast-export
            # outputs multiple merges on the one line, e.g.
            # merge :x :y :z
            merges.extend(merge.split(" "))
        properties = {}
        while True:
            name_value = self._get_property()
            if name_value is None:
                break
            name, value = name_value
            properties[name] = value
        file_cmds = list(self.iter_file_commands())
        return commands.CommitCommand(ref, mark, author, committer, message,
            from_, merges, file_cmds, location,
            more_authors=more_authors, properties=properties)

    def _parse_feature(self, info):
        """Parse a feature command.

        :param info: the text after 'feature ', either "name" or
          "name=value"
        """
        parts = info.split("=", 1)
        name = parts[0]
        if len(parts) > 1:
            value = self._path(parts[1])
        else:
            value = None
        location = (self.filename, self.lineno)
        return commands.FeatureCommand(name, value, location=location)

    def _parse_file_modify(self, info):
        """Parse a filemodify command within a commit.

        :param info: a string in the format "mode dataref path"
          (where dataref might be the hard-coded literal 'inline').
        """
        params = info.split(' ', 2)
        path = self._path(params[2])
        mode = params[0]
        if params[1] == 'inline':
            # The content follows as a data section.
            dataref = None
            data = self._get_data('filemodify')
        else:
            dataref = params[1]
            data = None
        return commands.FileModifyCommand(path, mode, dataref, data)

    def _parse_reset(self, ref):
        """Parse a reset command."""
        from_ = self._get_from()
        return commands.ResetCommand(ref, from_)

    def _parse_tag(self, name):
        """Parse a tag command."""
        from_ = self._get_from('tag')
        tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
        # Same decoding fallback as for commit messages.
        message = self._decode_message(
            self._get_data('tag', 'message'), 'tag')
        return commands.TagCommand(name, from_, tagger, message)

    def _get_mark_if_any(self):
        """Parse a mark section.

        :return: the mark id (the text after 'mark :'), or None if the
          next line is not a mark or the input is exhausted
        """
        line = self.next_line()
        if line is None:
            # EOF: nothing to inspect and nothing to push back.
            return None
        if line.startswith('mark :'):
            return line[len('mark :'):]
        self.push_line(line)
        return None

    def _get_from(self, required_for=None):
        """Parse a from section.

        :param required_for: name of the command that requires a from
          section; if given, a missing section raises MissingSection
        """
        line = self.next_line()
        if line is not None and line.startswith('from '):
            return line[len('from '):]
        if required_for:
            # Applies at EOF too: a required section is still missing.
            self.abort(errors.MissingSection, required_for, 'from')
        if line is not None:
            self.push_line(line)
        return None

    def _get_merge(self):
        """Parse a merge section."""
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('merge '):
            return line[len('merge '):]
        self.push_line(line)
        return None

    def _get_property(self):
        """Parse a property section."""
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('property '):
            return self._name_value(line[len('property '):])
        self.push_line(line)
        return None

    def _get_user_info(self, cmd, section, required=True,
        accept_just_who=False):
        """Parse a user section.

        :param cmd: name of the enclosing command, for error reporting
        :param section: the section keyword, e.g. 'author' or 'committer'
        :param required: if True, a missing section raises MissingSection;
          otherwise the line is pushed back and None is returned
        :param accept_just_who: passed through to _who_when()
        """
        line = self.next_line()
        prefix = section + ' '
        if line is not None and line.startswith(prefix):
            return self._who_when(line[len(prefix):], cmd, section,
                accept_just_who=accept_just_who)
        if required:
            self.abort(errors.MissingSection, cmd, section)
        if line is not None:
            self.push_line(line)
        return None

    def _get_data(self, required_for, section='data'):
        """Parse a data section.

        Handles both the delimited form ('data <<DELIM') and the counted
        form ('data N').

        :param required_for: name of the command needing the data, used
          when reporting a missing section
        :param section: name of the section to report as missing
        """
        line = self.next_line()
        if line is None or not line.startswith('data '):
            self.abort(errors.MissingSection, required_for, section)
        rest = line[len('data '):]
        if rest.startswith('<<'):
            return self.read_until(rest[2:])
        size = int(rest)
        read_bytes = self.read_bytes(size)
        # optional LF after data.
        trailer = self.input.readline()
        if trailer == "\n":
            self.lineno += 1
        elif trailer:
            # Not the optional LF but the start of whatever comes next:
            # hand it back.  push_line() undoes the increment, so the line
            # is only counted once it is really consumed.
            if trailer.endswith("\n"):
                trailer = trailer[:-1]
            self.lineno += 1
            self.push_line(trailer)
        # An empty trailer means EOF: nothing to count or push back.
        return read_bytes

    def _who_when(self, s, cmd, section, accept_just_who=False):
        """Parse who and when information from a string.

        :param s: the text following the section keyword
        :param cmd: name of the enclosing command, for error reporting
        :param section: name of the section, for error reporting
        :param accept_just_who: if True, tolerate a missing date and use
          the 'now' date parser instead
        :return: a tuple of (name,email,timestamp,timezone). name may be
            the empty string if only an email address was given.
        """
        match = _WHO_AND_WHEN_RE.search(s)
        if match:
            datestr = match.group(3)
            if self.date_parser is None:
                # auto-detect the date format
                if len(datestr.split(' ')) == 2:
                    format_name = 'raw'
                elif datestr == 'now':
                    format_name = 'now'
                else:
                    format_name = 'rfc2822'
                self.date_parser = dates.DATE_PARSERS_BY_NAME[format_name]
            when = self.date_parser(datestr, self.lineno)
        else:
            match = _WHO_RE.search(s)
            if accept_just_who and match:
                # HACK around missing time
                # TODO: output a warning here
                when = dates.DATE_PARSERS_BY_NAME['now']('now')
            else:
                self.abort(errors.BadFormat, cmd, section, s)

        # Do not attempt to decode name or email address; they are just
        # bytes.  (Everything will work out better if they are in UTF-8,
        # but that's not guaranteed.)
        name = match.group(1).rstrip()
        email = match.group(2)
        return (name, email, when[0], when[1])

    def _name_value(self, s):
        """Parse a (name,value) tuple from 'name value-length value'."""
        parts = s.split(' ', 2)
        name = parts[0]
        if len(parts) == 1:
            value = None
        else:
            size = int(parts[1])
            # 'name size' with nothing after it is an empty value so far.
            if len(parts) == 3:
                value = parts[2]
            else:
                value = ''
            still_to_read = size - len(value)
            if still_to_read == 1:
                # Only the newline that ended the line is missing.
                value += "\n"
            elif still_to_read > 0:
                # The value continues past the line; one of the missing
                # bytes is the newline already consumed by the line reader.
                read_bytes = self.read_bytes(still_to_read - 1)
                value += "\n" + read_bytes
            value = value.decode('utf8')
        return (name, value)

    def _path(self, s):
        """Parse a path."""
        if s.startswith('"'):
            # A quoted path needs both an opening and a closing quote.
            if len(s) < 2 or s[-1] != '"':
                self.abort(errors.BadFormat, '?', '?', s)
            return _unquote_c_string(s[1:-1])

        # Do *not* decode the path to a Unicode string: filenames on
        # Unix are just bytes.  Git and Mercurial, at least, inherit
        # this stance.  git-fast-import(1) merely says "It is
        # recommended that <path> always be encoded using UTF-8.", which
        # is good advice ... but not something we can count on here.
        return s

    def _path_pair(self, s):
        """Parse two paths separated by a space.

        :return: a list of the two unquoted paths
        """
        # TODO: handle a space in the first path
        if s.startswith('"'):
            parts = s[1:].split('" ', 1)
        else:
            parts = s.split(' ', 1)
        if len(parts) != 2:
            self.abort(errors.BadFormat, '?', '?', s)
        elif parts[1].startswith('"') and parts[1].endswith('"'):
            parts[1] = parts[1][1:-1]
        elif parts[1].startswith('"') or parts[1].endswith('"'):
            # A quote at one end only is malformed.
            self.abort(errors.BadFormat, '?', '?', s)
        # Build a real list rather than relying on what map() returns.
        return [_unquote_c_string(part) for part in parts]

    def _mode(self, s):
        """Parse a file mode into executable and symlink flags.

        :return (is_executable, is_symlink)
        """
        # Note: Output from git-fast-export slightly different to spec
        if s in ('644', '100644', '0100644'):
            return False, False
        elif s in ('755', '100755', '0100755'):
            return True, False
        elif s in ('120000', '0120000'):
            return False, True
        else:
            self.abort(errors.BadFormat, 'filemodify', 'mode', s)
|
||||
|
||||
|
||||
def _unquote_c_string(s):
    r"""Replace C-style escape sequences (\n, \", etc.) with real chars.

    :param s: the string to unescape
    :return: s with its backslash escape sequences expanded
    """
    # HACK: Python strings are close enough
    # NOTE(review): str.decode() and the 'string_escape' codec exist only
    # on Python 2 -- confirm before running this module on Python 3.
    return s.decode('string_escape', 'replace')
|
||||
222
git_remote_helpers/fastimport/processor.py
Normal file
222
git_remote_helpers/fastimport/processor.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# Copyright (C) 2008 Canonical Ltd
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
"""Processor of import commands.
|
||||
|
||||
This module provides core processing functionality including an abstract class
|
||||
for basing real processors on. See the processors package for examples.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
|
||||
from git_remote_helpers.fastimport import errors
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ImportProcessor(object):
    """Base class for import processors.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    # Names of the parameters this processor accepts; checked by
    # validate_parameters().
    known_params = []

    def __init__(self, params=None, verbose=False, outf=None):
        """Create a processor.

        :param params: dictionary of parameters, or None for none; every
          key must be listed in known_params
        :param verbose: stored as self.verbose
        :param outf: output stream stored as self.outf; defaults to
          sys.stdout
        """
        if outf is None:
            self.outf = sys.stdout
        else:
            self.outf = outf
        self.verbose = verbose
        if params is None:
            self.params = {}
        else:
            self.params = params
            self.validate_parameters()

        # Handlers can set this to request exiting cleanly without
        # iterating through the remaining commands
        self.finished = False

    def validate_parameters(self):
        """Validate that the parameters are correctly specified."""
        for p in self.params:
            if p not in self.known_params:
                raise errors.UnknownParameter(p, self.known_params)

    def _find_handler(self, name):
        """Return the plain function implementing handler `name`.

        The classes of this instance are searched in method resolution
        order, so a handler defined on an intermediate base class is found
        as well as one defined on the concrete class.  The placeholder
        handlers of ImportProcessor itself are skipped for subclasses, so
        that a handler nobody implemented is reported as missing.

        :raises KeyError: if no class provides the handler
        """
        cls = self.__class__
        for klass in cls.__mro__:
            if klass is ImportProcessor and cls is not ImportProcessor:
                continue
            try:
                return klass.__dict__[name]
            except KeyError:
                pass
        raise KeyError(name)

    def process(self, commands):
        """Process a stream of fast-import commands from a parser.

        :param commands: a sequence of commands.ImportCommand objects
        :raises MissingHandler: if there is no handler for a command
        """
        self.pre_process()
        for cmd in commands:
            try:
                handler = self._find_handler(cmd.name + "_handler")
            except KeyError:
                raise errors.MissingHandler(cmd.name)
            self.pre_handler(cmd)
            handler(self, cmd)
            self.post_handler(cmd)
            if self.finished:
                break
        self.post_process()

    def pre_process(self):
        """Hook for logic at start of processing.

        Called just before process() starts iterating over its sequence
        of commands.
        """
        pass

    def post_process(self):
        """Hook for logic at end of successful processing.

        Called after process() finishes successfully iterating over its
        sequence of commands (i.e. not called if an exception is raised
        while processing commands).
        """
        pass

    def pre_handler(self, cmd):
        """Hook for logic before each handler starts."""
        pass

    def post_handler(self, cmd):
        """Hook for logic after each handler finishes."""
        pass

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        raise NotImplementedError(self.progress_handler)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        raise NotImplementedError(self.blob_handler)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        raise NotImplementedError(self.checkpoint_handler)

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        raise NotImplementedError(self.commit_handler)

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        raise NotImplementedError(self.reset_handler)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        raise NotImplementedError(self.tag_handler)

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        raise NotImplementedError(self.feature_handler)
|
||||
|
||||
|
||||
class CommitHandler(object):
    """Base class for commit handling.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    def __init__(self, command):
        """Create a handler for one commit.

        :param command: the commit command whose file_cmds are processed
        """
        self.command = command

    def _find_handler(self, name):
        """Return the plain function implementing handler `name`.

        The classes of this instance are searched in method resolution
        order, so a handler defined on an intermediate base class is found
        as well as one defined on the concrete class.  The placeholder
        handlers of CommitHandler itself are skipped for subclasses, so
        that a handler nobody implemented is reported as missing.

        :raises KeyError: if no class provides the handler
        """
        cls = self.__class__
        for klass in cls.__mro__:
            if klass is CommitHandler and cls is not CommitHandler:
                continue
            try:
                return klass.__dict__[name]
            except KeyError:
                pass
        raise KeyError(name)

    def process(self):
        """Dispatch every file command of the commit to its handler.

        :raises MissingHandler: if there is no handler for a file command
        """
        self.pre_process_files()
        for fc in self.command.file_cmds:
            try:
                # fc.name[4:] drops the first four characters of the
                # command name, e.g. 'filemodify' -> 'modify'.
                handler = self._find_handler(fc.name[4:] + "_handler")
            except KeyError:
                raise errors.MissingHandler(fc.name)
            handler(self, fc)
        self.post_process_files()

    def _log(self, level, msg, *args):
        """Log msg at the given level, with the command id appended."""
        log.log(level, msg + " (%s)", *(args + (self.command.id,)))

    # Logging methods: unused in this library, but used by
    # bzr-fastimport.  Could be useful for other subclasses.

    def note(self, msg, *args):
        """log.info() with context about the command"""
        self._log(logging.INFO, msg, *args)

    def warning(self, msg, *args):
        """log.warning() with context about the command"""
        self._log(logging.WARNING, msg, *args)

    def debug(self, msg, *args):
        """log.debug() with context about the command"""
        self._log(logging.DEBUG, msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        pass

    def post_process_files(self):
        """Save the revision."""
        pass

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        raise NotImplementedError(self.modify_handler)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        raise NotImplementedError(self.delete_handler)

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        raise NotImplementedError(self.deleteall_handler)
|
||||
|
||||
|
||||
def parseMany(filenames, parser_factory, processor):
    """Parse several input files, feeding all results to 'processor'.

    Each name in 'filenames' is opened, parsed and closed in turn; the
    name "-" stands for stdin, which is read but never closed.

    :param filenames: the names of the files to parse
    :param parser_factory: a callable taking one input file and returning
      an ImportParser instance, e.g. the ImportParser class object itself
    :param processor: object whose process() method receives the commands
      produced by each parser's parse()
    """
    for name in filenames:
        from_stdin = (name == "-")
        if from_stdin:
            stream = sys.stdin
        else:
            stream = open(name, "rb")

        try:
            parser = parser_factory(stream)
            processor.process(parser.parse())
        finally:
            # Only close what was opened here; stdin is left alone.
            if not from_stdin:
                stream.close()
|
||||
@@ -13,5 +13,6 @@ setup(
|
||||
author_email = 'git@vger.kernel.org',
|
||||
url = 'http://www.git-scm.com/',
|
||||
package_dir = {'git_remote_helpers': ''},
|
||||
packages = ['git_remote_helpers', 'git_remote_helpers.git'],
|
||||
packages = ['git_remote_helpers', 'git_remote_helpers.git',
|
||||
'git_remote_helpers.fastimport'],
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user