mirror of
https://github.com/git/git.git
synced 2026-03-06 07:19:08 +01:00
Our current loose object format has a few problems. First, it is not efficient: the list of object IDs is not sorted and even if it were, there would not be an efficient way to look up objects in both algorithms. Second, we need to store mappings for things which are not technically loose objects but are not packed objects, either, and so cannot be stored in a pack index. These kinds of things include shallows, their parents, and their trees, as well as submodules. Yet we also need to implement a sensible way to store the kind of object so that we can prune unneeded entries. For instance, if the user has updated the shallows, we can remove the old values. For these reasons, introduce a new binary object map format. The careful reader will notice that it resembles very closely the pack index v3 format. Add an in-memory object map as well, and allow writing to a batched map, which can then be written later as one of the binary object maps. Include several tests for round tripping and data lookup across algorithms. Note that the use of this code elsewhere in Git will involve some C code and some C-compatible code in Rust that will be introduced in a future commit. Thus, for example, we ignore the fact that if there is no current batch and the caller asks for data to be written, this code does nothing, mostly because this code also does not involve itself with opening or manipulating files. The C code that we will add later will implement this functionality at a higher level and take care of this, since the code which is necessary for writing to the object store is deeply involved with our C abstractions and it would require extensive work (which would not be especially valuable at this point) to port those to Rust. Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
914 lines
31 KiB
Rust
914 lines
31 KiB
Rust
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation: version 2 of the License, dated June 1991.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License along
|
|
// with this program; if not, see <https://www.gnu.org/licenses/>.
|
|
|
|
use crate::hash::{HashAlgorithm, ObjectID, GIT_MAX_RAWSZ};
|
|
use std::collections::BTreeMap;
|
|
use std::convert::TryInto;
|
|
use std::io::{self, Write};
|
|
|
|
/// The type of object stored in the map.
|
|
///
|
|
/// If this value is `Reserved`, then it is never written to disk and is used primarily to store
|
|
/// certain hard-coded objects, like the empty tree, empty blob, or null object ID.
|
|
///
|
|
/// If this value is `LooseObject`, then this represents a loose object. `Shallow` represents a
|
|
/// shallow commit, its parent, or its tree. `Submodule` represents a submodule commit.
|
|
#[repr(C)]
|
|
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
|
|
pub enum MapType {
|
|
Reserved = 0,
|
|
LooseObject = 1,
|
|
Shallow = 2,
|
|
Submodule = 3,
|
|
}
|
|
|
|
impl MapType {
|
|
pub fn from_u32(n: u32) -> Option<MapType> {
|
|
match n {
|
|
0 => Some(Self::Reserved),
|
|
1 => Some(Self::LooseObject),
|
|
2 => Some(Self::Shallow),
|
|
3 => Some(Self::Submodule),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// The value of an object stored in a `ObjectMemoryMap`.
|
|
///
|
|
/// This keeps the object ID to which the key is mapped and its kind together.
|
|
struct MappedObject {
|
|
oid: ObjectID,
|
|
kind: MapType,
|
|
}
|
|
|
|
/// Memory storage for a loose object.
|
|
struct ObjectMemoryMap {
|
|
to_compat: BTreeMap<ObjectID, MappedObject>,
|
|
to_storage: BTreeMap<ObjectID, MappedObject>,
|
|
compat: HashAlgorithm,
|
|
storage: HashAlgorithm,
|
|
}
|
|
|
|
impl ObjectMemoryMap {
|
|
/// Create a new `ObjectMemoryMap`.
|
|
///
|
|
/// The storage and compatibility `HashAlgorithm` instances are used to store the object IDs in
|
|
/// the correct map.
|
|
fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> Self {
|
|
Self {
|
|
to_compat: BTreeMap::new(),
|
|
to_storage: BTreeMap::new(),
|
|
compat,
|
|
storage,
|
|
}
|
|
}
|
|
|
|
fn len(&self) -> usize {
|
|
self.to_compat.len()
|
|
}
|
|
|
|
/// Write this map to an interface implementing `std::io::Write`.
|
|
fn write<W: Write>(&self, wrtr: W) -> io::Result<()> {
|
|
const VERSION_NUMBER: u32 = 1;
|
|
const NUM_OBJECT_FORMATS: u32 = 2;
|
|
const PADDING: [u8; 4] = [0u8; 4];
|
|
|
|
let mut wrtr = wrtr;
|
|
let header_size: u32 = (4 * 5) + (4 + 4 + 8) * NUM_OBJECT_FORMATS + 8;
|
|
|
|
wrtr.write_all(b"LMAP")?;
|
|
wrtr.write_all(&VERSION_NUMBER.to_be_bytes())?;
|
|
wrtr.write_all(&header_size.to_be_bytes())?;
|
|
wrtr.write_all(&(self.to_compat.len() as u32).to_be_bytes())?;
|
|
wrtr.write_all(&NUM_OBJECT_FORMATS.to_be_bytes())?;
|
|
|
|
let storage_short_len = self.find_short_name_len(&self.to_compat, self.storage);
|
|
let compat_short_len = self.find_short_name_len(&self.to_storage, self.compat);
|
|
|
|
let storage_npadding = Self::required_nul_padding(self.to_compat.len(), storage_short_len);
|
|
let compat_npadding = Self::required_nul_padding(self.to_compat.len(), compat_short_len);
|
|
|
|
let mut offset: u64 = header_size as u64;
|
|
|
|
for (algo, len, npadding) in &[
|
|
(self.storage, storage_short_len, storage_npadding),
|
|
(self.compat, compat_short_len, compat_npadding),
|
|
] {
|
|
wrtr.write_all(&algo.format_id().to_be_bytes())?;
|
|
wrtr.write_all(&(*len as u32).to_be_bytes())?;
|
|
|
|
offset += *npadding;
|
|
wrtr.write_all(&offset.to_be_bytes())?;
|
|
|
|
offset += self.to_compat.len() as u64 * (*len as u64 + algo.raw_len() as u64 + 4);
|
|
}
|
|
|
|
wrtr.write_all(&offset.to_be_bytes())?;
|
|
|
|
let order_map: BTreeMap<&ObjectID, usize> = self
|
|
.to_compat
|
|
.keys()
|
|
.enumerate()
|
|
.map(|(i, oid)| (oid, i))
|
|
.collect();
|
|
|
|
wrtr.write_all(&PADDING[0..storage_npadding as usize])?;
|
|
for oid in self.to_compat.keys() {
|
|
wrtr.write_all(&oid.as_slice().unwrap()[0..storage_short_len])?;
|
|
}
|
|
for oid in self.to_compat.keys() {
|
|
wrtr.write_all(oid.as_slice().unwrap())?;
|
|
}
|
|
for meta in self.to_compat.values() {
|
|
wrtr.write_all(&(meta.kind as u32).to_be_bytes())?;
|
|
}
|
|
|
|
wrtr.write_all(&PADDING[0..compat_npadding as usize])?;
|
|
for oid in self.to_storage.keys() {
|
|
wrtr.write_all(&oid.as_slice().unwrap()[0..compat_short_len])?;
|
|
}
|
|
for meta in self.to_compat.values() {
|
|
wrtr.write_all(meta.oid.as_slice().unwrap())?;
|
|
}
|
|
for meta in self.to_storage.values() {
|
|
wrtr.write_all(&(order_map[&meta.oid] as u32).to_be_bytes())?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn required_nul_padding(nitems: usize, short_len: usize) -> u64 {
|
|
let shortened_table_len = nitems as u64 * short_len as u64;
|
|
let misalignment = shortened_table_len & 3;
|
|
// If the value is 0, return 0; otherwise, return the difference from 4.
|
|
(4 - misalignment) & 3
|
|
}
|
|
|
|
fn last_matching_offset(a: &ObjectID, b: &ObjectID, algop: HashAlgorithm) -> usize {
|
|
for i in 0..=algop.raw_len() {
|
|
if a.hash[i] != b.hash[i] {
|
|
return i;
|
|
}
|
|
}
|
|
algop.raw_len()
|
|
}
|
|
|
|
fn find_short_name_len(
|
|
&self,
|
|
map: &BTreeMap<ObjectID, MappedObject>,
|
|
algop: HashAlgorithm,
|
|
) -> usize {
|
|
if map.len() <= 1 {
|
|
return 1;
|
|
}
|
|
let mut len = 1;
|
|
let mut iter = map.keys();
|
|
let mut cur = match iter.next() {
|
|
Some(cur) => cur,
|
|
None => return len,
|
|
};
|
|
for item in iter {
|
|
let offset = Self::last_matching_offset(cur, item, algop);
|
|
if offset >= len {
|
|
len = offset + 1;
|
|
}
|
|
cur = item;
|
|
}
|
|
if len > algop.raw_len() {
|
|
algop.raw_len()
|
|
} else {
|
|
len
|
|
}
|
|
}
|
|
}
|
|
|
|
struct ObjectFormatData {
|
|
data_off: usize,
|
|
shortened_len: usize,
|
|
full_off: usize,
|
|
mapping_off: Option<usize>,
|
|
}
|
|
|
|
pub struct MmapedObjectMapIter<'a> {
|
|
offset: usize,
|
|
algos: Vec<HashAlgorithm>,
|
|
source: &'a MmapedObjectMap<'a>,
|
|
}
|
|
|
|
impl<'a> Iterator for MmapedObjectMapIter<'a> {
|
|
type Item = Vec<ObjectID>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if self.offset >= self.source.nitems {
|
|
return None;
|
|
}
|
|
let offset = self.offset;
|
|
self.offset += 1;
|
|
let v: Vec<ObjectID> = self
|
|
.algos
|
|
.iter()
|
|
.cloned()
|
|
.filter_map(|algo| self.source.oid_from_offset(offset, algo))
|
|
.collect();
|
|
if v.len() != self.algos.len() {
|
|
return None;
|
|
}
|
|
Some(v)
|
|
}
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
pub struct MmapedObjectMap<'a> {
|
|
memory: &'a [u8],
|
|
nitems: usize,
|
|
meta_off: usize,
|
|
obj_formats: BTreeMap<HashAlgorithm, ObjectFormatData>,
|
|
main_algo: HashAlgorithm,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
#[allow(dead_code)]
|
|
enum MmapedParseError {
|
|
HeaderTooSmall,
|
|
InvalidSignature,
|
|
InvalidVersion,
|
|
UnknownAlgorithm,
|
|
OffsetTooLarge,
|
|
TooFewObjectFormats,
|
|
UnalignedData,
|
|
InvalidTrailerOffset,
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
impl<'a> MmapedObjectMap<'a> {
|
|
fn new(
|
|
slice: &'a [u8],
|
|
hash_algo: HashAlgorithm,
|
|
) -> Result<MmapedObjectMap<'a>, MmapedParseError> {
|
|
let object_format_header_size = 4 + 4 + 8;
|
|
let trailer_offset_size = 8;
|
|
let header_size: usize =
|
|
4 + 4 + 4 + 4 + 4 + object_format_header_size * 2 + trailer_offset_size;
|
|
if slice.len() < header_size {
|
|
return Err(MmapedParseError::HeaderTooSmall);
|
|
}
|
|
if slice[0..4] != *b"LMAP" {
|
|
return Err(MmapedParseError::InvalidSignature);
|
|
}
|
|
if Self::u32_at_offset(slice, 4) != 1 {
|
|
return Err(MmapedParseError::InvalidVersion);
|
|
}
|
|
let _ = Self::u32_at_offset(slice, 8) as usize;
|
|
let nitems = Self::u32_at_offset(slice, 12) as usize;
|
|
let nobj_formats = Self::u32_at_offset(slice, 16) as usize;
|
|
if nobj_formats < 2 {
|
|
return Err(MmapedParseError::TooFewObjectFormats);
|
|
}
|
|
let mut offset = 20;
|
|
let mut meta_off = None;
|
|
let mut data = BTreeMap::new();
|
|
for i in 0..nobj_formats {
|
|
if offset + object_format_header_size + trailer_offset_size > slice.len() {
|
|
return Err(MmapedParseError::HeaderTooSmall);
|
|
}
|
|
let format_id = Self::u32_at_offset(slice, offset);
|
|
let shortened_len = Self::u32_at_offset(slice, offset + 4) as usize;
|
|
let data_off = Self::u64_at_offset(slice, offset + 8);
|
|
|
|
let algo = HashAlgorithm::from_format_id(format_id)
|
|
.ok_or(MmapedParseError::UnknownAlgorithm)?;
|
|
let data_off: usize = data_off
|
|
.try_into()
|
|
.map_err(|_| MmapedParseError::OffsetTooLarge)?;
|
|
|
|
// Every object format must have these entries.
|
|
let shortened_table_len = shortened_len
|
|
.checked_mul(nitems)
|
|
.ok_or(MmapedParseError::OffsetTooLarge)?;
|
|
let full_off = data_off
|
|
.checked_add(shortened_table_len)
|
|
.ok_or(MmapedParseError::OffsetTooLarge)?;
|
|
Self::verify_aligned(full_off)?;
|
|
Self::verify_valid(slice, full_off as u64)?;
|
|
|
|
let full_length = algo
|
|
.raw_len()
|
|
.checked_mul(nitems)
|
|
.ok_or(MmapedParseError::OffsetTooLarge)?;
|
|
let off = full_length
|
|
.checked_add(full_off)
|
|
.ok_or(MmapedParseError::OffsetTooLarge)?;
|
|
Self::verify_aligned(off)?;
|
|
Self::verify_valid(slice, off as u64)?;
|
|
|
|
// This is for the metadata for the first object format and for the order mapping for
|
|
// other object formats.
|
|
let meta_size = nitems
|
|
.checked_mul(4)
|
|
.ok_or(MmapedParseError::OffsetTooLarge)?;
|
|
let meta_end = off
|
|
.checked_add(meta_size)
|
|
.ok_or(MmapedParseError::OffsetTooLarge)?;
|
|
Self::verify_valid(slice, meta_end as u64)?;
|
|
|
|
let mut mapping_off = None;
|
|
if i == 0 {
|
|
meta_off = Some(off);
|
|
} else {
|
|
mapping_off = Some(off);
|
|
}
|
|
|
|
data.insert(
|
|
algo,
|
|
ObjectFormatData {
|
|
data_off,
|
|
shortened_len,
|
|
full_off,
|
|
mapping_off,
|
|
},
|
|
);
|
|
offset += object_format_header_size;
|
|
}
|
|
let trailer = Self::u64_at_offset(slice, offset);
|
|
Self::verify_aligned(trailer as usize)?;
|
|
Self::verify_valid(slice, trailer)?;
|
|
let end = trailer
|
|
.checked_add(hash_algo.raw_len() as u64)
|
|
.ok_or(MmapedParseError::OffsetTooLarge)?;
|
|
if end != slice.len() as u64 {
|
|
return Err(MmapedParseError::InvalidTrailerOffset);
|
|
}
|
|
match meta_off {
|
|
Some(meta_off) => Ok(MmapedObjectMap {
|
|
memory: slice,
|
|
nitems,
|
|
meta_off,
|
|
obj_formats: data,
|
|
main_algo: hash_algo,
|
|
}),
|
|
None => Err(MmapedParseError::TooFewObjectFormats),
|
|
}
|
|
}
|
|
|
|
fn iter(&self) -> MmapedObjectMapIter<'_> {
|
|
let mut algos = Vec::with_capacity(self.obj_formats.len());
|
|
algos.push(self.main_algo);
|
|
for algo in self.obj_formats.keys().cloned() {
|
|
if algo != self.main_algo {
|
|
algos.push(algo);
|
|
}
|
|
}
|
|
MmapedObjectMapIter {
|
|
offset: 0,
|
|
algos,
|
|
source: self,
|
|
}
|
|
}
|
|
|
|
/// Treats `sl` as if it were a set of slices of `wanted.len()` bytes, and searches for
|
|
/// `wanted` within it.
|
|
///
|
|
/// If found, returns the offset of the subslice in `sl`.
|
|
///
|
|
/// ```
|
|
/// let sl = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
|
|
///
|
|
/// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[2, 3]), Some(1));
|
|
/// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[6, 7]), Some(4));
|
|
/// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[1, 2]), None);
|
|
/// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[10, 20]), None);
|
|
/// ```
|
|
fn binary_search_slice(sl: &[u8], wanted: &[u8]) -> Option<usize> {
|
|
let len = wanted.len();
|
|
let res = sl.binary_search_by(|item| {
|
|
// We would like element_offset, but that is currently nightly only. Instead, do a
|
|
// pointer subtraction to find the index.
|
|
let index = unsafe { (item as *const u8).offset_from(sl.as_ptr()) } as usize;
|
|
// Now we have the index of this object. Round it down to the nearest full-sized
|
|
// chunk to find the actual offset where this starts.
|
|
let index = index - (index % len);
|
|
// Compute the comparison of that value instead, which will provide the expected
|
|
// result.
|
|
sl[index..index + wanted.len()].cmp(wanted)
|
|
});
|
|
res.ok().map(|offset| offset / len)
|
|
}
|
|
|
|
/// Look up `oid` in the map in order to convert it to `algo`.
|
|
///
|
|
/// If this object is in the map, return the offset in the table for the main algorithm.
|
|
fn look_up_object(&self, oid: &ObjectID) -> Option<usize> {
|
|
let oid_algo = HashAlgorithm::from_u32(oid.algo)?;
|
|
let params = self.obj_formats.get(&oid_algo)?;
|
|
let short_table =
|
|
&self.memory[params.data_off..params.data_off + (params.shortened_len * self.nitems)];
|
|
let index = Self::binary_search_slice(
|
|
short_table,
|
|
&oid.as_slice().unwrap()[0..params.shortened_len],
|
|
)?;
|
|
match params.mapping_off {
|
|
Some(from_off) => {
|
|
// oid is in a compatibility algorithm. Find the mapping index.
|
|
let mapped = Self::u32_at_offset(self.memory, from_off + index * 4) as usize;
|
|
if mapped >= self.nitems {
|
|
return None;
|
|
}
|
|
let oid_offset = params.full_off + mapped * oid_algo.raw_len();
|
|
if self.memory[oid_offset..oid_offset + oid_algo.raw_len()]
|
|
!= *oid.as_slice().unwrap()
|
|
{
|
|
return None;
|
|
}
|
|
Some(mapped)
|
|
}
|
|
None => {
|
|
// oid is in the main algorithm. Find the object ID in the main map to confirm
|
|
// it's correct.
|
|
let oid_offset = params.full_off + index * oid_algo.raw_len();
|
|
if self.memory[oid_offset..oid_offset + oid_algo.raw_len()]
|
|
!= *oid.as_slice().unwrap()
|
|
{
|
|
return None;
|
|
}
|
|
Some(index)
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<MappedObject> {
|
|
let main = self.look_up_object(oid)?;
|
|
let meta = MapType::from_u32(Self::u32_at_offset(self.memory, self.meta_off + (main * 4)))?;
|
|
Some(MappedObject {
|
|
oid: self.oid_from_offset(main, algo)?,
|
|
kind: meta,
|
|
})
|
|
}
|
|
|
|
fn map_oid(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<ObjectID> {
|
|
if algo as u32 == oid.algo {
|
|
return Some(oid.clone());
|
|
}
|
|
|
|
let main = self.look_up_object(oid)?;
|
|
self.oid_from_offset(main, algo)
|
|
}
|
|
|
|
fn oid_from_offset(&self, offset: usize, algo: HashAlgorithm) -> Option<ObjectID> {
|
|
let aparams = self.obj_formats.get(&algo)?;
|
|
|
|
let mut hash = [0u8; GIT_MAX_RAWSZ];
|
|
let len = algo.raw_len();
|
|
let oid_off = aparams.full_off + (offset * len);
|
|
hash[0..len].copy_from_slice(&self.memory[oid_off..oid_off + len]);
|
|
Some(ObjectID {
|
|
hash,
|
|
algo: algo as u32,
|
|
})
|
|
}
|
|
|
|
fn u32_at_offset(slice: &[u8], offset: usize) -> u32 {
|
|
u32::from_be_bytes(slice[offset..offset + 4].try_into().unwrap())
|
|
}
|
|
|
|
fn u64_at_offset(slice: &[u8], offset: usize) -> u64 {
|
|
u64::from_be_bytes(slice[offset..offset + 8].try_into().unwrap())
|
|
}
|
|
|
|
fn verify_aligned(offset: usize) -> Result<(), MmapedParseError> {
|
|
if (offset & 3) != 0 {
|
|
return Err(MmapedParseError::UnalignedData);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn verify_valid(slice: &[u8], offset: u64) -> Result<(), MmapedParseError> {
|
|
if offset >= slice.len() as u64 {
|
|
return Err(MmapedParseError::OffsetTooLarge);
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
/// A map for loose and other non-packed object IDs that maps between a storage and compatibility
|
|
/// mapping.
|
|
///
|
|
/// In addition to the in-memory option, there is an optional batched storage, which can be used to
|
|
/// write objects to disk in an efficient way.
|
|
pub struct ObjectMap {
|
|
mem: ObjectMemoryMap,
|
|
batch: Option<ObjectMemoryMap>,
|
|
}
|
|
|
|
impl ObjectMap {
|
|
/// Create a new `ObjectMap` with the given hash algorithms.
|
|
///
|
|
/// This initializes the memory map to automatically map the empty tree, empty blob, and null
|
|
/// object ID.
|
|
pub fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> Self {
|
|
let mut map = ObjectMemoryMap::new(storage, compat);
|
|
for (main, compat) in &[
|
|
(storage.empty_tree(), compat.empty_tree()),
|
|
(storage.empty_blob(), compat.empty_blob()),
|
|
(storage.null_oid(), compat.null_oid()),
|
|
] {
|
|
map.to_storage.insert(
|
|
(*compat).clone(),
|
|
MappedObject {
|
|
oid: (*main).clone(),
|
|
kind: MapType::Reserved,
|
|
},
|
|
);
|
|
map.to_compat.insert(
|
|
(*main).clone(),
|
|
MappedObject {
|
|
oid: (*compat).clone(),
|
|
kind: MapType::Reserved,
|
|
},
|
|
);
|
|
}
|
|
Self {
|
|
mem: map,
|
|
batch: None,
|
|
}
|
|
}
|
|
|
|
pub fn hash_algo(&self) -> HashAlgorithm {
|
|
self.mem.storage
|
|
}
|
|
|
|
/// Start a batch for efficient writing.
|
|
///
|
|
/// If there is already a batch started, this does nothing and the existing batch is retained.
|
|
pub fn start_batch(&mut self) {
|
|
if self.batch.is_none() {
|
|
self.batch = Some(ObjectMemoryMap::new(self.mem.storage, self.mem.compat));
|
|
}
|
|
}
|
|
|
|
pub fn batch_len(&self) -> Option<usize> {
|
|
self.batch.as_ref().map(|b| b.len())
|
|
}
|
|
|
|
/// If a batch exists, write it to the writer.
|
|
pub fn finish_batch<W: Write>(&mut self, w: W) -> io::Result<()> {
|
|
if let Some(txn) = self.batch.take() {
|
|
txn.write(w)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// If a batch exists, write it to the writer.
|
|
pub fn abort_batch(&mut self) {
|
|
self.batch = None;
|
|
}
|
|
|
|
/// Return whether there is a batch already started.
|
|
///
|
|
/// If you just want a batch to exist and don't care whether one has already been started, you
|
|
/// may simply call `start_batch` unconditionally.
|
|
pub fn has_batch(&self) -> bool {
|
|
self.batch.is_some()
|
|
}
|
|
|
|
/// Insert an object into the map.
|
|
///
|
|
/// If `write` is true and there is a batch started, write the object into the batch as well as
|
|
/// into the memory map.
|
|
pub fn insert(&mut self, oid1: &ObjectID, oid2: &ObjectID, kind: MapType, write: bool) {
|
|
let (compat_oid, storage_oid) =
|
|
if HashAlgorithm::from_u32(oid1.algo) == Some(self.mem.compat) {
|
|
(oid1, oid2)
|
|
} else {
|
|
(oid2, oid1)
|
|
};
|
|
Self::insert_into(&mut self.mem, storage_oid, compat_oid, kind);
|
|
if write {
|
|
if let Some(ref mut batch) = self.batch {
|
|
Self::insert_into(batch, storage_oid, compat_oid, kind);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn insert_into(
|
|
map: &mut ObjectMemoryMap,
|
|
storage: &ObjectID,
|
|
compat: &ObjectID,
|
|
kind: MapType,
|
|
) {
|
|
map.to_compat.insert(
|
|
storage.clone(),
|
|
MappedObject {
|
|
oid: compat.clone(),
|
|
kind,
|
|
},
|
|
);
|
|
map.to_storage.insert(
|
|
compat.clone(),
|
|
MappedObject {
|
|
oid: storage.clone(),
|
|
kind,
|
|
},
|
|
);
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<&MappedObject> {
|
|
let map = if algo == self.mem.storage {
|
|
&self.mem.to_storage
|
|
} else {
|
|
&self.mem.to_compat
|
|
};
|
|
map.get(oid)
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
fn map_oid<'a, 'b: 'a>(
|
|
&'b self,
|
|
oid: &'a ObjectID,
|
|
algo: HashAlgorithm,
|
|
) -> Option<&'a ObjectID> {
|
|
if algo as u32 == oid.algo {
|
|
return Some(oid);
|
|
}
|
|
let entry = self.map_object(oid, algo);
|
|
entry.map(|obj| &obj.oid)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::{MapType, MmapedObjectMap, ObjectMap, ObjectMemoryMap};
|
|
use crate::hash::{CryptoDigest, CryptoHasher, HashAlgorithm, ObjectID};
|
|
use std::convert::TryInto;
|
|
use std::io::{self, Cursor, Write};
|
|
|
|
struct TrailingWriter {
|
|
curs: Cursor<Vec<u8>>,
|
|
hasher: CryptoHasher,
|
|
}
|
|
|
|
impl TrailingWriter {
|
|
fn new() -> Self {
|
|
Self {
|
|
curs: Cursor::new(Vec::new()),
|
|
hasher: CryptoHasher::new(HashAlgorithm::SHA256),
|
|
}
|
|
}
|
|
|
|
fn finalize(mut self) -> Vec<u8> {
|
|
let _ = self.hasher.flush();
|
|
let mut v = self.curs.into_inner();
|
|
v.extend(self.hasher.into_vec());
|
|
v
|
|
}
|
|
}
|
|
|
|
impl Write for TrailingWriter {
|
|
fn write(&mut self, data: &[u8]) -> io::Result<usize> {
|
|
self.hasher.write_all(data)?;
|
|
self.curs.write_all(data)?;
|
|
Ok(data.len())
|
|
}
|
|
|
|
fn flush(&mut self) -> io::Result<()> {
|
|
self.hasher.flush()?;
|
|
self.curs.flush()?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
fn sha1_oid(b: &[u8]) -> ObjectID {
|
|
assert_eq!(b.len(), 20);
|
|
let mut data = [0u8; 32];
|
|
data[0..20].copy_from_slice(b);
|
|
ObjectID {
|
|
hash: data,
|
|
algo: HashAlgorithm::SHA1 as u32,
|
|
}
|
|
}
|
|
|
|
fn sha256_oid(b: &[u8]) -> ObjectID {
|
|
assert_eq!(b.len(), 32);
|
|
ObjectID {
|
|
hash: b.try_into().unwrap(),
|
|
algo: HashAlgorithm::SHA256 as u32,
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::type_complexity)]
|
|
fn test_entries() -> &'static [(&'static str, &'static [u8], &'static [u8], MapType, bool)] {
|
|
// These are all example blobs containing the content in the first argument.
|
|
&[
|
|
("abc", b"\xf2\xba\x8f\x84\xab\x5c\x1b\xce\x84\xa7\xb4\x41\xcb\x19\x59\xcf\xc7\x09\x3b\x7f", b"\xc1\xcf\x6e\x46\x50\x77\x93\x0e\x88\xdc\x51\x36\x64\x1d\x40\x2f\x72\xa2\x29\xdd\xd9\x96\xf6\x27\xd6\x0e\x96\x39\xea\xba\x35\xa6", MapType::LooseObject, false),
|
|
("def", b"\x0c\x00\x38\x32\xe7\xbf\xa9\xca\x8b\x5c\x20\x35\xc9\xbd\x68\x4a\x5f\x26\x23\xbc", b"\x8a\x90\x17\x26\x48\x4d\xb0\xf2\x27\x9f\x30\x8d\x58\x96\xd9\x6b\xf6\x3a\xd6\xde\x95\x7c\xa3\x8a\xdc\x33\x61\x68\x03\x6e\xf6\x63", MapType::Shallow, true),
|
|
("ghi", b"\x45\xa8\x2e\x29\x5c\x52\x47\x31\x14\xc5\x7c\x18\xf4\xf5\x23\x68\xdf\x2a\x3c\xfd", b"\x6e\x47\x4c\x74\xf5\xd7\x78\x14\xc7\xf7\xf0\x7c\x37\x80\x07\x90\x53\x42\xaf\x42\x81\xe6\x86\x8d\x33\x46\x45\x4b\xb8\x63\xab\xc3", MapType::Submodule, false),
|
|
("jkl", b"\x45\x32\x8c\x36\xff\x2e\x9b\x9b\x4e\x59\x2c\x84\x7d\x3f\x9a\x7f\xd9\xb3\xe7\x16", b"\xc3\xee\xf7\x54\xa2\x1e\xc6\x9d\x43\x75\xbe\x6f\x18\x47\x89\xa8\x11\x6f\xd9\x66\xfc\x67\xdc\x31\xd2\x11\x15\x42\xc8\xd5\xa0\xaf", MapType::LooseObject, true),
|
|
]
|
|
}
|
|
|
|
fn test_map(write_all: bool) -> Box<ObjectMap> {
|
|
let mut map = Box::new(ObjectMap::new(HashAlgorithm::SHA256, HashAlgorithm::SHA1));
|
|
|
|
map.start_batch();
|
|
|
|
for (_blob_content, sha1, sha256, kind, swap) in test_entries() {
|
|
let s256 = sha256_oid(sha256);
|
|
let s1 = sha1_oid(sha1);
|
|
let write = write_all || (*kind as u32 & 2) == 0;
|
|
if *swap {
|
|
// Insert the item into the batch arbitrarily based on the type. This tests that
|
|
// we can specify either order and we'll do the right thing.
|
|
map.insert(&s256, &s1, *kind, write);
|
|
} else {
|
|
map.insert(&s1, &s256, *kind, write);
|
|
}
|
|
}
|
|
|
|
map
|
|
}
|
|
|
|
#[test]
|
|
fn can_read_and_write_format() {
|
|
for full in &[true, false] {
|
|
let mut map = test_map(*full);
|
|
let mut wrtr = TrailingWriter::new();
|
|
map.finish_batch(&mut wrtr).unwrap();
|
|
|
|
assert!(!map.has_batch());
|
|
|
|
let data = wrtr.finalize();
|
|
MmapedObjectMap::new(&data, HashAlgorithm::SHA256).unwrap();
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn looks_up_from_mmaped() {
|
|
let mut map = test_map(true);
|
|
let mut wrtr = TrailingWriter::new();
|
|
map.finish_batch(&mut wrtr).unwrap();
|
|
|
|
assert!(!map.has_batch());
|
|
|
|
let data = wrtr.finalize();
|
|
let entries = test_entries();
|
|
let map = MmapedObjectMap::new(&data, HashAlgorithm::SHA256).unwrap();
|
|
|
|
for (_, sha1, sha256, kind, _) in entries {
|
|
let s256 = sha256_oid(sha256);
|
|
let s1 = sha1_oid(sha1);
|
|
|
|
let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(res.oid, s1);
|
|
assert_eq!(res.kind, *kind);
|
|
let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(res, s1);
|
|
|
|
let res = map.map_object(&s256, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(res.oid, s256);
|
|
assert_eq!(res.kind, *kind);
|
|
let res = map.map_oid(&s256, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(res, s256);
|
|
|
|
let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(res.oid, s256);
|
|
assert_eq!(res.kind, *kind);
|
|
let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(res, s256);
|
|
|
|
let res = map.map_object(&s1, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(res.oid, s1);
|
|
assert_eq!(res.kind, *kind);
|
|
let res = map.map_oid(&s1, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(res, s1);
|
|
}
|
|
|
|
for octet in &[0x00u8, 0x6d, 0x6e, 0x8a, 0xff] {
|
|
let missing_oid = ObjectID {
|
|
hash: [*octet; 32],
|
|
algo: HashAlgorithm::SHA256 as u32,
|
|
};
|
|
|
|
assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none());
|
|
assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none());
|
|
|
|
assert_eq!(
|
|
map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(),
|
|
missing_oid
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn binary_searches_slices_correctly() {
|
|
let sl = &[
|
|
0, 1, 2, 15, 14, 13, 18, 10, 2, 20, 20, 20, 21, 21, 0, 21, 21, 1, 21, 21, 21, 21, 21,
|
|
22, 22, 23, 24,
|
|
];
|
|
|
|
let expected: &[(&[u8], Option<usize>)] = &[
|
|
(&[0, 1, 2], Some(0)),
|
|
(&[15, 14, 13], Some(1)),
|
|
(&[18, 10, 2], Some(2)),
|
|
(&[20, 20, 20], Some(3)),
|
|
(&[21, 21, 0], Some(4)),
|
|
(&[21, 21, 1], Some(5)),
|
|
(&[21, 21, 21], Some(6)),
|
|
(&[21, 21, 22], Some(7)),
|
|
(&[22, 23, 24], Some(8)),
|
|
(&[2, 15, 14], None),
|
|
(&[0, 21, 21], None),
|
|
(&[21, 21, 23], None),
|
|
(&[22, 22, 23], None),
|
|
(&[0xff, 0xff, 0xff], None),
|
|
(&[0, 0, 0], None),
|
|
];
|
|
|
|
for (wanted, value) in expected {
|
|
assert_eq!(MmapedObjectMap::binary_search_slice(sl, wanted), *value);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn looks_up_oid_correctly() {
|
|
let map = test_map(false);
|
|
let entries = test_entries();
|
|
|
|
let s256 = sha256_oid(entries[0].2);
|
|
let s1 = sha1_oid(entries[0].1);
|
|
|
|
let missing_oid = ObjectID {
|
|
hash: [0xffu8; 32],
|
|
algo: HashAlgorithm::SHA256 as u32,
|
|
};
|
|
|
|
let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(res.oid, s1);
|
|
assert_eq!(res.kind, MapType::LooseObject);
|
|
let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(*res, s1);
|
|
|
|
let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(res.oid, s256);
|
|
assert_eq!(res.kind, MapType::LooseObject);
|
|
let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(*res, s256);
|
|
|
|
assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none());
|
|
assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none());
|
|
|
|
assert_eq!(
|
|
*map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(),
|
|
missing_oid
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn looks_up_known_oids_correctly() {
|
|
let map = test_map(false);
|
|
|
|
let funcs: &[&dyn Fn(HashAlgorithm) -> &'static ObjectID] = &[
|
|
&|h: HashAlgorithm| h.empty_tree(),
|
|
&|h: HashAlgorithm| h.empty_blob(),
|
|
&|h: HashAlgorithm| h.null_oid(),
|
|
];
|
|
|
|
for f in funcs {
|
|
let s256 = f(HashAlgorithm::SHA256);
|
|
let s1 = f(HashAlgorithm::SHA1);
|
|
|
|
let res = map.map_object(s256, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(res.oid, *s1);
|
|
assert_eq!(res.kind, MapType::Reserved);
|
|
let res = map.map_oid(s256, HashAlgorithm::SHA1).unwrap();
|
|
assert_eq!(*res, *s1);
|
|
|
|
let res = map.map_object(s1, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(res.oid, *s256);
|
|
assert_eq!(res.kind, MapType::Reserved);
|
|
let res = map.map_oid(s1, HashAlgorithm::SHA256).unwrap();
|
|
assert_eq!(*res, *s256);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn nul_padding() {
|
|
assert_eq!(ObjectMemoryMap::required_nul_padding(1, 1), 3);
|
|
assert_eq!(ObjectMemoryMap::required_nul_padding(2, 1), 2);
|
|
assert_eq!(ObjectMemoryMap::required_nul_padding(3, 1), 1);
|
|
assert_eq!(ObjectMemoryMap::required_nul_padding(2, 2), 0);
|
|
|
|
assert_eq!(ObjectMemoryMap::required_nul_padding(39, 3), 3);
|
|
}
|
|
}
|