git/src/loose.rs

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation: version 2 of the License, dated June 1991.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, see <https://www.gnu.org/licenses/>.

use crate::hash::{HashAlgorithm, ObjectID, GIT_MAX_RAWSZ};
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::io::{self, Write};

/// The type of object stored in the map.
///
/// If this value is `Reserved`, then it is never written to disk and is used primarily to store
/// certain hard-coded objects, like the empty tree, empty blob, or null object ID.
///
/// If this value is `LooseObject`, then this represents a loose object.  `Shallow` represents a
/// shallow commit, its parent, or its tree.  `Submodule` represents a submodule commit.
#[repr(C)]
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
pub enum MapType {
    Reserved = 0,
    LooseObject = 1,
    Shallow = 2,
    Submodule = 3,
}

impl MapType {
    pub fn from_u32(n: u32) -> Option<MapType> {
        match n {
            0 => Some(Self::Reserved),
            1 => Some(Self::LooseObject),
            2 => Some(Self::Shallow),
            3 => Some(Self::Submodule),
            _ => None,
        }
    }
}

/// The value of an object stored in a `ObjectMemoryMap`.
///
/// This keeps the object ID to which the key is mapped and its kind together.
struct MappedObject {
    oid: ObjectID,
    kind: MapType,
}

/// Memory storage for a loose object.
struct ObjectMemoryMap {
    to_compat: BTreeMap<ObjectID, MappedObject>,
    to_storage: BTreeMap<ObjectID, MappedObject>,
    compat: HashAlgorithm,
    storage: HashAlgorithm,
}

impl ObjectMemoryMap {
    /// Create a new `ObjectMemoryMap`.
    ///
    /// The storage and compatibility `HashAlgorithm` instances are used to store the object IDs in
    /// the correct map.
    fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> Self {
        Self {
            to_compat: BTreeMap::new(),
            to_storage: BTreeMap::new(),
            compat,
            storage,
        }
    }

    fn len(&self) -> usize {
        self.to_compat.len()
    }

    /// Write this map to an interface implementing `std::io::Write`.
    fn write<W: Write>(&self, wrtr: W) -> io::Result<()> {
        const VERSION_NUMBER: u32 = 1;
        const NUM_OBJECT_FORMATS: u32 = 2;
        const PADDING: [u8; 4] = [0u8; 4];

        let mut wrtr = wrtr;
        let header_size: u32 = (4 * 5) + (4 + 4 + 8) * NUM_OBJECT_FORMATS + 8;

        wrtr.write_all(b"LMAP")?;
        wrtr.write_all(&VERSION_NUMBER.to_be_bytes())?;
        wrtr.write_all(&header_size.to_be_bytes())?;
        wrtr.write_all(&(self.to_compat.len() as u32).to_be_bytes())?;
        wrtr.write_all(&NUM_OBJECT_FORMATS.to_be_bytes())?;

        let storage_short_len = self.find_short_name_len(&self.to_compat, self.storage);
        let compat_short_len = self.find_short_name_len(&self.to_storage, self.compat);

        let storage_npadding = Self::required_nul_padding(self.to_compat.len(), storage_short_len);
        let compat_npadding = Self::required_nul_padding(self.to_compat.len(), compat_short_len);

        let mut offset: u64 = header_size as u64;

        for (algo, len, npadding) in &[
            (self.storage, storage_short_len, storage_npadding),
            (self.compat, compat_short_len, compat_npadding),
        ] {
            wrtr.write_all(&algo.format_id().to_be_bytes())?;
            wrtr.write_all(&(*len as u32).to_be_bytes())?;

            offset += *npadding;
            wrtr.write_all(&offset.to_be_bytes())?;

            offset += self.to_compat.len() as u64 * (*len as u64 + algo.raw_len() as u64 + 4);
        }

        wrtr.write_all(&offset.to_be_bytes())?;

        let order_map: BTreeMap<&ObjectID, usize> = self
            .to_compat
            .keys()
            .enumerate()
            .map(|(i, oid)| (oid, i))
            .collect();

        wrtr.write_all(&PADDING[0..storage_npadding as usize])?;
        for oid in self.to_compat.keys() {
            wrtr.write_all(&oid.as_slice().unwrap()[0..storage_short_len])?;
        }
        for oid in self.to_compat.keys() {
            wrtr.write_all(oid.as_slice().unwrap())?;
        }
        for meta in self.to_compat.values() {
            wrtr.write_all(&(meta.kind as u32).to_be_bytes())?;
        }

        wrtr.write_all(&PADDING[0..compat_npadding as usize])?;
        for oid in self.to_storage.keys() {
            wrtr.write_all(&oid.as_slice().unwrap()[0..compat_short_len])?;
        }
        for meta in self.to_compat.values() {
            wrtr.write_all(meta.oid.as_slice().unwrap())?;
        }
        for meta in self.to_storage.values() {
            wrtr.write_all(&(order_map[&meta.oid] as u32).to_be_bytes())?;
        }

        Ok(())
    }

    fn required_nul_padding(nitems: usize, short_len: usize) -> u64 {
        let shortened_table_len = nitems as u64 * short_len as u64;
        let misalignment = shortened_table_len & 3;
        // If the value is 0, return 0; otherwise, return the difference from 4.
        (4 - misalignment) & 3
    }

    fn last_matching_offset(a: &ObjectID, b: &ObjectID, algop: HashAlgorithm) -> usize {
        for i in 0..=algop.raw_len() {
            if a.hash[i] != b.hash[i] {
                return i;
            }
        }
        algop.raw_len()
    }

    fn find_short_name_len(
        &self,
        map: &BTreeMap<ObjectID, MappedObject>,
        algop: HashAlgorithm,
    ) -> usize {
        if map.len() <= 1 {
            return 1;
        }
        let mut len = 1;
        let mut iter = map.keys();
        let mut cur = match iter.next() {
            Some(cur) => cur,
            None => return len,
        };
        for item in iter {
            let offset = Self::last_matching_offset(cur, item, algop);
            if offset >= len {
                len = offset + 1;
            }
            cur = item;
        }
        if len > algop.raw_len() {
            algop.raw_len()
        } else {
            len
        }
    }
}

struct ObjectFormatData {
    data_off: usize,
    shortened_len: usize,
    full_off: usize,
    mapping_off: Option<usize>,
}

pub struct MmapedObjectMapIter<'a> {
    offset: usize,
    algos: Vec<HashAlgorithm>,
    source: &'a MmapedObjectMap<'a>,
}

impl<'a> Iterator for MmapedObjectMapIter<'a> {
    type Item = Vec<ObjectID>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.offset >= self.source.nitems {
            return None;
        }
        let offset = self.offset;
        self.offset += 1;
        let v: Vec<ObjectID> = self
            .algos
            .iter()
            .cloned()
            .filter_map(|algo| self.source.oid_from_offset(offset, algo))
            .collect();
        if v.len() != self.algos.len() {
            return None;
        }
        Some(v)
    }
}

#[allow(dead_code)]
pub struct MmapedObjectMap<'a> {
    memory: &'a [u8],
    nitems: usize,
    meta_off: usize,
    obj_formats: BTreeMap<HashAlgorithm, ObjectFormatData>,
    main_algo: HashAlgorithm,
}

#[derive(Debug)]
#[allow(dead_code)]
enum MmapedParseError {
    HeaderTooSmall,
    InvalidSignature,
    InvalidVersion,
    UnknownAlgorithm,
    OffsetTooLarge,
    TooFewObjectFormats,
    UnalignedData,
    InvalidTrailerOffset,
}

#[allow(dead_code)]
impl<'a> MmapedObjectMap<'a> {
    fn new(
        slice: &'a [u8],
        hash_algo: HashAlgorithm,
    ) -> Result<MmapedObjectMap<'a>, MmapedParseError> {
        let object_format_header_size = 4 + 4 + 8;
        let trailer_offset_size = 8;
        let header_size: usize =
            4 + 4 + 4 + 4 + 4 + object_format_header_size * 2 + trailer_offset_size;
        if slice.len() < header_size {
            return Err(MmapedParseError::HeaderTooSmall);
        }
        if slice[0..4] != *b"LMAP" {
            return Err(MmapedParseError::InvalidSignature);
        }
        if Self::u32_at_offset(slice, 4) != 1 {
            return Err(MmapedParseError::InvalidVersion);
        }
        let _ = Self::u32_at_offset(slice, 8) as usize;
        let nitems = Self::u32_at_offset(slice, 12) as usize;
        let nobj_formats = Self::u32_at_offset(slice, 16) as usize;
        if nobj_formats < 2 {
            return Err(MmapedParseError::TooFewObjectFormats);
        }
        let mut offset = 20;
        let mut meta_off = None;
        let mut data = BTreeMap::new();
        for i in 0..nobj_formats {
            if offset + object_format_header_size + trailer_offset_size > slice.len() {
                return Err(MmapedParseError::HeaderTooSmall);
            }
            let format_id = Self::u32_at_offset(slice, offset);
            let shortened_len = Self::u32_at_offset(slice, offset + 4) as usize;
            let data_off = Self::u64_at_offset(slice, offset + 8);

            let algo = HashAlgorithm::from_format_id(format_id)
                .ok_or(MmapedParseError::UnknownAlgorithm)?;
            let data_off: usize = data_off
                .try_into()
                .map_err(|_| MmapedParseError::OffsetTooLarge)?;

            // Every object format must have these entries.
            let shortened_table_len = shortened_len
                .checked_mul(nitems)
                .ok_or(MmapedParseError::OffsetTooLarge)?;
            let full_off = data_off
                .checked_add(shortened_table_len)
                .ok_or(MmapedParseError::OffsetTooLarge)?;
            Self::verify_aligned(full_off)?;
            Self::verify_valid(slice, full_off as u64)?;

            let full_length = algo
                .raw_len()
                .checked_mul(nitems)
                .ok_or(MmapedParseError::OffsetTooLarge)?;
            let off = full_length
                .checked_add(full_off)
                .ok_or(MmapedParseError::OffsetTooLarge)?;
            Self::verify_aligned(off)?;
            Self::verify_valid(slice, off as u64)?;

            // This is for the metadata for the first object format and for the order mapping for
            // other object formats.
            let meta_size = nitems
                .checked_mul(4)
                .ok_or(MmapedParseError::OffsetTooLarge)?;
            let meta_end = off
                .checked_add(meta_size)
                .ok_or(MmapedParseError::OffsetTooLarge)?;
            Self::verify_valid(slice, meta_end as u64)?;

            let mut mapping_off = None;
            if i == 0 {
                meta_off = Some(off);
            } else {
                mapping_off = Some(off);
            }

            data.insert(
                algo,
                ObjectFormatData {
                    data_off,
                    shortened_len,
                    full_off,
                    mapping_off,
                },
            );
            offset += object_format_header_size;
        }
        let trailer = Self::u64_at_offset(slice, offset);
        Self::verify_aligned(trailer as usize)?;
        Self::verify_valid(slice, trailer)?;
        let end = trailer
            .checked_add(hash_algo.raw_len() as u64)
            .ok_or(MmapedParseError::OffsetTooLarge)?;
        if end != slice.len() as u64 {
            return Err(MmapedParseError::InvalidTrailerOffset);
        }
        match meta_off {
            Some(meta_off) => Ok(MmapedObjectMap {
                memory: slice,
                nitems,
                meta_off,
                obj_formats: data,
                main_algo: hash_algo,
            }),
            None => Err(MmapedParseError::TooFewObjectFormats),
        }
    }

    fn iter(&self) -> MmapedObjectMapIter<'_> {
        let mut algos = Vec::with_capacity(self.obj_formats.len());
        algos.push(self.main_algo);
        for algo in self.obj_formats.keys().cloned() {
            if algo != self.main_algo {
                algos.push(algo);
            }
        }
        MmapedObjectMapIter {
            offset: 0,
            algos,
            source: self,
        }
    }

    /// Treats `sl` as if it were a set of slices of `wanted.len()` bytes, and searches for
    /// `wanted` within it.
    ///
    /// If found, returns the offset of the subslice in `sl`.
    ///
    /// ```
    /// let sl = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
    ///
    /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[2, 3]), Some(1));
    /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[6, 7]), Some(4));
    /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[1, 2]), None);
    /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[10, 20]), None);
    /// ```
    fn binary_search_slice(sl: &[u8], wanted: &[u8]) -> Option<usize> {
        let len = wanted.len();
        let res = sl.binary_search_by(|item| {
            // We would like element_offset, but that is currently nightly only.  Instead, do a
            // pointer subtraction to find the index.
            let index = unsafe { (item as *const u8).offset_from(sl.as_ptr()) } as usize;
            // Now we have the index of this object.  Round it down to the nearest full-sized
            // chunk to find the actual offset where this starts.
            let index = index - (index % len);
            // Compute the comparison of that value instead, which will provide the expected
            // result.
            sl[index..index + wanted.len()].cmp(wanted)
        });
        res.ok().map(|offset| offset / len)
    }

    /// Look up `oid` in the map in order to convert it to `algo`.
    ///
    /// If this object is in the map, return the offset in the table for the main algorithm.
    fn look_up_object(&self, oid: &ObjectID) -> Option<usize> {
        let oid_algo = HashAlgorithm::from_u32(oid.algo)?;
        let params = self.obj_formats.get(&oid_algo)?;
        let short_table =
            &self.memory[params.data_off..params.data_off + (params.shortened_len * self.nitems)];
        let index = Self::binary_search_slice(
            short_table,
            &oid.as_slice().unwrap()[0..params.shortened_len],
        )?;
        match params.mapping_off {
            Some(from_off) => {
                // oid is in a compatibility algorithm.  Find the mapping index.
                let mapped = Self::u32_at_offset(self.memory, from_off + index * 4) as usize;
                if mapped >= self.nitems {
                    return None;
                }
                let oid_offset = params.full_off + mapped * oid_algo.raw_len();
                if self.memory[oid_offset..oid_offset + oid_algo.raw_len()]
                    != *oid.as_slice().unwrap()
                {
                    return None;
                }
                Some(mapped)
            }
            None => {
                // oid is in the main algorithm.  Find the object ID in the main map to confirm
                // it's correct.
                let oid_offset = params.full_off + index * oid_algo.raw_len();
                if self.memory[oid_offset..oid_offset + oid_algo.raw_len()]
                    != *oid.as_slice().unwrap()
                {
                    return None;
                }
                Some(index)
            }
        }
    }

    #[allow(dead_code)]
    fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<MappedObject> {
        let main = self.look_up_object(oid)?;
        let meta = MapType::from_u32(Self::u32_at_offset(self.memory, self.meta_off + (main * 4)))?;
        Some(MappedObject {
            oid: self.oid_from_offset(main, algo)?,
            kind: meta,
        })
    }

    fn map_oid(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<ObjectID> {
        if algo as u32 == oid.algo {
            return Some(oid.clone());
        }

        let main = self.look_up_object(oid)?;
        self.oid_from_offset(main, algo)
    }

    fn oid_from_offset(&self, offset: usize, algo: HashAlgorithm) -> Option<ObjectID> {
        let aparams = self.obj_formats.get(&algo)?;

        let mut hash = [0u8; GIT_MAX_RAWSZ];
        let len = algo.raw_len();
        let oid_off = aparams.full_off + (offset * len);
        hash[0..len].copy_from_slice(&self.memory[oid_off..oid_off + len]);
        Some(ObjectID {
            hash,
            algo: algo as u32,
        })
    }

    fn u32_at_offset(slice: &[u8], offset: usize) -> u32 {
        u32::from_be_bytes(slice[offset..offset + 4].try_into().unwrap())
    }

    fn u64_at_offset(slice: &[u8], offset: usize) -> u64 {
        u64::from_be_bytes(slice[offset..offset + 8].try_into().unwrap())
    }

    fn verify_aligned(offset: usize) -> Result<(), MmapedParseError> {
        if (offset & 3) != 0 {
            return Err(MmapedParseError::UnalignedData);
        }
        Ok(())
    }

    fn verify_valid(slice: &[u8], offset: u64) -> Result<(), MmapedParseError> {
        if offset >= slice.len() as u64 {
            return Err(MmapedParseError::OffsetTooLarge);
        }
        Ok(())
    }
}

/// A map for loose and other non-packed object IDs that maps between a storage and compatibility
/// mapping.
///
/// In addition to the in-memory option, there is an optional batched storage, which can be used to
/// write objects to disk in an efficient way.
pub struct ObjectMap {
    mem: ObjectMemoryMap,
    batch: Option<ObjectMemoryMap>,
}

impl ObjectMap {
    /// Create a new `ObjectMap` with the given hash algorithms.
    ///
    /// This initializes the memory map to automatically map the empty tree, empty blob, and null
    /// object ID.
    pub fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> Self {
        let mut map = ObjectMemoryMap::new(storage, compat);
        for (main, compat) in &[
            (storage.empty_tree(), compat.empty_tree()),
            (storage.empty_blob(), compat.empty_blob()),
            (storage.null_oid(), compat.null_oid()),
        ] {
            map.to_storage.insert(
                (*compat).clone(),
                MappedObject {
                    oid: (*main).clone(),
                    kind: MapType::Reserved,
                },
            );
            map.to_compat.insert(
                (*main).clone(),
                MappedObject {
                    oid: (*compat).clone(),
                    kind: MapType::Reserved,
                },
            );
        }
        Self {
            mem: map,
            batch: None,
        }
    }

    pub fn hash_algo(&self) -> HashAlgorithm {
        self.mem.storage
    }

    /// Start a batch for efficient writing.
    ///
    /// If there is already a batch started, this does nothing and the existing batch is retained.
    pub fn start_batch(&mut self) {
        if self.batch.is_none() {
            self.batch = Some(ObjectMemoryMap::new(self.mem.storage, self.mem.compat));
        }
    }

    pub fn batch_len(&self) -> Option<usize> {
        self.batch.as_ref().map(|b| b.len())
    }

    /// If a batch exists, write it to the writer.
    pub fn finish_batch<W: Write>(&mut self, w: W) -> io::Result<()> {
        if let Some(txn) = self.batch.take() {
            txn.write(w)?;
        }
        Ok(())
    }

    /// If a batch exists, write it to the writer.
    pub fn abort_batch(&mut self) {
        self.batch = None;
    }

    /// Return whether there is a batch already started.
    ///
    /// If you just want a batch to exist and don't care whether one has already been started, you
    /// may simply call `start_batch` unconditionally.
    pub fn has_batch(&self) -> bool {
        self.batch.is_some()
    }

    /// Insert an object into the map.
    ///
    /// If `write` is true and there is a batch started, write the object into the batch as well as
    /// into the memory map.
    pub fn insert(&mut self, oid1: &ObjectID, oid2: &ObjectID, kind: MapType, write: bool) {
        let (compat_oid, storage_oid) =
            if HashAlgorithm::from_u32(oid1.algo) == Some(self.mem.compat) {
                (oid1, oid2)
            } else {
                (oid2, oid1)
            };
        Self::insert_into(&mut self.mem, storage_oid, compat_oid, kind);
        if write {
            if let Some(ref mut batch) = self.batch {
                Self::insert_into(batch, storage_oid, compat_oid, kind);
            }
        }
    }

    fn insert_into(
        map: &mut ObjectMemoryMap,
        storage: &ObjectID,
        compat: &ObjectID,
        kind: MapType,
    ) {
        map.to_compat.insert(
            storage.clone(),
            MappedObject {
                oid: compat.clone(),
                kind,
            },
        );
        map.to_storage.insert(
            compat.clone(),
            MappedObject {
                oid: storage.clone(),
                kind,
            },
        );
    }

    #[allow(dead_code)]
    fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<&MappedObject> {
        let map = if algo == self.mem.storage {
            &self.mem.to_storage
        } else {
            &self.mem.to_compat
        };
        map.get(oid)
    }

    #[allow(dead_code)]
    fn map_oid<'a, 'b: 'a>(
        &'b self,
        oid: &'a ObjectID,
        algo: HashAlgorithm,
    ) -> Option<&'a ObjectID> {
        if algo as u32 == oid.algo {
            return Some(oid);
        }
        let entry = self.map_object(oid, algo);
        entry.map(|obj| &obj.oid)
    }
}

#[cfg(test)]
mod tests {
    use super::{MapType, MmapedObjectMap, ObjectMap, ObjectMemoryMap};
    use crate::hash::{CryptoDigest, CryptoHasher, HashAlgorithm, ObjectID};
    use std::convert::TryInto;
    use std::io::{self, Cursor, Write};

    struct TrailingWriter {
        curs: Cursor<Vec<u8>>,
        hasher: CryptoHasher,
    }

    impl TrailingWriter {
        fn new() -> Self {
            Self {
                curs: Cursor::new(Vec::new()),
                hasher: CryptoHasher::new(HashAlgorithm::SHA256),
            }
        }

        fn finalize(mut self) -> Vec<u8> {
            let _ = self.hasher.flush();
            let mut v = self.curs.into_inner();
            v.extend(self.hasher.into_vec());
            v
        }
    }

    impl Write for TrailingWriter {
        fn write(&mut self, data: &[u8]) -> io::Result<usize> {
            self.hasher.write_all(data)?;
            self.curs.write_all(data)?;
            Ok(data.len())
        }

        fn flush(&mut self) -> io::Result<()> {
            self.hasher.flush()?;
            self.curs.flush()?;
            Ok(())
        }
    }

    fn sha1_oid(b: &[u8]) -> ObjectID {
        assert_eq!(b.len(), 20);
        let mut data = [0u8; 32];
        data[0..20].copy_from_slice(b);
        ObjectID {
            hash: data,
            algo: HashAlgorithm::SHA1 as u32,
        }
    }

    fn sha256_oid(b: &[u8]) -> ObjectID {
        assert_eq!(b.len(), 32);
        ObjectID {
            hash: b.try_into().unwrap(),
            algo: HashAlgorithm::SHA256 as u32,
        }
    }

    #[allow(clippy::type_complexity)]
    fn test_entries() -> &'static [(&'static str, &'static [u8], &'static [u8], MapType, bool)] {
        // These are all example blobs containing the content in the first argument.
        &[
            ("abc", b"\xf2\xba\x8f\x84\xab\x5c\x1b\xce\x84\xa7\xb4\x41\xcb\x19\x59\xcf\xc7\x09\x3b\x7f", b"\xc1\xcf\x6e\x46\x50\x77\x93\x0e\x88\xdc\x51\x36\x64\x1d\x40\x2f\x72\xa2\x29\xdd\xd9\x96\xf6\x27\xd6\x0e\x96\x39\xea\xba\x35\xa6", MapType::LooseObject, false),
            ("def", b"\x0c\x00\x38\x32\xe7\xbf\xa9\xca\x8b\x5c\x20\x35\xc9\xbd\x68\x4a\x5f\x26\x23\xbc", b"\x8a\x90\x17\x26\x48\x4d\xb0\xf2\x27\x9f\x30\x8d\x58\x96\xd9\x6b\xf6\x3a\xd6\xde\x95\x7c\xa3\x8a\xdc\x33\x61\x68\x03\x6e\xf6\x63", MapType::Shallow, true),
            ("ghi", b"\x45\xa8\x2e\x29\x5c\x52\x47\x31\x14\xc5\x7c\x18\xf4\xf5\x23\x68\xdf\x2a\x3c\xfd", b"\x6e\x47\x4c\x74\xf5\xd7\x78\x14\xc7\xf7\xf0\x7c\x37\x80\x07\x90\x53\x42\xaf\x42\x81\xe6\x86\x8d\x33\x46\x45\x4b\xb8\x63\xab\xc3", MapType::Submodule, false),
            ("jkl", b"\x45\x32\x8c\x36\xff\x2e\x9b\x9b\x4e\x59\x2c\x84\x7d\x3f\x9a\x7f\xd9\xb3\xe7\x16", b"\xc3\xee\xf7\x54\xa2\x1e\xc6\x9d\x43\x75\xbe\x6f\x18\x47\x89\xa8\x11\x6f\xd9\x66\xfc\x67\xdc\x31\xd2\x11\x15\x42\xc8\xd5\xa0\xaf", MapType::LooseObject, true),
        ]
    }

    fn test_map(write_all: bool) -> Box<ObjectMap> {
        let mut map = Box::new(ObjectMap::new(HashAlgorithm::SHA256, HashAlgorithm::SHA1));

        map.start_batch();

        for (_blob_content, sha1, sha256, kind, swap) in test_entries() {
            let s256 = sha256_oid(sha256);
            let s1 = sha1_oid(sha1);
            let write = write_all || (*kind as u32 & 2) == 0;
            if *swap {
                // Insert the item into the batch arbitrarily based on the type.  This tests that
                // we can specify either order and we'll do the right thing.
                map.insert(&s256, &s1, *kind, write);
            } else {
                map.insert(&s1, &s256, *kind, write);
            }
        }

        map
    }

    #[test]
    fn can_read_and_write_format() {
        for full in &[true, false] {
            let mut map = test_map(*full);
            let mut wrtr = TrailingWriter::new();
            map.finish_batch(&mut wrtr).unwrap();

            assert!(!map.has_batch());

            let data = wrtr.finalize();
            MmapedObjectMap::new(&data, HashAlgorithm::SHA256).unwrap();
        }
    }

    #[test]
    fn looks_up_from_mmaped() {
        let mut map = test_map(true);
        let mut wrtr = TrailingWriter::new();
        map.finish_batch(&mut wrtr).unwrap();

        assert!(!map.has_batch());

        let data = wrtr.finalize();
        let entries = test_entries();
        let map = MmapedObjectMap::new(&data, HashAlgorithm::SHA256).unwrap();

        for (_, sha1, sha256, kind, _) in entries {
            let s256 = sha256_oid(sha256);
            let s1 = sha1_oid(sha1);

            let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap();
            assert_eq!(res.oid, s1);
            assert_eq!(res.kind, *kind);
            let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap();
            assert_eq!(res, s1);

            let res = map.map_object(&s256, HashAlgorithm::SHA256).unwrap();
            assert_eq!(res.oid, s256);
            assert_eq!(res.kind, *kind);
            let res = map.map_oid(&s256, HashAlgorithm::SHA256).unwrap();
            assert_eq!(res, s256);

            let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap();
            assert_eq!(res.oid, s256);
            assert_eq!(res.kind, *kind);
            let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap();
            assert_eq!(res, s256);

            let res = map.map_object(&s1, HashAlgorithm::SHA1).unwrap();
            assert_eq!(res.oid, s1);
            assert_eq!(res.kind, *kind);
            let res = map.map_oid(&s1, HashAlgorithm::SHA1).unwrap();
            assert_eq!(res, s1);
        }

        for octet in &[0x00u8, 0x6d, 0x6e, 0x8a, 0xff] {
            let missing_oid = ObjectID {
                hash: [*octet; 32],
                algo: HashAlgorithm::SHA256 as u32,
            };

            assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none());
            assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none());

            assert_eq!(
                map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(),
                missing_oid
            );
        }
    }

    #[test]
    fn binary_searches_slices_correctly() {
        let sl = &[
            0, 1, 2, 15, 14, 13, 18, 10, 2, 20, 20, 20, 21, 21, 0, 21, 21, 1, 21, 21, 21, 21, 21,
            22, 22, 23, 24,
        ];

        let expected: &[(&[u8], Option<usize>)] = &[
            (&[0, 1, 2], Some(0)),
            (&[15, 14, 13], Some(1)),
            (&[18, 10, 2], Some(2)),
            (&[20, 20, 20], Some(3)),
            (&[21, 21, 0], Some(4)),
            (&[21, 21, 1], Some(5)),
            (&[21, 21, 21], Some(6)),
            (&[21, 21, 22], Some(7)),
            (&[22, 23, 24], Some(8)),
            (&[2, 15, 14], None),
            (&[0, 21, 21], None),
            (&[21, 21, 23], None),
            (&[22, 22, 23], None),
            (&[0xff, 0xff, 0xff], None),
            (&[0, 0, 0], None),
        ];

        for (wanted, value) in expected {
            assert_eq!(MmapedObjectMap::binary_search_slice(sl, wanted), *value);
        }
    }

    #[test]
    fn looks_up_oid_correctly() {
        let map = test_map(false);
        let entries = test_entries();

        let s256 = sha256_oid(entries[0].2);
        let s1 = sha1_oid(entries[0].1);

        let missing_oid = ObjectID {
            hash: [0xffu8; 32],
            algo: HashAlgorithm::SHA256 as u32,
        };

        let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap();
        assert_eq!(res.oid, s1);
        assert_eq!(res.kind, MapType::LooseObject);
        let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap();
        assert_eq!(*res, s1);

        let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap();
        assert_eq!(res.oid, s256);
        assert_eq!(res.kind, MapType::LooseObject);
        let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap();
        assert_eq!(*res, s256);

        assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none());
        assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none());

        assert_eq!(
            *map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(),
            missing_oid
        );
    }

    #[test]
    fn looks_up_known_oids_correctly() {
        let map = test_map(false);

        let funcs: &[&dyn Fn(HashAlgorithm) -> &'static ObjectID] = &[
            &|h: HashAlgorithm| h.empty_tree(),
            &|h: HashAlgorithm| h.empty_blob(),
            &|h: HashAlgorithm| h.null_oid(),
        ];

        for f in funcs {
            let s256 = f(HashAlgorithm::SHA256);
            let s1 = f(HashAlgorithm::SHA1);

            let res = map.map_object(s256, HashAlgorithm::SHA1).unwrap();
            assert_eq!(res.oid, *s1);
            assert_eq!(res.kind, MapType::Reserved);
            let res = map.map_oid(s256, HashAlgorithm::SHA1).unwrap();
            assert_eq!(*res, *s1);

            let res = map.map_object(s1, HashAlgorithm::SHA256).unwrap();
            assert_eq!(res.oid, *s256);
            assert_eq!(res.kind, MapType::Reserved);
            let res = map.map_oid(s1, HashAlgorithm::SHA256).unwrap();
            assert_eq!(*res, *s256);
        }
    }

    #[test]
    fn nul_padding() {
        assert_eq!(ObjectMemoryMap::required_nul_padding(1, 1), 3);
        assert_eq!(ObjectMemoryMap::required_nul_padding(2, 1), 2);
        assert_eq!(ObjectMemoryMap::required_nul_padding(3, 1), 1);
        assert_eq!(ObjectMemoryMap::required_nul_padding(2, 2), 0);

        assert_eq!(ObjectMemoryMap::required_nul_padding(39, 3), 3);
    }
}