diff --git a/gix-pack/src/cache/delta/traverse/mod.rs b/gix-pack/src/cache/delta/traverse/mod.rs index e43b99a711d..c3ad93ecc07 100644 --- a/gix-pack/src/cache/delta/traverse/mod.rs +++ b/gix-pack/src/cache/delta/traverse/mod.rs @@ -170,7 +170,7 @@ where node, state, resolve_data, - object_hash.len_in_bytes(), + object_hash, threads_left, should_interrupt, ) diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs index d93780f222d..292102d3f1b 100644 --- a/gix-pack/src/cache/delta/traverse/resolve.rs +++ b/gix-pack/src/cache/delta/traverse/resolve.rs @@ -111,7 +111,7 @@ pub(super) unsafe fn deltas( child_items, }: &mut State<'_, F, MBFN, T>, resolve_data: &R, - hash_len: usize, + object_hash: gix_hash::Kind, threads_left: &AtomicIsize, should_interrupt: &AtomicBool, ) -> Result<(), Error> @@ -128,7 +128,7 @@ where let bytes = resolve(slice.clone(), resolve_data).ok_or(Error::ResolveFailed { pack_offset: slice.start, })?; - let entry = data::Entry::from_bytes(bytes, slice.start, hash_len)?; + let entry = data::Entry::from_bytes(bytes, slice.start, object_hash)?; let compressed = &bytes[entry.header_size()..]; let decompressed_len = entry.decompressed_size as usize; decompress_all_at_once_with(&mut inflate, compressed, decompressed_len, out)?; @@ -239,7 +239,7 @@ where resolve.clone(), resolve_data, modify_base.clone(), - hash_len, + object_hash, threads_left, should_interrupt, ); @@ -264,7 +264,7 @@ fn deltas_mt( resolve: F, resolve_data: &R, modify_base: MBFN, - hash_len: usize, + object_hash: gix_hash::Kind, threads_left: &AtomicIsize, should_interrupt: &AtomicBool, ) -> Result<(), Error> @@ -304,7 +304,7 @@ where let bytes = resolve(slice.clone(), resolve_data).ok_or(Error::ResolveFailed { pack_offset: slice.start, })?; - let entry = data::Entry::from_bytes(bytes, slice.start, hash_len)?; + let entry = data::Entry::from_bytes(bytes, slice.start, object_hash)?; let compressed = &bytes[entry.header_size()..]; let decompressed_len = entry.decompressed_size as usize; decompress_all_at_once_with(&mut inflate, compressed, decompressed_len, out)?; diff --git a/gix-pack/src/data/entry/decode.rs b/gix-pack/src/data/entry/decode.rs index dd1fdb67b04..a61e9237877 100644 --- a/gix-pack/src/data/entry/decode.rs +++ b/gix-pack/src/data/entry/decode.rs @@ -20,12 +20,9 @@ pub enum Error { /// Decoding impl data::Entry { /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section. - /// - /// # Panics - /// - /// If we cannot understand the header, garbage data is likely to trigger this. - pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> Result { + pub fn from_bytes(d: &[u8], pack_offset: data::Offset, object_hash: gix_hash::Kind) -> Result { let (type_id, size, mut consumed) = parse_header_info(d)?; + let hash_len = object_hash.len_in_bytes(); use crate::data::entry::Header::*; let object = match type_id { @@ -38,12 +35,16 @@ impl data::Entry { delta } REF_DELTA => { + let hash = d + .get(consumed..) + .and_then(|d| d.get(..hash_len)) + .ok_or(Error::Corrupt { + message: "ref-delta base object id", + })?; let delta = RefDelta { - base_id: gix_hash::ObjectId::from_bytes_or_panic(d.get(consumed..consumed + hash_len).ok_or( - Error::Corrupt { - message: "ref-delta base object id", - }, - )?), + base_id: gix_hash::ObjectId::try_from(hash).map_err(|_| Error::Corrupt { + message: "unsupported object hash length", + })?, }; consumed += hash_len; delta @@ -58,7 +59,7 @@ impl data::Entry { header: object, decompressed_size: size, data_offset: pack_offset + consumed as u64, - encoded_header_size: consumed.try_into().expect("pack entry headers fit into u16"), + encoded_header_size: encoded_header_size(consumed)?, }) } @@ -97,11 +98,18 @@ impl data::Entry { header: object, decompressed_size: size, data_offset: pack_offset + consumed as u64, - encoded_header_size: consumed.try_into().expect("pack entry headers fit into u16"), + encoded_header_size: encoded_header_size(consumed) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?, }) } } +fn encoded_header_size(consumed: usize) -> Result { + consumed.try_into().map_err(|_| Error::Corrupt { + message: "entry header size does not fit into u16", + }) +} + #[inline] fn streaming_parse_header_info(read: &mut dyn io::Read) -> Result<(u8, u64, usize), io::Error> { let mut byte = [0u8; 1]; @@ -176,7 +184,7 @@ mod tests { #[test] fn accepts_non_canonical_pack_entry_header_encoding() { let pack_offset = 42; - let entry = data::Entry::from_bytes(&[0xb3, 0x00], pack_offset, gix_hash::Kind::Sha1.len_in_bytes()) + let entry = data::Entry::from_bytes(&[0xb3, 0x00], pack_offset, gix_hash::Kind::Sha1) .expect("non-canonical size encodings are accepted by git"); assert_eq!(entry.header, data::entry::Header::Blob); @@ -206,12 +214,8 @@ mod tests { fn non_canonical_pack_entry_header_keeps_ofs_delta_base_offsets_correct() { let pack_offset = 100; let base_distance = 5; - let entry = data::Entry::from_bytes( - &[0xe4, 0x00, base_distance], - pack_offset, - gix_hash::Kind::Sha1.len_in_bytes(), - ) - .expect("non-canonical ofs-delta size encodings are accepted by git"); + let entry = data::Entry::from_bytes(&[0xe4, 0x00, base_distance], pack_offset, gix_hash::Kind::Sha1) + .expect("non-canonical ofs-delta size encodings are accepted by git"); assert_eq!( entry.header, @@ -236,4 +240,15 @@ mod tests { "ofs-delta base distances are relative to the entry start, so preserving `pack_offset()` keeps the base lookup correct" ); } + + #[test] + fn oversized_encoded_header_size_is_rejected() { + assert!( + matches!( + encoded_header_size(usize::from(u16::MAX) + 1), + Err(Error::Corrupt { message }) if message == "entry header size does not fit into u16" + ), + "entry header lengths that cannot be stored in the Entry metadata must be rejected" + ); + } } diff --git a/gix-pack/src/data/file/decode/entry.rs b/gix-pack/src/data/file/decode/entry.rs index 2a3fc4835df..11109ccd060 100644 --- a/gix-pack/src/data/file/decode/entry.rs +++ b/gix-pack/src/data/file/decode/entry.rs @@ -111,7 +111,7 @@ where } let object_data = &self.data[pack_offset..]; - data::Entry::from_bytes(object_data, offset, self.hash_len) + data::Entry::from_bytes(object_data, offset, self.object_hash) } /// Decompress the object expected at the given data offset, sans pack header. This information is only diff --git a/gix-pack/src/data/file/init.rs b/gix-pack/src/data/file/init.rs index 0511469a59f..579135b4127 100644 --- a/gix-pack/src/data/file/init.rs +++ b/gix-pack/src/data/file/init.rs @@ -51,7 +51,6 @@ where id, version: kind, num_objects, - hash_len, object_hash, alloc_limit_bytes: None, }) diff --git a/gix-pack/src/data/file/verify.rs b/gix-pack/src/data/file/verify.rs index 49f2fb87b44..a01fa7110ce 100644 --- a/gix-pack/src/data/file/verify.rs +++ b/gix-pack/src/data/file/verify.rs @@ -17,7 +17,7 @@ where { /// The checksum in the trailer of this pack data file pub fn checksum(&self) -> gix_hash::ObjectId { - gix_hash::ObjectId::from_bytes_or_panic(&self.data[self.data.len() - self.hash_len..]) + gix_hash::ObjectId::from_bytes_or_panic(&self.data[self.data.len() - self.object_hash.len_in_bytes()..]) } /// Verifies that the checksum of the packfile over all bytes preceding it indeed matches the actual checksum, diff --git a/gix-pack/src/data/mod.rs b/gix-pack/src/data/mod.rs index 50bc002db1c..d348a666cfa 100644 --- a/gix-pack/src/data/mod.rs +++ b/gix-pack/src/data/mod.rs @@ -96,9 +96,8 @@ pub struct File { pub id: Id, version: Version, num_objects: u32, - /// The size of the hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use + /// The kind of hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use /// based on their configuration. - hash_len: usize, object_hash: gix_hash::Kind, /// The maximum size of a single allocation caused by user-controlled on-disk pack data. /// @@ -135,7 +134,7 @@ where } /// The position of the byte one past the last pack entry, or in other terms, the first byte of the trailing hash. pub fn pack_end(&self) -> usize { - self.data.len() - self.hash_len + self.data.len() - self.object_hash.len_in_bytes() } /// The path to the pack data file on disk diff --git a/gix-pack/src/data/output/entry/mod.rs b/gix-pack/src/data/output/entry/mod.rs index 0f9ff877610..2fd57e57e33 100644 --- a/gix-pack/src/data/output/entry/mod.rs +++ b/gix-pack/src/data/output/entry/mod.rs @@ -75,8 +75,7 @@ impl output::Entry { } let pack_offset_must_be_zero = 0; - let pack_entry = match data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len()) - { + let pack_entry = match data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.kind()) { Ok(e) => e, Err(err) => return Some(Err(err.into())), }; diff --git a/gix-pack/tests/pack/data/fuzzed.rs b/gix-pack/tests/pack/data/fuzzed.rs index 499ffe064cc..2396b34cabb 100644 --- a/gix-pack/tests/pack/data/fuzzed.rs +++ b/gix-pack/tests/pack/data/fuzzed.rs @@ -21,7 +21,7 @@ fn artifact_inputs_can_be_opened_without_panicking() { /// while attempting to read the base object id. #[test] fn truncated_ref_delta_metadata_is_reported_without_panicking() { - let result = catch_unwind(|| data::Entry::from_bytes(&[0x70], 0, gix_hash::Kind::Sha1.len_in_bytes())); + let result = catch_unwind(|| data::Entry::from_bytes(&[0x70], 0, gix_hash::Kind::Sha1)); assert!( result diff --git a/gix-pack/tests/pack/iter.rs b/gix-pack/tests/pack/iter.rs index 9571bde872d..3fb6e277f78 100644 --- a/gix-pack/tests/pack/iter.rs +++ b/gix-pack/tests/pack/iter.rs @@ -37,8 +37,7 @@ mod new_from_header { let mut buf = Vec::::new(); entry.header.write_to(entry.decompressed_size, &mut buf)?; - let new_entry = - pack::data::Entry::from_bytes(&buf, entry.pack_offset, gix_hash::Kind::Sha1.len_in_bytes())?; + let new_entry = pack::data::Entry::from_bytes(&buf, entry.pack_offset, gix_hash::Kind::Sha1)?; assert_eq!( new_entry.header_size(),