From f8aee361291fc9505e62f9a75a60ae6e3905bcb6 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Thu, 25 Nov 2021 11:39:39 -0500 Subject: [PATCH] miniso: handle hardlinked files On ppc64le, some files are hardlinked. The way hardlinks are represented in ISO9660 is simply that the dirents point to the same sector and have identical sizes. So they show up as exact copies in the table. So we can get rid of them by deduplicating. Rust's `Vec::dedup()` makes this easy. See: https://github.com/coreos/coreos-assembler/issues/2583 --- src/iso9660.rs | 2 +- src/miniso.rs | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/iso9660.rs b/src/iso9660.rs index 9f5482d..7b82fcf 100644 --- a/src/iso9660.rs +++ b/src/iso9660.rs @@ -221,7 +221,7 @@ pub struct File { pub length: u32, } -#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] pub struct Address(u32); impl Address { diff --git a/src/miniso.rs b/src/miniso.rs index 63bea6c..9d2ce0b 100644 --- a/src/miniso.rs +++ b/src/miniso.rs @@ -47,7 +47,7 @@ impl Table { fn new( full_files: &HashMap, minimal_files: &HashMap, - ) -> Result { + ) -> Result<(Self, usize)> { let mut entries: Vec = Vec::new(); for (path, minimal_entry) in minimal_files { let full_entry = full_files @@ -67,9 +67,13 @@ impl Table { } entries.sort_by_key(|e| e.minimal.as_sector()); + // drop duplicate entries (hardlinks), and calculate how many there were for reporting + let size = entries.len(); + entries.dedup(); + let hardlinks = size - entries.len(); let table = Table { entries }; table.validate().context("validating table")?; - Ok(table) + Ok((table, hardlinks)) } fn validate(&self) -> Result<()> { @@ -91,7 +95,7 @@ impl Table { } } -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] struct TableEntry { minimal: iso9660::Address, full: iso9660::Address, @@ -148,7 +152,7 @@ impl Data { full_files: &HashMap, minimal_files: &HashMap, ) -> Result<(Self, usize, u64, u64, u64)> { - let table = Table::new(full_files, minimal_files)?; + let (table, hardlinks) = Table::new(full_files, minimal_files)?; // A `ReadHasher` here would let us wrap the miniso so we calculate the digest as we read. let digest = Sha256Digest::from_file(miniso)?; @@ -183,7 +187,7 @@ impl Data { copy(miniso, &mut xzw).context("copying remaining miniso bytes")?; xzw.try_finish().context("trying to finish xz stream")?; - let matches = table.entries.len(); + let matches = table.entries.len() + hardlinks; let written = xzw.total_in(); let written_compressed = xzw.total_out(); Ok(( -- 2.33.1