Coverage Report

Created: 2025-12-31 06:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tar-rs/src/builder.rs
Line
Count
Source
1
use std::fs;
2
use std::io;
3
use std::io::prelude::*;
4
use std::path::Path;
5
use std::str;
6
7
use crate::header::BLOCK_SIZE;
8
use crate::header::GNU_SPARSE_HEADERS_COUNT;
9
use crate::header::{path2bytes, HeaderMode};
10
use crate::GnuExtSparseHeader;
11
use crate::{other, EntryType, Header};
12
13
/// A structure for building archives
14
///
15
/// This structure has methods for building up an archive from scratch into any
16
/// arbitrary writer.
17
pub struct Builder<W: Write> {
18
    options: BuilderOptions,
19
    finished: bool,
20
    obj: Option<W>,
21
}
22
23
#[derive(Clone, Copy)]
24
struct BuilderOptions {
25
    mode: HeaderMode,
26
    follow: bool,
27
    sparse: bool,
28
}
29
30
impl<W: Write> Builder<W> {
31
    /// Create a new archive builder with the underlying object as the
32
    /// destination of all data written. The builder will use
33
    /// `HeaderMode::Complete` by default.
34
5.12k
    pub fn new(obj: W) -> Builder<W> {
35
5.12k
        Builder {
36
5.12k
            options: BuilderOptions {
37
5.12k
                mode: HeaderMode::Complete,
38
5.12k
                follow: true,
39
5.12k
                sparse: true,
40
5.12k
            },
41
5.12k
            finished: false,
42
5.12k
            obj: Some(obj),
43
5.12k
        }
44
5.12k
    }
<tar::builder::Builder<alloc::vec::Vec<u8>>>::new
Line
Count
Source
34
2.30k
    pub fn new(obj: W) -> Builder<W> {
35
2.30k
        Builder {
36
2.30k
            options: BuilderOptions {
37
2.30k
                mode: HeaderMode::Complete,
38
2.30k
                follow: true,
39
2.30k
                sparse: true,
40
2.30k
            },
41
2.30k
            finished: false,
42
2.30k
            obj: Some(obj),
43
2.30k
        }
44
2.30k
    }
<tar::builder::Builder<std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::new
Line
Count
Source
34
2.82k
    pub fn new(obj: W) -> Builder<W> {
35
2.82k
        Builder {
36
2.82k
            options: BuilderOptions {
37
2.82k
                mode: HeaderMode::Complete,
38
2.82k
                follow: true,
39
2.82k
                sparse: true,
40
2.82k
            },
41
2.82k
            finished: false,
42
2.82k
            obj: Some(obj),
43
2.82k
        }
44
2.82k
    }
45
46
    /// Changes the HeaderMode that will be used when reading fs Metadata for
47
    /// methods that implicitly read metadata for an input Path. Notably, this
48
    /// does _not_ apply to `append(Header)`.
49
    pub fn mode(&mut self, mode: HeaderMode) {
50
        self.options.mode = mode;
51
    }
52
53
    /// Follow symlinks, archiving the contents of the file they point to rather
54
    /// than adding a symlink to the archive. Defaults to true.
55
    ///
56
    /// When true, it exhibits the same behavior as GNU `tar` command's
57
    /// `--dereference` or `-h` options <https://man7.org/linux/man-pages/man1/tar.1.html>.
58
    pub fn follow_symlinks(&mut self, follow: bool) {
59
        self.options.follow = follow;
60
    }
61
62
    /// Handle sparse files efficiently, if supported by the underlying
63
    /// filesystem. When true, sparse file information is read from disk and
64
    /// empty segments are omitted from the archive. Defaults to true.
65
    pub fn sparse(&mut self, sparse: bool) {
66
        self.options.sparse = sparse;
67
    }
68
69
    /// Gets shared reference to the underlying object.
70
    pub fn get_ref(&self) -> &W {
71
        self.obj.as_ref().unwrap()
72
    }
73
74
    /// Gets mutable reference to the underlying object.
75
    ///
76
    /// Note that care must be taken while writing to the underlying
77
    /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
78
    /// useful in the situations when one needs to be ensured that
79
    /// tar entry was flushed to the disk.
80
1.61M
    pub fn get_mut(&mut self) -> &mut W {
81
1.61M
        self.obj.as_mut().unwrap()
82
1.61M
    }
<tar::builder::Builder<alloc::vec::Vec<u8>>>::get_mut
Line
Count
Source
80
1.59M
    pub fn get_mut(&mut self) -> &mut W {
81
1.59M
        self.obj.as_mut().unwrap()
82
1.59M
    }
<tar::builder::Builder<std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::get_mut
Line
Count
Source
80
13.1k
    pub fn get_mut(&mut self) -> &mut W {
81
13.1k
        self.obj.as_mut().unwrap()
82
13.1k
    }
83
84
    /// Unwrap this archive, returning the underlying object.
85
    ///
86
    /// This function will finish writing the archive if the `finish` function
87
    /// hasn't yet been called, returning any I/O error which happens during
88
    /// that operation.
89
2.30k
    pub fn into_inner(mut self) -> io::Result<W> {
90
2.30k
        if !self.finished {
91
2.30k
            self.finish()?;
92
0
        }
93
2.30k
        Ok(self.obj.take().unwrap())
94
2.30k
    }
95
96
    /// Adds a new entry to this archive.
97
    ///
98
    /// This function will append the header specified, followed by contents of
99
    /// the stream specified by `data`. To produce a valid archive the `size`
100
    /// field of `header` must be the same as the length of the stream that's
101
    /// being written. Additionally the checksum for the header should have been
102
    /// set via the `set_cksum` method.
103
    ///
104
    /// Note that this will not attempt to seek the archive to a valid position,
105
    /// so if the archive is in the middle of a read or some other similar
106
    /// operation then this may corrupt the archive.
107
    ///
108
    /// Also note that after all entries have been written to an archive the
109
    /// `finish` function needs to be called to finish writing the archive.
110
    ///
111
    /// # Errors
112
    ///
113
    /// This function will return an error for any intermittent I/O error which
114
    /// occurs when either reading or writing.
115
    ///
116
    /// # Examples
117
    ///
118
    /// ```
119
    /// use tar::{Builder, Header};
120
    ///
121
    /// let mut header = Header::new_gnu();
122
    /// header.set_path("foo").unwrap();
123
    /// header.set_size(4);
124
    /// header.set_cksum();
125
    ///
126
    /// let mut data: &[u8] = &[1, 2, 3, 4];
127
    ///
128
    /// let mut ar = Builder::new(Vec::new());
129
    /// ar.append(&header, data).unwrap();
130
    /// let data = ar.into_inner().unwrap();
131
    /// ```
132
459k
    pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
133
459k
        append(self.get_mut(), header, &mut data)
134
459k
    }
<tar::builder::Builder<alloc::vec::Vec<u8>>>::append::<&mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>
Line
Count
Source
132
458k
    pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
133
458k
        append(self.get_mut(), header, &mut data)
134
458k
    }
<tar::builder::Builder<std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::append::<std::io::cursor::Cursor<&alloc::vec::Vec<u8>>>
Line
Count
Source
132
1.00k
    pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
133
1.00k
        append(self.get_mut(), header, &mut data)
134
1.00k
    }
<tar::builder::Builder<std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::append::<std::io::util::Empty>
Line
Count
Source
132
316
    pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
133
316
        append(self.get_mut(), header, &mut data)
134
316
    }
135
136
    /// Adds a new entry to this archive with the specified path.
137
    ///
138
    /// This function will set the specified path in the given header, which may
139
    /// require appending a GNU long-name extension entry to the archive first.
140
    /// The checksum for the header will be automatically updated via the
141
    /// `set_cksum` method after setting the path. No other metadata in the
142
    /// header will be modified.
143
    ///
144
    /// Then it will append the header, followed by contents of the stream
145
    /// specified by `data`. To produce a valid archive the `size` field of
146
    /// `header` must be the same as the length of the stream that's being
147
    /// written.
148
    ///
149
    /// Note that this will not attempt to seek the archive to a valid position,
150
    /// so if the archive is in the middle of a read or some other similar
151
    /// operation then this may corrupt the archive.
152
    ///
153
    /// Also note that after all entries have been written to an archive the
154
    /// `finish` function needs to be called to finish writing the archive.
155
    ///
156
    /// # Errors
157
    ///
158
    /// This function will return an error for any intermittent I/O error which
159
    /// occurs when either reading or writing.
160
    ///
161
    /// # Examples
162
    ///
163
    /// ```
164
    /// use tar::{Builder, Header};
165
    ///
166
    /// let mut header = Header::new_gnu();
167
    /// header.set_size(4);
168
    /// header.set_cksum();
169
    ///
170
    /// let mut data: &[u8] = &[1, 2, 3, 4];
171
    ///
172
    /// let mut ar = Builder::new(Vec::new());
173
    /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
174
    /// let data = ar.into_inner().unwrap();
175
    /// ```
176
1.12M
    pub fn append_data<P: AsRef<Path>, R: Read>(
177
1.12M
        &mut self,
178
1.12M
        header: &mut Header,
179
1.12M
        path: P,
180
1.12M
        data: R,
181
1.12M
    ) -> io::Result<()> {
182
1.12M
        prepare_header_path(self.get_mut(), header, path.as_ref())?;
183
459k
        header.set_cksum();
184
459k
        self.append(header, data)
185
1.12M
    }
<tar::builder::Builder<alloc::vec::Vec<u8>>>::append_data::<&str, &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>
Line
Count
Source
176
1.12M
    pub fn append_data<P: AsRef<Path>, R: Read>(
177
1.12M
        &mut self,
178
1.12M
        header: &mut Header,
179
1.12M
        path: P,
180
1.12M
        data: R,
181
1.12M
    ) -> io::Result<()> {
182
1.12M
        prepare_header_path(self.get_mut(), header, path.as_ref())?;
183
458k
        header.set_cksum();
184
458k
        self.append(header, data)
185
1.12M
    }
<tar::builder::Builder<std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::append_data::<&alloc::string::String, std::io::cursor::Cursor<&alloc::vec::Vec<u8>>>
Line
Count
Source
176
2.82k
    pub fn append_data<P: AsRef<Path>, R: Read>(
177
2.82k
        &mut self,
178
2.82k
        header: &mut Header,
179
2.82k
        path: P,
180
2.82k
        data: R,
181
2.82k
    ) -> io::Result<()> {
182
2.82k
        prepare_header_path(self.get_mut(), header, path.as_ref())?;
183
1.00k
        header.set_cksum();
184
1.00k
        self.append(header, data)
185
2.82k
    }
186
187
    /// Adds a new entry to this archive and returns an [`EntryWriter`] for
188
    /// adding its contents.
189
    ///
190
    /// This function is similar to [`Self::append_data`] but returns a
191
    /// [`io::Write`] implementation instead of taking data as a parameter.
192
    ///
193
    /// Similar constraints around the position of the archive and completion
194
    /// apply as with [`Self::append_data`]. It requires the underlying writer
195
    /// to implement [`Seek`] to update the header after writing the data.
196
    ///
197
    /// # Errors
198
    ///
199
    /// This function will return an error for any intermittent I/O error which
200
    /// occurs when either reading or writing.
201
    ///
202
    /// # Examples
203
    ///
204
    /// ```
205
    /// use std::io::Cursor;
206
    /// use std::io::Write as _;
207
    /// use tar::{Builder, Header};
208
    ///
209
    /// let mut header = Header::new_gnu();
210
    ///
211
    /// let mut ar = Builder::new(Cursor::new(Vec::new()));
212
    /// let mut entry = ar.append_writer(&mut header, "hi.txt").unwrap();
213
    /// entry.write_all(b"Hello, ").unwrap();
214
    /// entry.write_all(b"world!\n").unwrap();
215
    /// entry.finish().unwrap();
216
    /// ```
217
    pub fn append_writer<'a, P: AsRef<Path>>(
218
        &'a mut self,
219
        header: &'a mut Header,
220
        path: P,
221
    ) -> io::Result<EntryWriter<'a>>
222
    where
223
        W: Seek,
224
    {
225
        EntryWriter::start(self.get_mut(), header, path.as_ref())
226
    }
227
228
    /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
229
    ///
230
    /// This function is similar to [`Self::append_data`] which supports long filenames,
231
    /// but also supports long link targets using GNU extensions if necessary.
232
    /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`].
233
    /// The `set_cksum` method will be invoked after setting the path. No other metadata in the
234
    /// header will be modified.
235
    ///
236
    /// If you are intending to use GNU extensions, you must use this method over calling
237
    /// [`Header::set_link_name`] because that function will fail on long links.
238
    ///
239
    /// Similar constraints around the position of the archive and completion
240
    /// apply as with [`Self::append_data`].
241
    ///
242
    /// # Errors
243
    ///
244
    /// This function will return an error for any intermittent I/O error which
245
    /// occurs when either reading or writing.
246
    ///
247
    /// # Examples
248
    ///
249
    /// ```
250
    /// use tar::{Builder, Header, EntryType};
251
    ///
252
    /// let mut ar = Builder::new(Vec::new());
253
    /// let mut header = Header::new_gnu();
254
    /// header.set_username("foo");
255
    /// header.set_entry_type(EntryType::Symlink);
256
    /// header.set_size(0);
257
    /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap();
258
    /// let data = ar.into_inner().unwrap();
259
    /// ```
260
2.82k
    pub fn append_link<P: AsRef<Path>, T: AsRef<Path>>(
261
2.82k
        &mut self,
262
2.82k
        header: &mut Header,
263
2.82k
        path: P,
264
2.82k
        target: T,
265
2.82k
    ) -> io::Result<()> {
266
2.82k
        self._append_link(header, path.as_ref(), target.as_ref())
267
2.82k
    }
268
269
2.82k
    fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> {
270
2.82k
        prepare_header_path(self.get_mut(), header, path)?;
271
498
        prepare_header_link(self.get_mut(), header, target)?;
272
316
        header.set_cksum();
273
316
        self.append(header, std::io::empty())
274
2.82k
    }
275
276
    /// Adds a file on the local filesystem to this archive.
277
    ///
278
    /// This function will open the file specified by `path` and insert the file
279
    /// into the archive with the appropriate metadata set, returning any I/O
280
    /// error which occurs while writing. The path name for the file inside of
281
    /// this archive will be the same as `path`, and it is required that the
282
    /// path is a relative path.
283
    ///
284
    /// Note that this will not attempt to seek the archive to a valid position,
285
    /// so if the archive is in the middle of a read or some other similar
286
    /// operation then this may corrupt the archive.
287
    ///
288
    /// Also note that after all files have been written to an archive the
289
    /// `finish` function needs to be called to finish writing the archive.
290
    ///
291
    /// # Examples
292
    ///
293
    /// ```no_run
294
    /// use tar::Builder;
295
    ///
296
    /// let mut ar = Builder::new(Vec::new());
297
    ///
298
    /// ar.append_path("foo/bar.txt").unwrap();
299
    /// ```
300
    pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
301
        let options = self.options;
302
        append_path_with_name(self.get_mut(), path.as_ref(), None, options)
303
    }
304
305
    /// Adds a file on the local filesystem to this archive under another name.
306
    ///
307
    /// This function will open the file specified by `path` and insert the file
308
    /// into the archive as `name` with appropriate metadata set, returning any
309
    /// I/O error which occurs while writing. The path name for the file inside
310
    /// of this archive will be `name` is required to be a relative path.
311
    ///
312
    /// Note that this will not attempt to seek the archive to a valid position,
313
    /// so if the archive is in the middle of a read or some other similar
314
    /// operation then this may corrupt the archive.
315
    ///
316
    /// Note if the `path` is a directory. This will just add an entry to the archive,
317
    /// rather than contents of the directory.
318
    ///
319
    /// Also note that after all files have been written to an archive the
320
    /// `finish` function needs to be called to finish writing the archive.
321
    ///
322
    /// # Examples
323
    ///
324
    /// ```no_run
325
    /// use tar::Builder;
326
    ///
327
    /// let mut ar = Builder::new(Vec::new());
328
    ///
329
    /// // Insert the local file "foo/bar.txt" in the archive but with the name
330
    /// // "bar/foo.txt".
331
    /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
332
    /// ```
333
    pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
334
        &mut self,
335
        path: P,
336
        name: N,
337
    ) -> io::Result<()> {
338
        let options = self.options;
339
        append_path_with_name(self.get_mut(), path.as_ref(), Some(name.as_ref()), options)
340
    }
341
342
    /// Adds a file to this archive with the given path as the name of the file
343
    /// in the archive.
344
    ///
345
    /// This will use the metadata of `file` to populate a `Header`, and it will
346
    /// then append the file to the archive with the name `path`.
347
    ///
348
    /// Note that this will not attempt to seek the archive to a valid position,
349
    /// so if the archive is in the middle of a read or some other similar
350
    /// operation then this may corrupt the archive.
351
    ///
352
    /// Also note that after all files have been written to an archive the
353
    /// `finish` function needs to be called to finish writing the archive.
354
    ///
355
    /// # Examples
356
    ///
357
    /// ```no_run
358
    /// use std::fs::File;
359
    /// use tar::Builder;
360
    ///
361
    /// let mut ar = Builder::new(Vec::new());
362
    ///
363
    /// // Open the file at one location, but insert it into the archive with a
364
    /// // different name.
365
    /// let mut f = File::open("foo/bar/baz.txt").unwrap();
366
    /// ar.append_file("bar/baz.txt", &mut f).unwrap();
367
    /// ```
368
2.82k
    pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
369
2.82k
        let options = self.options;
370
2.82k
        append_file(self.get_mut(), path.as_ref(), file, options)
371
2.82k
    }
372
373
    /// Adds a directory to this archive with the given path as the name of the
374
    /// directory in the archive.
375
    ///
376
    /// This will use `stat` to populate a `Header`, and it will then append the
377
    /// directory to the archive with the name `path`.
378
    ///
379
    /// Note that this will not attempt to seek the archive to a valid position,
380
    /// so if the archive is in the middle of a read or some other similar
381
    /// operation then this may corrupt the archive.
382
    ///
383
    /// Note this will not add the contents of the directory to the archive.
384
    /// See `append_dir_all` for recursively adding the contents of the directory.
385
    ///
386
    /// Also note that after all files have been written to an archive the
387
    /// `finish` function needs to be called to finish writing the archive.
388
    ///
389
    /// # Examples
390
    ///
391
    /// ```
392
    /// use std::fs;
393
    /// use tar::Builder;
394
    ///
395
    /// let mut ar = Builder::new(Vec::new());
396
    ///
397
    /// // Use the directory at one location, but insert it into the archive
398
    /// // with a different name.
399
    /// ar.append_dir("bardir", ".").unwrap();
400
    /// ```
401
13.2k
    pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
402
13.2k
    where
403
13.2k
        P: AsRef<Path>,
404
13.2k
        Q: AsRef<Path>,
405
    {
406
13.2k
        let options = self.options;
407
13.2k
        append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
408
13.2k
    }
409
410
    /// Adds a directory and all of its contents (recursively) to this archive
411
    /// with the given path as the name of the directory in the archive.
412
    ///
413
    /// Note that this will not attempt to seek the archive to a valid position,
414
    /// so if the archive is in the middle of a read or some other similar
415
    /// operation then this may corrupt the archive.
416
    ///
417
    /// Also note that after all files have been written to an archive the
418
    /// `finish` or `into_inner` function needs to be called to finish
419
    /// writing the archive.
420
    ///
421
    /// # Examples
422
    ///
423
    /// ```
424
    /// use std::fs;
425
    /// use tar::Builder;
426
    ///
427
    /// let mut ar = Builder::new(Vec::new());
428
    ///
429
    /// // Use the directory at one location ("."), but insert it into the archive
430
    /// // with a different name ("bardir").
431
    /// ar.append_dir_all("bardir", ".").unwrap();
432
    /// ar.finish().unwrap();
433
    /// ```
434
    ///
435
    /// Use `append_dir_all` with an empty string as the first path argument to
436
    /// create an archive from all files in a directory without renaming.
437
    ///
438
    /// ```
439
    /// use std::fs;
440
    /// use std::path::PathBuf;
441
    /// use tar::{Archive, Builder};
442
    ///
443
    /// let tmpdir = tempfile::tempdir().unwrap();
444
    /// let path = tmpdir.path();
445
    /// fs::write(path.join("a.txt"), b"hello").unwrap();
446
    /// fs::write(path.join("b.txt"), b"world").unwrap();
447
    ///
448
    /// // Create a tarball from the files in the directory
449
    /// let mut ar = Builder::new(Vec::new());
450
    /// ar.append_dir_all("", path).unwrap();
451
    ///
452
    /// // List files in the archive
453
    /// let archive = ar.into_inner().unwrap();
454
    /// let archived_files = Archive::new(archive.as_slice())
455
    ///     .entries()
456
    ///     .unwrap()
457
    ///     .map(|entry| entry.unwrap().path().unwrap().into_owned())
458
    ///     .collect::<Vec<_>>();
459
    ///
460
    /// assert!(archived_files.contains(&PathBuf::from("a.txt")));
461
    /// assert!(archived_files.contains(&PathBuf::from("b.txt")));
462
    /// ```
463
    pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
464
    where
465
        P: AsRef<Path>,
466
        Q: AsRef<Path>,
467
    {
468
        let options = self.options;
469
        append_dir_all(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
470
    }
471
472
    /// Finish writing this archive, emitting the termination sections.
473
    ///
474
    /// This function should only be called when the archive has been written
475
    /// entirely and if an I/O error happens the underlying object still needs
476
    /// to be acquired.
477
    ///
478
    /// In most situations the `into_inner` method should be preferred.
479
10.2k
    pub fn finish(&mut self) -> io::Result<()> {
480
10.2k
        if self.finished {
481
5.12k
            return Ok(());
482
5.12k
        }
483
5.12k
        self.finished = true;
484
5.12k
        self.get_mut().write_all(&[0; 1024])
485
10.2k
    }
<tar::builder::Builder<alloc::vec::Vec<u8>>>::finish
Line
Count
Source
479
4.60k
    pub fn finish(&mut self) -> io::Result<()> {
480
4.60k
        if self.finished {
481
2.30k
            return Ok(());
482
2.30k
        }
483
2.30k
        self.finished = true;
484
2.30k
        self.get_mut().write_all(&[0; 1024])
485
4.60k
    }
<tar::builder::Builder<std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::finish
Line
Count
Source
479
5.65k
    pub fn finish(&mut self) -> io::Result<()> {
480
5.65k
        if self.finished {
481
2.82k
            return Ok(());
482
2.82k
        }
483
2.82k
        self.finished = true;
484
2.82k
        self.get_mut().write_all(&[0; 1024])
485
5.65k
    }
486
}
487
488
trait SeekWrite: Write + Seek {
489
    fn as_write(&mut self) -> &mut dyn Write;
490
}
491
492
impl<T: Write + Seek> SeekWrite for T {
493
    fn as_write(&mut self) -> &mut dyn Write {
494
        self
495
    }
496
}
497
498
/// A writer for a single entry in a tar archive.
499
///
500
/// This struct is returned by [`Builder::append_writer`] and provides a
501
/// [`Write`] implementation for adding content to an archive entry.
502
///
503
/// After writing all data to the entry, it must be finalized either by
504
/// explicitly calling [`EntryWriter::finish`] or by letting it drop.
505
pub struct EntryWriter<'a> {
506
    // NOTE: Do not add any fields here which require Drop!
507
    // See the comment below in finish().
508
    obj: &'a mut dyn SeekWrite,
509
    header: &'a mut Header,
510
    written: u64,
511
}
512
513
impl EntryWriter<'_> {
514
0
    fn start<'a>(
515
0
        obj: &'a mut dyn SeekWrite,
516
0
        header: &'a mut Header,
517
0
        path: &Path,
518
0
    ) -> io::Result<EntryWriter<'a>> {
519
0
        prepare_header_path(obj.as_write(), header, path)?;
520
521
        // Reserve space for header, will be overwritten once data is written.
522
0
        obj.write_all([0u8; BLOCK_SIZE as usize].as_ref())?;
523
524
0
        Ok(EntryWriter {
525
0
            obj,
526
0
            header,
527
0
            written: 0,
528
0
        })
529
0
    }
530
531
    /// Finish writing the current entry in the archive.
532
0
    pub fn finish(self) -> io::Result<()> {
533
        // NOTE: This is an optimization for "fallible destructuring".
534
        // We want finish() to return an error, but we also need to invoke
535
        // cleanup in our Drop handler, which will run unconditionally
536
        // and try to do the same work.
537
        // By using ManuallyDrop, we suppress that drop. However, this would
538
        // be a memory leak if we ever had any struct members which required
539
        // Drop - which we don't right now.
540
        // But if we ever gain one, we will need to change to use e.g. Option<>
541
        // around some of the fields or have a `bool finished` etc.
542
0
        let mut this = std::mem::ManuallyDrop::new(self);
543
0
        this.do_finish()
544
0
    }
545
546
0
    fn do_finish(&mut self) -> io::Result<()> {
547
        // Pad with zeros if necessary.
548
0
        let buf = [0u8; BLOCK_SIZE as usize];
549
0
        let remaining = BLOCK_SIZE.wrapping_sub(self.written) % BLOCK_SIZE;
550
0
        self.obj.write_all(&buf[..remaining as usize])?;
551
0
        let written = (self.written + remaining) as i64;
552
553
        // Seek back to the header position.
554
0
        self.obj
555
0
            .seek(io::SeekFrom::Current(-written - BLOCK_SIZE as i64))?;
556
557
0
        self.header.set_size(self.written);
558
0
        self.header.set_cksum();
559
0
        self.obj.write_all(self.header.as_bytes())?;
560
561
        // Seek forward to restore the position.
562
0
        self.obj.seek(io::SeekFrom::Current(written))?;
563
564
0
        Ok(())
565
0
    }
566
}
567
568
impl Write for EntryWriter<'_> {
569
0
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
570
0
        let len = self.obj.write(buf)?;
571
0
        self.written += len as u64;
572
0
        Ok(len)
573
0
    }
574
575
0
    fn flush(&mut self) -> io::Result<()> {
576
0
        self.obj.flush()
577
0
    }
578
}
579
580
impl Drop for EntryWriter<'_> {
581
0
    fn drop(&mut self) {
582
0
        let _ = self.do_finish();
583
0
    }
584
}
585
586
473k
fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
587
473k
    dst.write_all(header.as_bytes())?;
588
473k
    let len = io::copy(&mut data, &mut dst)?;
589
473k
    pad_zeroes(&mut dst, len)?;
590
473k
    Ok(())
591
473k
}
592
593
476k
fn pad_zeroes(dst: &mut dyn Write, len: u64) -> io::Result<()> {
594
476k
    let buf = [0; BLOCK_SIZE as usize];
595
476k
    let remaining = BLOCK_SIZE - (len % BLOCK_SIZE);
596
476k
    if remaining < BLOCK_SIZE {
597
460k
        dst.write_all(&buf[..remaining as usize])?;
598
15.8k
    }
599
476k
    Ok(())
600
476k
}
601
602
0
fn append_path_with_name(
603
0
    dst: &mut dyn Write,
604
0
    path: &Path,
605
0
    name: Option<&Path>,
606
0
    options: BuilderOptions,
607
0
) -> io::Result<()> {
608
0
    let stat = if options.follow {
609
0
        fs::metadata(path).map_err(|err| {
610
0
            io::Error::new(
611
0
                err.kind(),
612
0
                format!("{} when getting metadata for {}", err, path.display()),
613
            )
614
0
        })?
615
    } else {
616
0
        fs::symlink_metadata(path).map_err(|err| {
617
0
            io::Error::new(
618
0
                err.kind(),
619
0
                format!("{} when getting metadata for {}", err, path.display()),
620
            )
621
0
        })?
622
    };
623
0
    let ar_name = name.unwrap_or(path);
624
0
    if stat.is_file() {
625
0
        append_file(dst, ar_name, &mut fs::File::open(path)?, options)
626
0
    } else if stat.is_dir() {
627
0
        append_fs(dst, ar_name, &stat, options.mode, None)
628
0
    } else if stat.file_type().is_symlink() {
629
0
        let link_name = fs::read_link(path)?;
630
0
        append_fs(dst, ar_name, &stat, options.mode, Some(&link_name))
631
    } else {
632
        #[cfg(unix)]
633
        {
634
0
            append_special(dst, path, &stat, options.mode)
635
        }
636
        #[cfg(not(unix))]
637
        {
638
            Err(other(&format!("{} has unknown file type", path.display())))
639
        }
640
    }
641
0
}
642
643
#[cfg(unix)]
644
0
fn append_special(
645
0
    dst: &mut dyn Write,
646
0
    path: &Path,
647
0
    stat: &fs::Metadata,
648
0
    mode: HeaderMode,
649
0
) -> io::Result<()> {
650
    use ::std::os::unix::fs::{FileTypeExt, MetadataExt};
651
652
0
    let file_type = stat.file_type();
653
    let entry_type;
654
0
    if file_type.is_socket() {
655
        // sockets can't be archived
656
0
        return Err(other(&format!(
657
0
            "{}: socket can not be archived",
658
0
            path.display()
659
0
        )));
660
0
    } else if file_type.is_fifo() {
661
0
        entry_type = EntryType::Fifo;
662
0
    } else if file_type.is_char_device() {
663
0
        entry_type = EntryType::Char;
664
0
    } else if file_type.is_block_device() {
665
0
        entry_type = EntryType::Block;
666
0
    } else {
667
0
        return Err(other(&format!("{} has unknown file type", path.display())));
668
    }
669
670
0
    let mut header = Header::new_gnu();
671
0
    header.set_metadata_in_mode(stat, mode);
672
0
    prepare_header_path(dst, &mut header, path)?;
673
674
0
    header.set_entry_type(entry_type);
675
0
    let dev_id = stat.rdev();
676
0
    let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
677
0
    let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff);
678
0
    header.set_device_major(dev_major as u32)?;
679
0
    header.set_device_minor(dev_minor as u32)?;
680
681
0
    header.set_cksum();
682
0
    dst.write_all(header.as_bytes())?;
683
684
0
    Ok(())
685
0
}
686
687
2.82k
fn append_file(
688
2.82k
    dst: &mut dyn Write,
689
2.82k
    path: &Path,
690
2.82k
    file: &mut fs::File,
691
2.82k
    options: BuilderOptions,
692
2.82k
) -> io::Result<()> {
693
2.82k
    let stat = file.metadata()?;
694
2.82k
    let mut header = Header::new_gnu();
695
696
2.82k
    prepare_header_path(dst, &mut header, path)?;
697
2.82k
    header.set_metadata_in_mode(&stat, options.mode);
698
2.82k
    let sparse_entries = if options.sparse {
699
2.82k
        prepare_header_sparse(file, &stat, &mut header)?
700
    } else {
701
0
        None
702
    };
703
2.82k
    header.set_cksum();
704
2.82k
    dst.write_all(header.as_bytes())?;
705
706
2.82k
    if let Some(sparse_entries) = sparse_entries {
707
0
        append_extended_sparse_headers(dst, &sparse_entries)?;
708
0
        for entry in sparse_entries.entries {
709
0
            file.seek(io::SeekFrom::Start(entry.offset))?;
710
0
            io::copy(&mut file.take(entry.num_bytes), dst)?;
711
        }
712
0
        pad_zeroes(dst, sparse_entries.on_disk_size)?;
713
    } else {
714
2.82k
        let len = io::copy(file, dst)?;
715
2.82k
        pad_zeroes(dst, len)?;
716
    }
717
718
2.82k
    Ok(())
719
2.82k
}
720
721
13.2k
fn append_dir(
722
13.2k
    dst: &mut dyn Write,
723
13.2k
    path: &Path,
724
13.2k
    src_path: &Path,
725
13.2k
    options: BuilderOptions,
726
13.2k
) -> io::Result<()> {
727
13.2k
    let stat = fs::metadata(src_path)?;
728
7.65k
    append_fs(dst, path, &stat, options.mode, None)
729
13.2k
}
730
731
14.4k
fn prepare_header(size: u64, entry_type: u8) -> Header {
732
14.4k
    let mut header = Header::new_gnu();
733
14.4k
    let name = b"././@LongLink";
734
14.4k
    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
735
14.4k
    header.set_mode(0o644);
736
14.4k
    header.set_uid(0);
737
14.4k
    header.set_gid(0);
738
14.4k
    header.set_mtime(0);
739
    // + 1 to be compliant with GNU tar
740
14.4k
    header.set_size(size + 1);
741
14.4k
    header.set_entry_type(EntryType::new(entry_type));
742
14.4k
    header.set_cksum();
743
14.4k
    header
744
14.4k
}
745
746
1.13M
fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> {
747
    // Try to encode the path directly in the header, but if it ends up not
748
    // working (probably because it's too long) then try to use the GNU-specific
749
    // long name extension by emitting an entry which indicates that it's the
750
    // filename.
751
1.13M
    if let Err(e) = header.set_path(path) {
752
678k
        let data = path2bytes(path)?;
753
678k
        let max = header.as_old().name.len();
754
        // Since `e` isn't specific enough to let us know the path is indeed too
755
        // long, verify it first before using the extension.
756
678k
        if data.len() < max {
757
664k
            return Err(e);
758
14.2k
        }
759
14.2k
        let header2 = prepare_header(data.len() as u64, b'L');
760
        // null-terminated string
761
14.2k
        let mut data2 = data.chain(io::repeat(0).take(1));
762
14.2k
        append(dst, &header2, &mut data2)?;
763
764
        // Truncate the path to store in the header we're about to emit to
765
        // ensure we've got something at least mentioned. Note that we use
766
        // `str`-encoding to be compatible with Windows, but in general the
767
        // entry in the header itself shouldn't matter too much since extraction
768
        // doesn't look at it.
769
14.2k
        let truncated = match str::from_utf8(&data[..max]) {
770
13.5k
            Ok(s) => s,
771
719
            Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
772
        };
773
14.2k
        header.set_truncated_path_for_gnu_header(truncated)?;
774
461k
    }
775
470k
    Ok(())
776
1.13M
}
777
778
498
fn prepare_header_link(
779
498
    dst: &mut dyn Write,
780
498
    header: &mut Header,
781
498
    link_name: &Path,
782
498
) -> io::Result<()> {
783
    // Same as previous function but for linkname
784
498
    if let Err(e) = header.set_link_name(link_name) {
785
329
        let data = path2bytes(link_name)?;
786
329
        if data.len() < header.as_old().linkname.len() {
787
182
            return Err(e);
788
147
        }
789
147
        let header2 = prepare_header(data.len() as u64, b'K');
790
147
        let mut data2 = data.chain(io::repeat(0).take(1));
791
147
        append(dst, &header2, &mut data2)?;
792
169
    }
793
316
    Ok(())
794
498
}
795
796
2.82k
fn prepare_header_sparse(
797
2.82k
    file: &mut fs::File,
798
2.82k
    stat: &fs::Metadata,
799
2.82k
    header: &mut Header,
800
2.82k
) -> io::Result<Option<SparseEntries>> {
801
2.82k
    let entries = match find_sparse_entries(file, stat)? {
802
0
        Some(entries) => entries,
803
2.82k
        _ => return Ok(None),
804
    };
805
806
0
    header.set_entry_type(EntryType::GNUSparse);
807
0
    header.set_size(entries.on_disk_size);
808
809
    // Write the first 4 (GNU_SPARSE_HEADERS_COUNT) entries to the given header.
810
    // The remaining entries will be written as subsequent extended headers. See
811
    // https://www.gnu.org/software/tar/manual/html_section/Sparse-Formats.html#Old-GNU-Format
812
    // for details on the format.
813
0
    let gnu_header = &mut header.as_gnu_mut().unwrap();
814
0
    gnu_header.set_real_size(entries.size());
815
816
0
    for (entry, header_entry) in std::iter::zip(&entries.entries, &mut gnu_header.sparse) {
817
0
        header_entry.set_offset(entry.offset);
818
0
        header_entry.set_length(entry.num_bytes);
819
0
    }
820
0
    gnu_header.set_is_extended(entries.entries.len() > gnu_header.sparse.len());
821
822
0
    Ok(Some(entries))
823
2.82k
}
824
825
/// Write extra sparse headers into `dst` for those entries that did not fit in the main header.
826
0
fn append_extended_sparse_headers(dst: &mut dyn Write, entries: &SparseEntries) -> io::Result<()> {
827
    // The first `GNU_SPARSE_HEADERS_COUNT` entries are written to the main header, so skip them.
828
0
    let mut it = entries
829
0
        .entries
830
0
        .iter()
831
0
        .skip(GNU_SPARSE_HEADERS_COUNT)
832
0
        .peekable();
833
834
    // Each GnuExtSparseHeader can hold up to fixed number of sparse entries (21).
835
    // So we pack entries into multiple headers if necessary.
836
0
    while it.peek().is_some() {
837
0
        let mut ext_header = GnuExtSparseHeader::new();
838
0
        for header_entry in ext_header.sparse.iter_mut() {
839
0
            if let Some(entry) = it.next() {
840
0
                header_entry.set_offset(entry.offset);
841
0
                header_entry.set_length(entry.num_bytes);
842
0
            } else {
843
0
                break;
844
            }
845
        }
846
0
        ext_header.set_is_extended(it.peek().is_some());
847
0
        dst.write_all(ext_header.as_bytes())?;
848
    }
849
850
0
    Ok(())
851
0
}
852
853
7.65k
fn append_fs(
854
7.65k
    dst: &mut dyn Write,
855
7.65k
    path: &Path,
856
7.65k
    meta: &fs::Metadata,
857
7.65k
    mode: HeaderMode,
858
7.65k
    link_name: Option<&Path>,
859
7.65k
) -> io::Result<()> {
860
7.65k
    let mut header = Header::new_gnu();
861
862
7.65k
    prepare_header_path(dst, &mut header, path)?;
863
7.65k
    header.set_metadata_in_mode(meta, mode);
864
7.65k
    if let Some(link_name) = link_name {
865
0
        prepare_header_link(dst, &mut header, link_name)?;
866
7.65k
    }
867
7.65k
    header.set_cksum();
868
7.65k
    dst.write_all(header.as_bytes())
869
7.65k
}
870
871
0
fn append_dir_all(
872
0
    dst: &mut dyn Write,
873
0
    path: &Path,
874
0
    src_path: &Path,
875
0
    options: BuilderOptions,
876
0
) -> io::Result<()> {
877
0
    let mut stack = vec![(src_path.to_path_buf(), true, false)];
878
0
    while let Some((src, is_dir, is_symlink)) = stack.pop() {
879
0
        let dest = path.join(src.strip_prefix(src_path).unwrap());
880
        // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
881
0
        if is_dir || (is_symlink && options.follow && src.is_dir()) {
882
0
            for entry in fs::read_dir(&src)? {
883
0
                let entry = entry?;
884
0
                let file_type = entry.file_type()?;
885
0
                stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
886
            }
887
0
            if dest != Path::new("") {
888
0
                append_dir(dst, &dest, &src, options)?;
889
0
            }
890
0
        } else if !options.follow && is_symlink {
891
0
            let stat = fs::symlink_metadata(&src)?;
892
0
            let link_name = fs::read_link(&src)?;
893
0
            append_fs(dst, &dest, &stat, options.mode, Some(&link_name))?;
894
        } else {
895
            #[cfg(unix)]
896
            {
897
0
                let stat = fs::metadata(&src)?;
898
0
                if !stat.is_file() {
899
0
                    append_special(dst, &dest, &stat, options.mode)?;
900
0
                    continue;
901
0
                }
902
            }
903
0
            append_file(dst, &dest, &mut fs::File::open(src)?, options)?;
904
        }
905
    }
906
0
    Ok(())
907
0
}
908
909
#[derive(Debug, Clone, PartialEq, Eq)]
910
struct SparseEntries {
911
    entries: Vec<SparseEntry>,
912
    on_disk_size: u64,
913
}
914
915
impl SparseEntries {
916
0
    fn size(&self) -> u64 {
917
0
        self.entries.last().map_or(0, |e| e.offset + e.num_bytes)
918
0
    }
919
}
920
921
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
922
struct SparseEntry {
923
    offset: u64,
924
    num_bytes: u64,
925
}
926
927
/// Find sparse entries in a file. Returns:
928
/// * `Ok(Some(_))` if the file is sparse.
929
/// * `Ok(None)` if the file is not sparse, or if the file system does not support sparse files.
930
/// * `Err(_)` if an error occurred. The lack of support for sparse files is not
931
///   considered an error. It might return an error if the file is modified
932
///   while reading.
933
2.82k
fn find_sparse_entries(
934
2.82k
    file: &mut fs::File,
935
2.82k
    stat: &fs::Metadata,
936
2.82k
) -> io::Result<Option<SparseEntries>> {
937
    #[cfg(not(any(target_os = "android", target_os = "freebsd", target_os = "linux")))]
938
    {
939
        let _ = file;
940
        let _ = stat;
941
        Ok(None)
942
    }
943
944
    #[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
945
2.82k
    find_sparse_entries_seek(file, stat)
946
2.82k
}
947
948
/// Implementation of `find_sparse_entries` using `SEEK_HOLE` and `SEEK_DATA`.
949
#[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
950
2.82k
fn find_sparse_entries_seek(
951
2.82k
    file: &mut fs::File,
952
2.82k
    stat: &fs::Metadata,
953
2.82k
) -> io::Result<Option<SparseEntries>> {
954
    use std::os::unix::fs::MetadataExt as _;
955
    use std::os::unix::io::AsRawFd as _;
956
957
5.33k
    fn lseek(file: &fs::File, offset: i64, whence: libc::c_int) -> Result<i64, i32> {
958
        #[cfg(any(target_os = "linux", target_os = "android"))]
959
5.33k
        let lseek = libc::lseek64;
960
        #[cfg(not(any(target_os = "linux", target_os = "android")))]
961
        let lseek = libc::lseek;
962
963
5.33k
        match unsafe { lseek(file.as_raw_fd(), offset, whence) } {
964
0
            -1 => Err(io::Error::last_os_error().raw_os_error().unwrap()),
965
5.33k
            off => Ok(off),
966
        }
967
5.33k
    }
968
969
2.82k
    if stat.blocks() == 0 {
970
1.04k
        return Ok(if stat.size() == 0 {
971
            // Empty file.
972
1.04k
            None
973
        } else {
974
            // Fully sparse file.
975
0
            Some(SparseEntries {
976
0
                entries: vec![SparseEntry {
977
0
                    offset: stat.size(),
978
0
                    num_bytes: 0,
979
0
                }],
980
0
                on_disk_size: 0,
981
0
            })
982
        });
983
1.77k
    }
984
985
    // On most Unixes, we need to read `_PC_MIN_HOLE_SIZE` to see if the file
986
    // system supports `SEEK_HOLE`.
987
    // FreeBSD: https://man.freebsd.org/cgi/man.cgi?query=lseek&sektion=2&manpath=FreeBSD+14.1-STABLE
988
    #[cfg(not(any(target_os = "linux", target_os = "android")))]
989
    if unsafe { libc::fpathconf(file.as_raw_fd(), libc::_PC_MIN_HOLE_SIZE) } == -1 {
990
        return Ok(None);
991
    }
992
993
    // Linux is the only UNIX-like without support for `_PC_MIN_HOLE_SIZE`, so
994
    // instead we try to call `lseek` and see if it fails.
995
    #[cfg(any(target_os = "linux", target_os = "android"))]
996
1.77k
    match lseek(file, 0, libc::SEEK_HOLE) {
997
1.77k
        Ok(_) => (),
998
        Err(libc::ENXIO) => {
999
            // The file is empty. Treat it as non-sparse.
1000
0
            return Ok(None);
1001
        }
1002
0
        Err(_) => return Ok(None),
1003
    }
1004
1005
1.77k
    let mut entries = Vec::new();
1006
1.77k
    let mut on_disk_size = 0;
1007
1.77k
    let mut off_s = 0;
1008
    loop {
1009
        //  off_s=0      │     off_s               │ off_s
1010
        //    ↓          │       ↓                 │   ↓
1011
        //    | DATA |…  │  ……………| HOLE | DATA |…  │  …|×EOF×
1012
        //    ↑          │       ↑      ↑          │
1013
        //   (a)         │  (b) (c)    (d)         │     (e)
1014
1.77k
        match lseek(file, off_s, libc::SEEK_DATA) {
1015
1.77k
            Ok(0) if off_s == 0 => (), // (a) The file starts with data.
1016
0
            Ok(off) if off < off_s => {
1017
                // (b) Unlikely.
1018
0
                return Err(std::io::Error::new(
1019
0
                    io::ErrorKind::Other,
1020
0
                    "lseek(SEEK_DATA) went backwards",
1021
0
                ));
1022
            }
1023
0
            Ok(off) if off == off_s => {
1024
                // (c) The data at the same offset as the hole.
1025
0
                return Err(std::io::Error::new(
1026
0
                    io::ErrorKind::Other,
1027
0
                    "lseek(SEEK_DATA) did not advance. \
1028
0
                     Did the file change while appending?",
1029
0
                ));
1030
            }
1031
0
            Ok(off) => off_s = off,    // (d) Jump to the next hole.
1032
0
            Err(libc::ENXIO) => break, // (e) Reached the end of the file.
1033
0
            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
1034
        };
1035
1036
        // off_s=0          │     off_s               │    off_s
1037
        //   ↓              │       ↓                 │      ↓
1038
        //   | DATA |×EOF×  │  ……………| DATA | HOLE |…  │  …|×EOF×
1039
        //          ↑       │       ↑      ↑          │
1040
        //         (a)      │  (b) (c)    (d)         │     (e)
1041
1.77k
        match lseek(file, off_s, libc::SEEK_HOLE) {
1042
1.77k
            Ok(off_e) if off_s == 0 && (off_e as u64) == stat.size() => {
1043
                // (a) The file is not sparse.
1044
1.77k
                file.seek(io::SeekFrom::Start(0))?;
1045
1.77k
                return Ok(None);
1046
            }
1047
0
            Ok(off_e) if off_e < off_s => {
1048
                // (b) Unlikely.
1049
0
                return Err(std::io::Error::new(
1050
0
                    io::ErrorKind::Other,
1051
0
                    "lseek(SEEK_HOLE) went backwards",
1052
0
                ));
1053
            }
1054
0
            Ok(off_e) if off_e == off_s => {
1055
                // (c) The hole at the same offset as the data.
1056
0
                return Err(std::io::Error::new(
1057
0
                    io::ErrorKind::Other,
1058
0
                    "lseek(SEEK_HOLE) did not advance. \
1059
0
                     Did the file change while appending?",
1060
0
                ));
1061
            }
1062
0
            Ok(off_e) => {
1063
0
                // (d) Found a hole or reached the end of the file (implicit
1064
0
                // zero-length hole).
1065
0
                entries.push(SparseEntry {
1066
0
                    offset: off_s as u64,
1067
0
                    num_bytes: off_e as u64 - off_s as u64,
1068
0
                });
1069
0
                on_disk_size += off_e as u64 - off_s as u64;
1070
0
                off_s = off_e;
1071
0
            }
1072
            Err(libc::ENXIO) => {
1073
                // (e) off_s was already beyond the end of the file.
1074
0
                return Err(std::io::Error::new(
1075
0
                    io::ErrorKind::Other,
1076
0
                    "lseek(SEEK_HOLE) returned ENXIO. \
1077
0
                     Did the file change while appending?",
1078
0
                ));
1079
            }
1080
0
            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
1081
        };
1082
    }
1083
1084
0
    if off_s as u64 > stat.size() {
1085
0
        return Err(std::io::Error::new(
1086
0
            io::ErrorKind::Other,
1087
0
            "lseek(SEEK_DATA) went beyond the end of the file. \
1088
0
             Did the file change while appending?",
1089
0
        ));
1090
0
    }
1091
1092
    // Add a final zero-length entry. It is required if the file ends with a
1093
    // hole, and redundant otherwise. However, we add it unconditionally to
1094
    // mimic GNU tar behavior.
1095
0
    entries.push(SparseEntry {
1096
0
        offset: stat.size(),
1097
0
        num_bytes: 0,
1098
0
    });
1099
1100
0
    file.seek(io::SeekFrom::Start(0))?;
1101
1102
0
    Ok(Some(SparseEntries {
1103
0
        entries,
1104
0
        on_disk_size,
1105
0
    }))
1106
2.82k
}
1107
1108
impl<W: Write> Drop for Builder<W> {
1109
5.12k
    fn drop(&mut self) {
1110
5.12k
        let _ = self.finish();
1111
5.12k
    }
<tar::builder::Builder<alloc::vec::Vec<u8>> as core::ops::drop::Drop>::drop
Line
Count
Source
1109
2.30k
    fn drop(&mut self) {
1110
2.30k
        let _ = self.finish();
1111
2.30k
    }
<tar::builder::Builder<std::io::cursor::Cursor<alloc::vec::Vec<u8>>> as core::ops::drop::Drop>::drop
Line
Count
Source
1109
2.82k
    fn drop(&mut self) {
1110
2.82k
        let _ = self.finish();
1111
2.82k
    }
1112
}
1113
1114
#[cfg(test)]
1115
mod tests {
1116
    use super::*;
1117
1118
    /// Should be multiple of 4KiB on ext4, multiple of 32KiB on FreeBSD/UFS, multiple of 64KiB on
1119
    /// ppc64el
1120
    const SPARSE_BLOCK_SIZE: u64 = 64 * 1024;
1121
1122
    #[test]
1123
    fn test_find_sparse_entries() {
1124
        let cases: &[(&str, &[SparseEntry])] = &[
1125
            ("|", &[]),
1126
            (
1127
                "|    |    |    |    |",
1128
                &[SparseEntry {
1129
                    offset: 4 * SPARSE_BLOCK_SIZE,
1130
                    num_bytes: 0,
1131
                }],
1132
            ),
1133
            (
1134
                "|####|####|####|####|",
1135
                &[
1136
                    SparseEntry {
1137
                        offset: 0,
1138
                        num_bytes: 4 * SPARSE_BLOCK_SIZE,
1139
                    },
1140
                    SparseEntry {
1141
                        offset: 4 * SPARSE_BLOCK_SIZE,
1142
                        num_bytes: 0,
1143
                    },
1144
                ],
1145
            ),
1146
            (
1147
                "|####|####|    |    |",
1148
                &[
1149
                    SparseEntry {
1150
                        offset: 0,
1151
                        num_bytes: 2 * SPARSE_BLOCK_SIZE,
1152
                    },
1153
                    SparseEntry {
1154
                        offset: 4 * SPARSE_BLOCK_SIZE,
1155
                        num_bytes: 0,
1156
                    },
1157
                ],
1158
            ),
1159
            (
1160
                "|    |    |####|####|",
1161
                &[
1162
                    SparseEntry {
1163
                        offset: 2 * SPARSE_BLOCK_SIZE,
1164
                        num_bytes: 2 * SPARSE_BLOCK_SIZE,
1165
                    },
1166
                    SparseEntry {
1167
                        offset: 4 * SPARSE_BLOCK_SIZE,
1168
                        num_bytes: 0,
1169
                    },
1170
                ],
1171
            ),
1172
            (
1173
                "|####|    |####|    |",
1174
                &[
1175
                    SparseEntry {
1176
                        offset: 0,
1177
                        num_bytes: SPARSE_BLOCK_SIZE,
1178
                    },
1179
                    SparseEntry {
1180
                        offset: 2 * SPARSE_BLOCK_SIZE,
1181
                        num_bytes: SPARSE_BLOCK_SIZE,
1182
                    },
1183
                    SparseEntry {
1184
                        offset: 4 * SPARSE_BLOCK_SIZE,
1185
                        num_bytes: 0,
1186
                    },
1187
                ],
1188
            ),
1189
            (
1190
                "|####|    |    |####|",
1191
                &[
1192
                    SparseEntry {
1193
                        offset: 0,
1194
                        num_bytes: SPARSE_BLOCK_SIZE,
1195
                    },
1196
                    SparseEntry {
1197
                        offset: 3 * SPARSE_BLOCK_SIZE,
1198
                        num_bytes: SPARSE_BLOCK_SIZE,
1199
                    },
1200
                    SparseEntry {
1201
                        offset: 4 * SPARSE_BLOCK_SIZE,
1202
                        num_bytes: 0,
1203
                    },
1204
                ],
1205
            ),
1206
            (
1207
                "|    |####|####|    |",
1208
                &[
1209
                    SparseEntry {
1210
                        offset: SPARSE_BLOCK_SIZE,
1211
                        num_bytes: 2 * SPARSE_BLOCK_SIZE,
1212
                    },
1213
                    SparseEntry {
1214
                        offset: 4 * SPARSE_BLOCK_SIZE,
1215
                        num_bytes: 0,
1216
                    },
1217
                ],
1218
            ),
1219
        ];
1220
1221
        let mut file = tempfile::tempfile().unwrap();
1222
1223
        for &(description, map) in cases {
1224
            file.set_len(0).unwrap();
1225
            file.set_len(map.last().map_or(0, |e| e.offset + e.num_bytes))
1226
                .unwrap();
1227
1228
            for e in map {
1229
                file.seek(io::SeekFrom::Start(e.offset)).unwrap();
1230
                for _ in 0..e.num_bytes / SPARSE_BLOCK_SIZE {
1231
                    file.write_all(&[0xFF; SPARSE_BLOCK_SIZE as usize]).unwrap();
1232
                }
1233
            }
1234
1235
            let expected = match map {
1236
                // Empty file.
1237
                &[] => None,
1238
1239
                // 100% dense.
1240
                &[SparseEntry {
1241
                    offset: 0,
1242
                    num_bytes: x1,
1243
                }, SparseEntry {
1244
                    offset: x2,
1245
                    num_bytes: 0,
1246
                }] if x1 == x2 => None,
1247
1248
                // Sparse.
1249
                map => Some(SparseEntries {
1250
                    entries: map.to_vec(),
1251
                    on_disk_size: map.iter().map(|e| e.num_bytes).sum(),
1252
                }),
1253
            };
1254
1255
            let stat = file.metadata().unwrap();
1256
            let reported = find_sparse_entries(&mut file, &stat).unwrap();
1257
1258
            // Loose check: we did not miss any data blocks.
1259
            if let Err(e) = loose_check_sparse_entries(reported.as_ref(), expected.as_ref()) {
1260
                panic!(
1261
                    "Case: {description}\n\
1262
                     Reported: {reported:?}\n\
1263
                     Expected: {expected:?}\n\
1264
                     Error: {e}",
1265
                );
1266
            }
1267
1268
            // On Linux, always do a strict check. Skip on FreeBSD, as on UFS
1269
            // the last block is always dense, even if it's zero-filled.
1270
            #[cfg(any(target_os = "android", target_os = "linux"))]
1271
            assert_eq!(reported, expected, "Case: {description}");
1272
        }
1273
    }
1274
1275
    fn loose_check_sparse_entries(
1276
        reported: Option<&SparseEntries>,
1277
        expected: Option<&SparseEntries>,
1278
    ) -> Result<(), &'static str> {
1279
        let reported = match reported {
1280
            Some(entries) => entries, // Reported as sparse.
1281
            // It's not an error to report a sparse file as non-sparse.
1282
            None => return Ok(()),
1283
        };
1284
        let expected = match expected {
1285
            Some(entries) => entries,
1286
            None => return Err("Expected dense file, but reported as sparse"),
1287
        };
1288
1289
        // Check that we didn't miss any data blocks. However, reporting some
1290
        // holes as data is not an error during the loose check.
1291
        if expected.entries.iter().any(|e| {
1292
            !reported
1293
                .entries
1294
                .iter()
1295
                .any(|r| e.offset >= r.offset && e.offset + e.num_bytes <= r.offset + r.num_bytes)
1296
        }) {
1297
            return Err("Reported is not a superset of expected");
1298
        }
1299
1300
        if reported.entries.last() != expected.entries.last() {
1301
            return Err("Last zero-length entry is not as expected");
1302
        }
1303
1304
        // Check invariants of SparseEntries.
1305
        let mut prev_end = None;
1306
        for e in &reported.entries[..reported.entries.len()] {
1307
            if prev_end.map_or(false, |p| e.offset < p) {
1308
                return Err("Overlapping or unsorted entries");
1309
            }
1310
            prev_end = Some(e.offset + e.num_bytes);
1311
        }
1312
1313
        if reported.on_disk_size != reported.entries.iter().map(|e| e.num_bytes).sum() {
1314
            return Err("Incorrect on-disk size");
1315
        }
1316
1317
        Ok(())
1318
    }
1319
}