Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
928a90f
Add document-container crate: container backends and archive codecs
TrueDoctor Jun 2, 2026
cd9eccd
Address PR review: path/prefix split, safe size casts, OPFS stream ab…
TrueDoctor Jun 2, 2026
6409d4a
Address PR review round 2: mmap read check, UTF8 entry names, prefix …
TrueDoctor Jun 2, 2026
da956d2
Make MmappedBytes::new fallible so mmap reads can't silently degrade
TrueDoctor Jun 2, 2026
494dc42
Address PR review round 3: backend contract uniformity (symlinks, rem…
TrueDoctor Jun 2, 2026
9aebc20
Apply symlink-component check to FolderBackend listing paths
TrueDoctor Jun 3, 2026
f33f453
Address PR review: idempotent OPFS delete logging, tar default-featur…
TrueDoctor Jun 3, 2026
309a295
Fix validate_path doc: dotfiles pass, CurDir/ParentDir rejected
TrueDoctor Jun 3, 2026
fd5798e
Omit symlink entries from FolderBackend listings for consistency with…
TrueDoctor Jun 3, 2026
a911854
Preserve zip I/O errors and reject non-canonical paths in validate_path
TrueDoctor Jun 3, 2026
7d084b0
Extend archive apis to return the archive writer
TrueDoctor Jun 3, 2026
efb2a99
Rename document/document-container directory to document/container
TrueDoctor Jun 3, 2026
1a11e98
Add archive format sniffing and deserialize_auto
TrueDoctor Jun 3, 2026
ae62347
Drop temporal hedge from checked_entry_size comment
TrueDoctor Jun 3, 2026
e4b62a6
Tighten verbose doc comments in document-container
TrueDoctor Jun 3, 2026
34e7ab2
Coalesce consecutive same-path OPFS appends to avoid O(n^2) file copies
TrueDoctor Jun 3, 2026
fac36fc
Review
timon-schelling Jun 4, 2026
1aaf3b3
Update document-container for the deserialize/store_non_blocking renames
TrueDoctor Jun 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ members = [
"desktop/platform/linux",
"desktop/platform/mac",
"desktop/platform/win",
"document/container",
"editor",
"frontend/wrapper",
"libraries/dyn-any",
Expand Down
48 changes: 48 additions & 0 deletions document/container/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
[package]
name = "document-container"
description = "Container abstraction for the on-disk side of the .gdd document format"
edition.workspace = true
version.workspace = true
license.workspace = true
authors.workspace = true

[features]
default = []
zip = ["dep:zip"]
xz = ["dep:lzma-rust2", "dep:tar"]

[dependencies]
thiserror = "2.0"
log = { workspace = true }
zip = { workspace = true, optional = true, features = ["deflate-flate2-zlib-rs"], default-features = false}
lzma-rust2 = { workspace = true, optional = true }
tar = { version = "0.4", optional = true, default-features = false }

[target.'cfg(not(target_family = "wasm"))'.dependencies]
mmap-io = { workspace = true }

[target.'cfg(target_family = "wasm")'.dependencies]
web-sys = { workspace = true, features = [
"Navigator",
"DomException",
"Window",
"StorageManager",
"FileSystemCreateWritableOptions",
"FileSystemDirectoryHandle",
"FileSystemFileHandle",
"FileSystemGetFileOptions",
"FileSystemGetDirectoryOptions",
"FileSystemHandle",
"FileSystemHandleKind",
"FileSystemWritableFileStream",
"WritableStream",
"Blob",
] }
js-sys = { workspace = true }
wasm-bindgen = { workspace = true }
wasm-bindgen-futures = { workspace = true }
futures = { workspace = true }

[dev-dependencies]
tempfile = "3"
futures = { workspace = true }
97 changes: 97 additions & 0 deletions document/container/src/archive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
//! Archive codecs (zip, xz).
//!
//! Each codec streams entries in both directions: writers wrap an `io::Write + Seek` sink, and
//! [`Archive::open`] reads from any `io::Read + Seek` source and streams entries into any [`Container`].

#[cfg(any(feature = "zip", feature = "xz"))]
use crate::ContainerError;
use crate::{Container, Result};
use std::io::{Read, Seek, Write};

/// Hard cap on the total decompressed size a codec will materialize from one archive.
/// Defends against decompression bombs at the cost of refusing legitimately huge archives.
/// `checked_entry_size` enforces it against the cumulative declared entry sizes.
#[cfg(any(feature = "zip", feature = "xz"))]
pub(crate) const MAX_DECOMPRESSED_SIZE: u64 = 4 * 1024 * 1024 * 1024; // 4 GiB

/// Account for one entry's declared `size` and hand it back as a `usize` suitable for `write_sized`.
///
/// `size` is added to the running `total` with saturating arithmetic, so an absurd declared size
/// cannot wrap the counter back under the cap. The call fails with
/// `ContainerError::SizeLimitExceeded` when the cumulative total passes `MAX_DECOMPRESSED_SIZE`,
/// or when `size` alone does not fit in this target's `usize`.
///
/// Every codec funnels its entries through this helper so the decompression-bomb cap and the
/// 32-bit-safe conversion are implemented once, before `write_sized` pre-allocates anything.
#[cfg(any(feature = "zip", feature = "xz"))]
pub(crate) fn checked_entry_size(total: &mut u64, size: u64) -> Result<usize> {
    let cumulative = total.saturating_add(size);
    *total = cumulative;

    if cumulative > MAX_DECOMPRESSED_SIZE {
        return Err(ContainerError::SizeLimitExceeded {
            declared: cumulative,
            limit: MAX_DECOMPRESSED_SIZE,
        });
    }

    // On wasm `usize` is only 32 bits wide; a fallible conversion keeps an oversized entry from
    // being silently truncated into a smaller allocation.
    match usize::try_from(size) {
        Ok(length) => Ok(length),
        Err(_) => Err(ContainerError::SizeLimitExceeded {
            declared: size,
            limit: usize::MAX as u64,
        }),
    }
}

#[cfg(feature = "zip")]
mod zip;
#[cfg(feature = "zip")]
pub use zip::{Zip, ZipWriter};

#[cfg(feature = "xz")]
mod xz;
#[cfg(feature = "xz")]
pub use xz::{Xz, XzWriter};

/// Streaming archive codec. The associated `Writer` type wraps a `Write + Seek` sink (zip needs
/// `Seek` for the central directory; xz doesn't but `Seek` is free on file-like sinks) and
/// accepts entries one at a time. `finish` flushes the codec's trailer and consumes the wrapper.
pub trait Archive {
    /// Writer returned by [`Archive::writer`] for a sink of type `W`.
    ///
    /// The `Write + Seek` bound is stated once, on the parameter itself; repeating it in a
    /// `where` clause adds nothing.
    type Writer<W: Write + Seek>: ArchiveWriter;

    /// Wrap `output` in this codec's writer so entries can be appended to it one at a time.
    fn writer<W: Write + Seek>(output: W) -> Result<Self::Writer<W>>;

    /// Read entries from `source` and write each into `dest`, streaming so neither the full
    /// archive nor the full container ever sits in memory at once.
    fn open<R: Read + Seek, C: Container>(source: R, dest: &mut C) -> Result<()>;
}

/// Writing half of an archive codec: entries are appended one at a time, then the archive is
/// finalized with `finish`.
pub trait ArchiveWriter {
/// Append one entry stored under `path` whose contents are `bytes`.
fn write_entry(&mut self, path: &str, bytes: &[u8]) -> Result<()>;
/// Consume the writer and finalize the archive.
fn finish(self) -> Result<()>;
}

/// Archive container formats distinguishable by their leading magic bytes.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ArchiveFormat {
    /// An xz stream.
    Xz,
    /// A zip archive.
    Zip,
}

impl ArchiveFormat {
    /// Sniff the format from the leading magic bytes.
    ///
    /// * xz streams start with `FD 37 7A 58 5A 00`.
    /// * zip archives start with a local file header, `50 4B 03 04` (`PK\x03\x04`). A zip with no
    ///   entries has no local file header and consists only of the end-of-central-directory
    ///   record, so `50 4B 05 06` (`PK\x05\x06`) is accepted as well.
    ///
    /// Returns `None` for anything else, including inputs shorter than the signature.
    pub fn detect(bytes: &[u8]) -> Option<Self> {
        const XZ_STREAM_HEADER: &[u8] = &[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00];
        const ZIP_LOCAL_FILE_HEADER: &[u8] = &[0x50, 0x4B, 0x03, 0x04];
        const ZIP_END_OF_CENTRAL_DIRECTORY: &[u8] = &[0x50, 0x4B, 0x05, 0x06];

        if bytes.starts_with(XZ_STREAM_HEADER) {
            Some(Self::Xz)
        } else if bytes.starts_with(ZIP_LOCAL_FILE_HEADER) || bytes.starts_with(ZIP_END_OF_CENTRAL_DIRECTORY) {
            Some(Self::Zip)
        } else {
            None
        }
    }
}

/// Unpack an archive held in memory into `dest`, choosing the codec from the magic header of `bytes`.
///
/// Fails with a `ContainerError::Codec` error when the header matches neither xz nor zip; any
/// error from the selected codec is returned unchanged.
#[cfg(all(feature = "xz", feature = "zip"))]
pub fn open_auto<C: Container>(bytes: &[u8], dest: &mut C) -> Result<()> {
    let Some(format) = ArchiveFormat::detect(bytes) else {
        return Err(ContainerError::Codec("unrecognized archive format (not xz or zip)".into()));
    };

    let cursor = std::io::Cursor::new(bytes);
    match format {
        ArchiveFormat::Xz => Xz::open(cursor, dest),
        ArchiveFormat::Zip => Zip::open(cursor, dest),
    }
}
97 changes: 97 additions & 0 deletions document/container/src/archive/xz.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
//! Xz-compressed tarball archive codec.

use crate::archive::{Archive, ArchiveWriter, MAX_DECOMPRESSED_SIZE, checked_entry_size};
use crate::{Container, ContainerError, Result, validate_path};
use lzma_rust2::{XzOptions, XzReader, XzWriter as InnerXzWriter};
use std::io::{Read, Seek, Write};

/// Marker type for the xz-compressed tarball codec; it carries no data and exists to implement `Archive`.
pub struct Xz;

/// xz-tar writer. Held as an `Option` so `finish` can take ownership and unwind the layered
/// writers in the right order: drop the tar builder first to flush its trailer, then finish xz.
pub struct XzWriter<W: Write + Seek> {
/// Tar builder layered over the xz encoder. Becomes `None` once the writer has been finished,
/// after which further use reports "XzWriter already finished".
tar: Option<tar::Builder<InnerXzWriter<W>>>,
}

impl Archive for Xz {
    type Writer<W: Write + Seek> = XzWriter<W>;

    /// Layer a tar builder over an xz encoder (default options) that writes into `output`.
    fn writer<W: Write + Seek>(output: W) -> Result<Self::Writer<W>> {
        let encoder = InnerXzWriter::new(output, XzOptions::default()).map_err(lzma_err)?;
        let builder = tar::Builder::new(encoder);
        Ok(XzWriter { tar: Some(builder) })
    }

    /// Decompress `source` and stream every regular tar entry into `dest`.
    ///
    /// Entries that are not regular files are skipped. Entry names must be valid UTF-8 and pass
    /// `validate_path`; otherwise the whole call fails.
    fn open<R: Read + Seek, C: Container>(source: R, dest: &mut C) -> Result<()> {
        // Two independent limits apply. `take` caps the bytes actually decompressed from the xz
        // stream. `write_sized` pre-allocates from each entry's *declared* size before any data
        // is read, so the cumulative declared size is capped too (via `checked_entry_size`),
        // otherwise a header claiming a huge size could trigger a giant allocation up front.
        let decoder = XzReader::new(source, false).take(MAX_DECOMPRESSED_SIZE);
        let mut archive = tar::Archive::new(decoder);
        let mut declared_total: u64 = 0;

        for item in archive.entries()? {
            let mut item = item?;

            let is_regular = item.header().entry_type() == tar::EntryType::Regular;
            if !is_regular {
                continue;
            }

            // Non-UTF8 names are rejected instead of being lossily rewritten, so the stored path
            // always matches the archive exactly.
            let path = item.path()?;
            let Some(name) = path.to_str().map(str::to_owned) else {
                return Err(ContainerError::Codec(format!("tar: non-UTF8 entry name {path:?}")));
            };
            validate_path(&name)?;

            let length = checked_entry_size(&mut declared_total, item.size())?;

            dest.write_sized(&name, length, &mut |buffer| item.read_exact(buffer).map_err(ContainerError::Io))?;
        }

        Ok(())
    }
}

impl<W: Write + Seek> ArchiveWriter for XzWriter<W> {
    /// Append `bytes` to the archive as a regular file named `path` with mode `0o644`.
    ///
    /// Fails if `path` does not pass `validate_path`, if the writer has already been finished,
    /// or if writing to the underlying sink fails.
    fn write_entry(&mut self, path: &str, bytes: &[u8]) -> Result<()> {
        validate_path(path)?;
        let tar = self.tar.as_mut().ok_or_else(|| ContainerError::Codec("XzWriter already finished".into()))?;

        let mut header = tar::Header::new_gnu();
        // `usize` is never wider than 64 bits, so this widening cast is lossless.
        header.set_size(bytes.len() as u64);
        header.set_mode(0o644);

        // `Header::set_path` cannot store names longer than 100 bytes in a GNU header.
        // `Builder::append_data` sets the path and checksum itself and, for longer names, emits
        // the GNU long-name extension first, so deep container paths round-trip through xz.
        tar.append_data(&mut header, path, bytes)?;
        Ok(())
    }

    /// Flush the tar trailer, finish the xz stream and discard the underlying sink.
    fn finish(mut self) -> Result<()> {
        self.finish_inner().map(|_sink| ())
    }
}

impl<W: Write + Seek> XzWriter<W> {
    /// Finalize the archive and give the sink back to the caller. Useful for in-memory archives,
    /// where the written bytes (e.g. a `Cursor<Vec<u8>>`) are wanted afterwards.
    pub fn finish_into(mut self) -> Result<W> {
        self.finish_inner()
    }

    /// Common tail of `finish` and `finish_into`: peel the writer layers from the outside in
    /// (tar trailer first, then the xz stream) and return the innermost sink.
    ///
    /// Fails with a `ContainerError::Codec` error if the writer was already finished.
    fn finish_inner(&mut self) -> Result<W> {
        let Some(mut builder) = self.tar.take() else {
            return Err(ContainerError::Codec("XzWriter already finished".into()));
        };

        builder.finish()?;
        let encoder = builder.into_inner()?;
        encoder.finish().map_err(lzma_err)
    }
}

/// Convert an I/O error reported by the lzma layer into a `ContainerError::Codec`, prefixing the
/// message with `lzma: ` so the failing layer is identifiable.
fn lzma_err(error: std::io::Error) -> ContainerError {
    let message = format!("lzma: {error}");
    ContainerError::Codec(message)
}
Loading
Loading