Compare commits

..

3 commits

Author SHA1 Message Date
6f307c139b
feat: add backwards compatibility for legacy msgpack storage format 2026-02-10 21:09:58 +00:00
a6b1a027c8
refactor: switch storage format from MessagePack to bincode
- Replace rmp-serde with bincode 1.x in Cargo.toml
- Update store.rs serialization/deserialization and ID hashing
- Rename model.rs helpers from to_msgpack/from_msgpack to to_bytes/from_bytes
- Consolidate MsgPack/MsgPackDecode error variants into single Bincode variant
- Remove skip_serializing_if on ssh_signature (incompatible with bincode)
- Update all documentation to reflect bincode storage format
2026-02-10 21:03:53 +00:00
adcdaa20c6
feat: add corruption prevention and integrity checking
- Add read-time validation of commit objects in store.rs (verify
  delta IDs, commit IDs, parent references)
- Enable zstd frame checksums on write
- Add directory fsync after atomic renames for durability
- Validate delta chain consistency in materialize_committed_tree
- Reject unsupported Patch delta variants in apply_delta
- Defer ref updates in graft to avoid dangling references
- Add 'arc check' command (fsck) with orphan detection
- Add CorruptObject and UnsupportedDelta error variants
2026-02-10 21:03:22 +00:00
16 changed files with 686 additions and 49 deletions

10
Cargo.lock generated
View file

@ -56,6 +56,7 @@ dependencies = [
name = "arc" name = "arc"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"bincode",
"clap", "clap",
"colored", "colored",
"git2", "git2",
@ -87,6 +88,15 @@ version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.10.0" version = "2.10.0"

View file

@ -7,6 +7,7 @@ edition = "2024"
clap = { version = "4", features = ["derive"] } clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9" serde_yaml = "0.9"
bincode = "1"
rmp-serde = "1" rmp-serde = "1"
zstd = "0.13" zstd = "0.13"
sha2 = "0.10" sha2 = "0.10"

View file

@ -6,7 +6,7 @@
A delta-based version control system written in Rust. A delta-based version control system written in Rust.
Unlike Git's snapshot-based model, Arc stores incremental deltas using Unlike Git's snapshot-based model, Arc stores incremental deltas using
ZSTD-compressed MessagePack files. Changes are automatically tracked ZSTD-compressed bincode files. Changes are automatically tracked
without manual staging, and commits are immutable once created. without manual staging, and commits are immutable once created.
Arc uses a **bookmark** system instead of branches, and bridges to Git Arc uses a **bookmark** system instead of branches, and bridges to Git
@ -14,7 +14,7 @@ remotes for push, pull, clone, and sync operations via `libgit2`.
## Features ## Features
- Incremental delta storage (ZSTD + MessagePack) - Incremental delta storage (ZSTD + bincode)
- Automatic change tracking (no staging step) - Automatic change tracking (no staging step)
- Bookmarks and immutable tags - Bookmarks and immutable tags
- Named stashes - Named stashes

View file

@ -11,7 +11,7 @@ An arc repository keeps all state in an `.arc/` directory at the worktree root:
|------|--------|---------| |------|--------|---------|
| `HEAD` | YAML | Current state — one of three variants: **unborn** (no commits yet; has `bookmark`), **attached** (on a bookmark; has `bookmark` + `commit`), or **detached** (raw commit; has `commit`). | | `HEAD` | YAML | Current state — one of three variants: **unborn** (no commits yet; has `bookmark`), **attached** (on a bookmark; has `bookmark` + `commit`), or **detached** (raw commit; has `commit`). |
| `config.yml` | YAML | Local repository configuration. | | `config.yml` | YAML | Local repository configuration. |
| `commits/<id>.zst` | Zstandard-compressed MessagePack | Commit objects. Each file contains a `CommitObject` that bundles a `Commit` and its `Delta`. | | `commits/<id>.zst` | Zstandard-compressed bincode | Commit objects. Each file contains a `CommitObject` that bundles a `Commit` and its `Delta`. |
| `bookmarks/<name>.yml` | YAML | One file per bookmark. Contains a `RefTarget` with an optional `commit` field. | | `bookmarks/<name>.yml` | YAML | One file per bookmark. Contains a `RefTarget` with an optional `commit` field. |
| `tags/<name>.yml` | YAML | Same format as bookmarks. | | `tags/<name>.yml` | YAML | Same format as bookmarks. |
| `stashes/state.yml` | YAML | Tracks the active stash. | | `stashes/state.yml` | YAML | Tracks the active stash. |
@ -42,9 +42,9 @@ hex hashes.
## Storage (`src/store.rs`) ## Storage (`src/store.rs`)
`CommitObject` bundles a `Commit` and its `Delta` into a single unit that is `CommitObject` bundles a `Commit` and its `Delta` into a single unit that is
serialized as MessagePack, then compressed with Zstandard at level 3. Files are serialized with bincode, then compressed with Zstandard at level 3. Files are
written atomically (write to `.tmp`, then rename). IDs are computed by SHA-256 written atomically (write to `.tmp`, then rename). IDs are computed by SHA-256
hashing the MessagePack-serialized content-addressable data. hashing the bincode-serialized content-addressable data.
## Tracking (`src/tracking.rs`) ## Tracking (`src/tracking.rs`)

View file

@ -14,7 +14,7 @@ Initialize a new arc repository. Creates the `.arc/` directory structure includi
### `arc commit <message>` ### `arc commit <message>`
Commit all current changes. No staging area is needed — changes are detected automatically by comparing the worktree to the last commit. Creates a ZSTD-compressed MessagePack commit object in `.arc/commits/`. If a signing key is configured (`user.key`), the commit is signed with SSH. Commit all current changes. No staging area is needed — changes are detected automatically by comparing the worktree to the last commit. Creates a ZSTD-compressed bincode commit object in `.arc/commits/`. If a signing key is configured (`user.key`), the commit is signed with SSH.
### `arc status` ### `arc status`

View file

@ -1,6 +1,6 @@
# Git Bridge # Git Bridge
Arc uses an internal git bridge to interoperate with git remotes. Since Arc uses its own delta-based storage format (ZSTD-compressed MessagePack), it maintains a shadow bare git repository to translate between formats when communicating with git servers. Arc uses an internal git bridge to interoperate with git remotes. Since Arc uses its own delta-based storage format (ZSTD-compressed bincode), it maintains a shadow bare git repository to translate between formats when communicating with git servers.
## Shadow Repository ## Shadow Repository

View file

@ -20,7 +20,7 @@ This is an overview of the foundational rules that make the software.
8a. use `feat: <message>` for new features, `fix: <message>` for bug fixes, `refactor: <message>` for changes. 8a. use `feat: <message>` for new features, `fix: <message>` for bug fixes, `refactor: <message>` for changes.
8b. use `docs: <message>` for docs changes, `build: <message>` for build system changes, etc. 8b. use `docs: <message>` for docs changes, `build: <message>` for build system changes, etc.
9. Anything involving remotes should use `libgit` or `git2` libraries for compatibility. 9. Anything involving remotes should use `libgit` or `git2` libraries for compatibility.
10. Deltas should be stored using ZSTD compressed Messagepack files for easy storage. 10. Deltas should be stored using ZSTD compressed bincode files for easy storage.
11. When pushing, pulling, and fetching from remotes, it should be bridged to git. 11. When pushing, pulling, and fetching from remotes, it should be bridged to git.
12. Lastly, it should cover 90% of use cases that git has, for full feature support. 12. Lastly, it should cover 90% of use cases that git has, for full feature support.
13. Arc should support **optional** commit signing via SSH keys. 13. Arc should support **optional** commit signing via SSH keys.
@ -117,7 +117,7 @@ These are the implementation phases that should be implemented incrementally.
1. **Project scaffolding** - Nix flake, direnv, Rust project structure, CLI skeleton with clap, help 1. **Project scaffolding** - Nix flake, direnv, Rust project structure, CLI skeleton with clap, help
2. **Core repo structure** - init, internal data model (commits, deltas, YAML config), .arcignore 2. **Core repo structure** - init, internal data model (commits, deltas, YAML config), .arcignore
3. **Tracking & committing** - commit, status, diff, auto-change detection, ZSTD + MessagePack storage 3. **Tracking & committing** - commit, status, diff, auto-change detection, ZSTD + bincode storage
4. **History & inspection** - log, show, history, state reconstruction from delta chains 4. **History & inspection** - log, show, history, state reconstruction from delta chains
5. **Bookmarks & tags** - mark commands, tag commands, and switch command 5. **Bookmarks & tags** - mark commands, tag commands, and switch command
6. **Undo & modification** - revert, reset, graft, three-way merge 6. **Undo & modification** - revert, reset, graft, three-way merge

194
src/check.rs Normal file
View file

@ -0,0 +1,194 @@
use std::collections::HashSet;
use std::fs;
use crate::error::Result;
use crate::model::{CommitId, RefTarget};
use crate::repo::Repository;
use crate::store;
use crate::tracking;
use crate::ui;
/// Result of a repository integrity check (`arc check`).
///
/// Problems are accumulated in `errors` rather than aborting the check, so a
/// single run reports everything it can find.
pub struct CheckReport {
    /// Number of distinct commit objects visited during the graph walk.
    pub commits_checked: usize,
    /// Number of bookmark/tag ref files inspected.
    pub refs_checked: usize,
    /// Human-readable description of each problem found; empty means healthy.
    pub errors: Vec<String>,
}

impl CheckReport {
    /// True when the check recorded no errors.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
impl std::fmt::Display for CheckReport {
    /// Render the report: a single success line when clean, otherwise one
    /// error line per problem followed by a summary count.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Failure path first: list every recorded problem, then the totals.
        if !self.is_ok() {
            for err in &self.errors {
                writeln!(f, "{}", ui::error(err))?;
            }
            return writeln!(
                f,
                "\n{} error(s) found in {} commit(s), {} ref(s)",
                self.errors.len(),
                self.commits_checked,
                self.refs_checked
            );
        }
        // Clean repository: a single green summary line.
        let summary = format!(
            "repository ok: {} commit(s), {} ref(s) checked",
            self.commits_checked, self.refs_checked
        );
        writeln!(f, "{}", ui::success(&summary))
    }
}
/// Run a full repository integrity check.
///
/// Collects ref targets from bookmarks and tags, walks the commit graph from
/// HEAD plus every ref (decoding each object via `store::read_commit_object`,
/// which also validates it), replays one delta chain to confirm it
/// materializes, and flags unreachable commit files as orphans.
///
/// All detected problems are accumulated in the returned [`CheckReport`];
/// `Err` is reserved for unexpected internal failures.
pub fn check(repo: &Repository) -> Result<CheckReport> {
    debug!(1, "running repository integrity check");
    let mut errors = Vec::new();
    let mut visited = HashSet::new();
    let mut refs_checked = 0usize;

    // Bookmarks and tags share the same on-disk RefTarget format.
    let bookmark_ids = collect_ref_targets(repo, &repo.bookmarks_dir(), &mut errors);
    refs_checked += bookmark_ids.len();
    let tag_ids = collect_ref_targets(repo, &repo.tags_dir(), &mut errors);
    refs_checked += tag_ids.len();

    // A broken HEAD is reported but does not stop the rest of the check.
    let head = match repo.load_head() {
        Ok(h) => Some(h),
        Err(e) => {
            errors.push(format!("failed to load HEAD: {e}"));
            None
        }
    };
    // An unborn HEAD (no commits yet) contributes no walk root.
    let head_commit = match &head {
        Some(crate::model::Head::Attached { commit, .. }) => Some(commit.clone()),
        Some(crate::model::Head::Detached { commit }) => Some(commit.clone()),
        _ => None,
    };

    // Roots for the reachability walk: HEAD first, then bookmark/tag targets.
    let mut all_roots: Vec<CommitId> = Vec::new();
    if let Some(id) = head_commit {
        all_roots.push(id);
    }
    all_roots.extend(bookmark_ids);
    all_roots.extend(tag_ids);
    for root in &all_roots {
        walk_commits(repo, root, &mut visited, &mut errors);
    }

    // Replay one delta chain end-to-end. The first root is HEAD when HEAD
    // resolved; otherwise it is the first bookmark/tag target.
    if let Some(tip) = all_roots.first() {
        debug!(2, "verifying delta chain replay from HEAD");
        if let Err(e) = tracking::materialize_committed_tree(repo, tip) {
            errors.push(format!("delta chain replay failed: {e}"));
        }
    }

    let commits_checked = visited.len();
    // Any commit file in .arc/commits/ not reached from a root is an orphan.
    let orphans = find_orphan_files(repo, &visited);
    for orphan in &orphans {
        errors.push(format!("orphan commit object: {orphan}"));
    }
    debug!(
        1,
        "check complete: {} commit(s), {} ref(s), {} error(s)",
        commits_checked,
        refs_checked,
        errors.len()
    );
    Ok(CheckReport {
        commits_checked,
        refs_checked,
        errors,
    })
}
/// Scan one ref directory (bookmarks or tags), returning the commit ids the
/// refs point at. Unreadable or malformed refs, and refs whose target commit
/// file is missing, are reported through `errors`; a missing directory is
/// treated as "no refs".
fn collect_ref_targets(
    repo: &Repository,
    dir: &std::path::Path,
    errors: &mut Vec<String>,
) -> Vec<CommitId> {
    let mut ids = Vec::new();
    let Ok(entries) = fs::read_dir(dir) else {
        return ids;
    };
    for entry in entries.flatten() {
        // Skip subdirectories and anything that isn't a regular file.
        let is_file = entry.file_type().map(|t| t.is_file()).unwrap_or(false);
        if !is_file {
            continue;
        }
        let name = entry.file_name().to_string_lossy().to_string();
        let contents = match fs::read_to_string(entry.path()) {
            Ok(c) => c,
            Err(e) => {
                errors.push(format!("cannot read ref '{}': {}", name, e));
                continue;
            }
        };
        let ref_target: RefTarget = match serde_yaml::from_str(&contents) {
            Ok(t) => t,
            Err(e) => {
                errors.push(format!("ref '{}' has invalid format: {}", name, e));
                continue;
            }
        };
        if let Some(id) = ref_target.commit {
            // Report dangling refs, but still return the id so the walk can
            // report the missing object in detail too.
            if !store::commit_object_path(repo, &id).exists() {
                errors.push(format!("ref '{}' points to missing commit {}", name, id));
            }
            ids.push(id);
        }
    }
    ids
}
/// Depth-first walk of the commit graph from `start`, following parent links.
/// Each newly visited id is inserted into `visited`; objects that fail to
/// load or validate add an entry to `errors` and their parents are not
/// followed (they may be reached from another root).
fn walk_commits(
    repo: &Repository,
    start: &CommitId,
    visited: &mut HashSet<String>,
    errors: &mut Vec<String>,
) {
    let mut stack = vec![start.clone()];
    while let Some(id) = stack.pop() {
        // `insert` returns false when the id was already present.
        let first_visit = visited.insert(id.0.clone());
        if !first_visit {
            continue;
        }
        match store::read_commit_object(repo, &id) {
            Err(e) => {
                // Abbreviate the id the same way the log output does.
                let short = &id.0[..id.0.len().min(12)];
                errors.push(format!("commit {}: {}", short, e));
            }
            Ok(obj) => {
                stack.extend(obj.commit.parents.iter().cloned());
            }
        }
    }
}
fn find_orphan_files(repo: &Repository, reachable: &HashSet<String>) -> Vec<String> {
let dir = repo.commits_dir();
let entries = match fs::read_dir(&dir) {
Ok(e) => e,
Err(_) => return Vec::new(),
};
let mut orphans = Vec::new();
for entry in entries.flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if let Some(id) = name.strip_suffix(".zst")
&& !reachable.contains(id)
{
orphans.push(id.to_string());
}
}
orphans.sort();
orphans
}

View file

@ -5,6 +5,7 @@ use std::sync::atomic::{AtomicU8, Ordering};
use clap::{ArgAction, Parser, Subcommand}; use clap::{ArgAction, Parser, Subcommand};
use crate::bridge; use crate::bridge;
use crate::check;
use crate::config; use crate::config;
use crate::diff; use crate::diff;
use crate::ignore::IgnoreRules; use crate::ignore::IgnoreRules;
@ -134,6 +135,9 @@ pub enum Command {
/// Convert a git repo to an arc repo /// Convert a git repo to an arc repo
Migrate, Migrate,
/// Verify repository integrity
Check,
/// Manage bookmarks /// Manage bookmarks
Mark { Mark {
#[command(subcommand)] #[command(subcommand)]
@ -580,6 +584,22 @@ pub fn dispatch(cli: Cli) {
} }
} }
} }
Command::Check => {
debug!(1, "command: check");
let repo = open_repo_or_exit();
match check::check(&repo) {
Ok(report) => {
print!("{report}");
if !report.errors.is_empty() {
std::process::exit(1);
}
}
Err(e) => {
eprintln!("{}", ui::error(&e.to_string()));
std::process::exit(1);
}
}
}
Command::Mark { command } => { Command::Mark { command } => {
debug!(1, "command: mark"); debug!(1, "command: mark");
let repo = open_repo_or_exit(); let repo = open_repo_or_exit();
@ -882,11 +902,11 @@ fn run_diff(repo: &Repository, range: Option<&str>) -> crate::error::Result<Stri
let resolved = resolve::parse_and_resolve_range(repo, Some(spec))?; let resolved = resolve::parse_and_resolve_range(repo, Some(spec))?;
let mut old_tree = BTreeMap::new(); let mut old_tree = BTreeMap::new();
for obj in &resolved.chain[..=resolved.start_idx] { for obj in &resolved.chain[..=resolved.start_idx] {
tracking::apply_delta(&mut old_tree, &obj.delta); tracking::apply_delta(&mut old_tree, &obj.delta)?;
} }
let mut new_tree = old_tree.clone(); let mut new_tree = old_tree.clone();
for obj in &resolved.chain[resolved.start_idx + 1..] { for obj in &resolved.chain[resolved.start_idx + 1..] {
tracking::apply_delta(&mut new_tree, &obj.delta); tracking::apply_delta(&mut new_tree, &obj.delta)?;
} }
let changes = tracking::detect_changes(&old_tree, &new_tree); let changes = tracking::detect_changes(&old_tree, &new_tree);
Ok(diff::render_diff(&old_tree, &changes)) Ok(diff::render_diff(&old_tree, &changes))

View file

@ -5,8 +5,7 @@ use std::io;
pub enum ArcError { pub enum ArcError {
Io(io::Error), Io(io::Error),
Yaml(serde_yaml::Error), Yaml(serde_yaml::Error),
MsgPack(rmp_serde::encode::Error), Bincode(Box<bincode::ErrorKind>),
MsgPackDecode(rmp_serde::decode::Error),
RepoNotFound, RepoNotFound,
RepoAlreadyExists, RepoAlreadyExists,
InvalidPath(String), InvalidPath(String),
@ -41,6 +40,8 @@ pub enum ArcError {
NotAGitRepo, NotAGitRepo,
FastForwardOnly(String), FastForwardOnly(String),
SigningError(String), SigningError(String),
CorruptObject(String),
UnsupportedDelta(String),
} }
impl fmt::Display for ArcError { impl fmt::Display for ArcError {
@ -48,8 +49,7 @@ impl fmt::Display for ArcError {
match self { match self {
Self::Io(e) => write!(f, "io error: {e}"), Self::Io(e) => write!(f, "io error: {e}"),
Self::Yaml(e) => write!(f, "yaml error: {e}"), Self::Yaml(e) => write!(f, "yaml error: {e}"),
Self::MsgPack(e) => write!(f, "msgpack encode error: {e}"), Self::Bincode(e) => write!(f, "bincode error: {e}"),
Self::MsgPackDecode(e) => write!(f, "msgpack decode error: {e}"),
Self::RepoNotFound => write!(f, "not an arc repository (or any parent)"), Self::RepoNotFound => write!(f, "not an arc repository (or any parent)"),
Self::RepoAlreadyExists => { Self::RepoAlreadyExists => {
write!(f, "arc repository already exists in this directory") write!(f, "arc repository already exists in this directory")
@ -97,6 +97,8 @@ impl fmt::Display for ArcError {
Self::NotAGitRepo => write!(f, "not a git repository"), Self::NotAGitRepo => write!(f, "not a git repository"),
Self::FastForwardOnly(reason) => write!(f, "cannot fast-forward: {reason}"), Self::FastForwardOnly(reason) => write!(f, "cannot fast-forward: {reason}"),
Self::SigningError(msg) => write!(f, "signing error: {msg}"), Self::SigningError(msg) => write!(f, "signing error: {msg}"),
Self::CorruptObject(msg) => write!(f, "corrupt object: {msg}"),
Self::UnsupportedDelta(msg) => write!(f, "unsupported delta format: {msg}"),
} }
} }
} }
@ -115,15 +117,9 @@ impl From<serde_yaml::Error> for ArcError {
} }
} }
impl From<rmp_serde::encode::Error> for ArcError { impl From<Box<bincode::ErrorKind>> for ArcError {
fn from(e: rmp_serde::encode::Error) -> Self { fn from(e: Box<bincode::ErrorKind>) -> Self {
Self::MsgPack(e) Self::Bincode(e)
}
}
impl From<rmp_serde::decode::Error> for ArcError {
fn from(e: rmp_serde::decode::Error) -> Self {
Self::MsgPackDecode(e)
} }
} }

View file

@ -2,6 +2,7 @@
pub mod ui; pub mod ui;
pub mod bridge; pub mod bridge;
pub mod check;
mod cli; mod cli;
pub mod config; pub mod config;
pub mod diff; pub mod diff;

View file

@ -40,7 +40,6 @@ pub struct Commit {
pub message: String, pub message: String,
pub author: Option<Signature>, pub author: Option<Signature>,
pub timestamp: i64, pub timestamp: i64,
#[serde(skip_serializing_if = "Option::is_none", default)]
pub ssh_signature: Option<String>, pub ssh_signature: Option<String>,
} }
@ -94,21 +93,21 @@ pub struct RefTarget {
} }
impl Commit { impl Commit {
pub fn to_msgpack(&self) -> crate::error::Result<Vec<u8>> { pub fn to_bytes(&self) -> crate::error::Result<Vec<u8>> {
Ok(rmp_serde::to_vec(self)?) Ok(bincode::serialize(self)?)
} }
pub fn from_msgpack(bytes: &[u8]) -> crate::error::Result<Self> { pub fn from_bytes(bytes: &[u8]) -> crate::error::Result<Self> {
Ok(rmp_serde::from_slice(bytes)?) Ok(bincode::deserialize(bytes)?)
} }
} }
impl Delta { impl Delta {
pub fn to_msgpack(&self) -> crate::error::Result<Vec<u8>> { pub fn to_bytes(&self) -> crate::error::Result<Vec<u8>> {
Ok(rmp_serde::to_vec(self)?) Ok(bincode::serialize(self)?)
} }
pub fn from_msgpack(bytes: &[u8]) -> crate::error::Result<Self> { pub fn from_bytes(bytes: &[u8]) -> crate::error::Result<Self> {
Ok(rmp_serde::from_slice(bytes)?) Ok(bincode::deserialize(bytes)?)
} }
} }

View file

@ -195,7 +195,7 @@ pub fn graft(repo: &Repository, target: &str, onto: &str) -> Result<Vec<CommitId
let short_id = &obj.commit.id.0[..obj.commit.id.0.len().min(12)]; let short_id = &obj.commit.id.0[..obj.commit.id.0.len().min(12)];
let message = format!("graft {short_id}: {}", obj.commit.message); let message = format!("graft {short_id}: {}", obj.commit.message);
let new_id = commit_tree_internal(repo, &message, vec![current_tip], &outcome.tree)?; let new_id = write_commit_object_only(repo, &message, vec![current_tip], &outcome.tree)?;
current_tip = new_id.clone(); current_tip = new_id.clone();
current_tree = outcome.tree; current_tree = outcome.tree;
@ -317,6 +317,63 @@ fn commit_tree(
Ok(id) Ok(id)
} }
/// Create and persist a commit object for `new_tree` WITHOUT updating HEAD,
/// bookmarks, or the worktree — used by graft so ref updates can be deferred
/// until the whole rewritten chain exists (avoids dangling references if a
/// later step fails).
///
/// Returns the new commit's id, or `NothingToCommit` when `new_tree` is
/// identical to the first parent's materialized tree.
fn write_commit_object_only(
    repo: &Repository,
    message: &str,
    parents: Vec<CommitId>,
    new_tree: &FileTree,
) -> Result<CommitId> {
    // Diff against the first parent's tree; roots diff against empty.
    let parent_tree = if parents.is_empty() {
        BTreeMap::new()
    } else {
        tracking::materialize_committed_tree(repo, &parents[0])?
    };
    let changes = tracking::detect_changes(&parent_tree, new_tree);
    if changes.is_empty() {
        return Err(ArcError::NothingToCommit);
    }
    // Delta id is a content hash over (base, changes).
    let delta_id = store::compute_delta_id(&parents.first().cloned(), &changes)?;
    let delta = Delta {
        id: delta_id.clone(),
        base: parents.first().cloned(),
        changes,
    };
    // Author metadata is only recorded when both name and email are set.
    let config = crate::config::load_effective(repo);
    let author = match (config.user_name, config.user_email) {
        (Some(name), Some(email)) => Some(crate::model::Signature { name, email }),
        _ => None,
    };
    // Seconds since the Unix epoch; a pre-epoch clock is a hard error.
    let timestamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_err(|_| ArcError::ClockError)?
        .as_secs() as i64;
    let commit_id = store::compute_commit_id(&parents, &delta_id, message, &author, timestamp)?;
    // NOTE(review): rewritten commits are never re-signed (ssh_signature:
    // None) — the original signature would not cover the new content.
    let commit_obj = crate::model::Commit {
        id: commit_id.clone(),
        parents: parents.clone(),
        delta: delta_id,
        message: message.to_string(),
        author,
        timestamp,
        ssh_signature: None,
    };
    let obj = CommitObject {
        commit: commit_obj,
        delta,
    };
    store::write_commit_object(repo, &obj)?;
    Ok(commit_id)
}
fn commit_tree_internal( fn commit_tree_internal(
repo: &Repository, repo: &Repository,
message: &str, message: &str,

View file

@ -5,7 +5,7 @@ use std::path::PathBuf;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use crate::error::Result; use crate::error::{ArcError, Result};
use crate::model::{Commit, CommitId, Delta, DeltaId, FileChange, Signature}; use crate::model::{Commit, CommitId, Delta, DeltaId, FileChange, Signature};
use crate::repo::Repository; use crate::repo::Repository;
@ -15,15 +15,73 @@ pub struct CommitObject {
pub delta: Delta, pub delta: Delta,
} }
/// Decoding support for the pre-bincode (MessagePack) storage format.
///
/// These types mirror the old on-disk shapes exactly so `rmp_serde` can
/// decode objects written before the bincode migration; newly written
/// objects never use them.
mod legacy {
    use serde::{Deserialize, Serialize};

    use crate::model::{CommitId, Delta, DeltaId, FileChange, Signature};

    /// Legacy `Commit` as serialized by the old msgpack writer.
    #[derive(Deserialize)]
    pub struct LegacyCommit {
        pub id: CommitId,
        pub parents: Vec<CommitId>,
        pub delta: DeltaId,
        pub message: String,
        pub author: Option<Signature>,
        pub timestamp: i64,
        // The old writer used `skip_serializing_if`, so this field may be
        // absent from legacy payloads; `default` fills in `None`. The
        // serialize-side `skip_serializing_if` attribute is meaningless on a
        // Deserialize-only struct and has been dropped.
        #[serde(default)]
        pub ssh_signature: Option<String>,
    }

    /// Legacy on-disk object: a commit bundled with its delta.
    #[derive(Deserialize)]
    pub struct LegacyCommitObject {
        pub commit: LegacyCommit,
        pub delta: Delta,
    }

    /// Hash input for recomputing a legacy delta id (msgpack-serialized).
    #[derive(Serialize)]
    pub struct DeltaForHash<'a> {
        pub base: &'a Option<CommitId>,
        pub changes: &'a [FileChange],
    }

    /// Hash input for recomputing a legacy commit id (msgpack-serialized).
    #[derive(Serialize)]
    pub struct CommitForHash<'a> {
        pub parents: &'a [CommitId],
        pub delta: &'a DeltaId,
        pub message: &'a str,
        pub author: &'a Option<Signature>,
        pub timestamp: i64,
    }

    impl LegacyCommitObject {
        /// Convert into the current in-memory `CommitObject` representation.
        pub fn into_commit_object(self) -> super::CommitObject {
            super::CommitObject {
                commit: crate::model::Commit {
                    id: self.commit.id,
                    parents: self.commit.parents,
                    delta: self.commit.delta,
                    message: self.commit.message,
                    author: self.commit.author,
                    timestamp: self.commit.timestamp,
                    ssh_signature: self.commit.ssh_signature,
                },
                delta: self.delta,
            }
        }
    }
}
pub fn commit_object_path(repo: &Repository, id: &CommitId) -> PathBuf { pub fn commit_object_path(repo: &Repository, id: &CommitId) -> PathBuf {
repo.commits_dir().join(format!("{}.zst", id.0)) repo.commits_dir().join(format!("{}.zst", id.0))
} }
pub fn write_commit_object(repo: &Repository, obj: &CommitObject) -> Result<()> { pub fn write_commit_object(repo: &Repository, obj: &CommitObject) -> Result<()> {
debug!(3, "writing commit object {}", obj.commit.id.0); debug!(3, "writing commit object {}", obj.commit.id.0);
let msgpack = rmp_serde::to_vec(obj)?; let encoded = bincode::serialize(obj)?;
let compressed = let mut encoder = zstd::Encoder::new(Vec::new(), 3).map_err(std::io::Error::other)?;
zstd::stream::encode_all(Cursor::new(&msgpack), 3).map_err(std::io::Error::other)?; encoder.include_checksum(true).map_err(std::io::Error::other)?;
encoder.write_all(&encoded)?;
let compressed = encoder.finish().map_err(std::io::Error::other)?;
let path = commit_object_path(repo, &obj.commit.id); let path = commit_object_path(repo, &obj.commit.id);
let tmp_path = path.with_extension("zst.tmp"); let tmp_path = path.with_extension("zst.tmp");
@ -31,6 +89,8 @@ pub fn write_commit_object(repo: &Repository, obj: &CommitObject) -> Result<()>
f.write_all(&compressed)?; f.write_all(&compressed)?;
f.sync_all()?; f.sync_all()?;
fs::rename(&tmp_path, &path)?; fs::rename(&tmp_path, &path)?;
let parent_dir = std::fs::File::open(repo.commits_dir())?;
parent_dir.sync_all()?;
Ok(()) Ok(())
} }
@ -40,12 +100,142 @@ pub fn read_commit_object(repo: &Repository, id: &CommitId) -> Result<CommitObje
let compressed = fs::read(&path)?; let compressed = fs::read(&path)?;
let mut decoder = let mut decoder =
zstd::stream::Decoder::new(Cursor::new(&compressed)).map_err(std::io::Error::other)?; zstd::stream::Decoder::new(Cursor::new(&compressed)).map_err(std::io::Error::other)?;
let mut msgpack = Vec::new(); let mut decoded = Vec::new();
decoder decoder
.read_to_end(&mut msgpack) .read_to_end(&mut decoded)
.map_err(std::io::Error::other)?; .map_err(std::io::Error::other)?;
let obj: CommitObject = rmp_serde::from_slice(&msgpack)?;
match bincode::deserialize::<CommitObject>(&decoded) {
Ok(obj) => {
validate_commit_object(&obj, id)?;
Ok(obj) Ok(obj)
}
Err(bincode_err) => {
debug!(3, "bincode failed, trying legacy msgpack for {}", id.0);
match rmp_serde::from_slice::<legacy::LegacyCommitObject>(&decoded) {
Ok(legacy_obj) => {
let obj = legacy_obj.into_commit_object();
validate_legacy_commit_object(&obj, id)?;
Ok(obj)
}
Err(msgpack_err) => Err(ArcError::CorruptObject(format!(
"failed to decode object (bincode: {bincode_err}, msgpack: {msgpack_err})"
))),
}
}
}
}
fn validate_commit_object(obj: &CommitObject, id: &CommitId) -> Result<()> {
let expected_delta_id = compute_delta_id(&obj.delta.base, &obj.delta.changes)?;
if expected_delta_id != obj.delta.id {
return Err(ArcError::CorruptObject(format!(
"delta id mismatch: expected {}, found {}",
expected_delta_id, obj.delta.id
)));
}
if obj.commit.delta != obj.delta.id {
return Err(ArcError::CorruptObject(format!(
"commit references delta {}, but object contains delta {}",
obj.commit.delta, obj.delta.id
)));
}
let expected_commit_id = compute_commit_id(
&obj.commit.parents,
&obj.delta.id,
&obj.commit.message,
&obj.commit.author,
obj.commit.timestamp,
)?;
if expected_commit_id != obj.commit.id {
return Err(ArcError::CorruptObject(format!(
"commit id mismatch: expected {}, found {}",
expected_commit_id, obj.commit.id
)));
}
if obj.commit.id != *id {
return Err(ArcError::CorruptObject(format!(
"commit id does not match expected id: expected {}, found {}",
id, obj.commit.id
)));
}
if obj.delta.base != obj.commit.parents.first().cloned() {
return Err(ArcError::CorruptObject(format!(
"delta base {:?} does not match first parent {:?}",
obj.delta.base,
obj.commit.parents.first()
)));
}
Ok(())
}
fn validate_legacy_commit_object(obj: &CommitObject, id: &CommitId) -> Result<()> {
let expected_delta_id = compute_legacy_delta_id(&obj.delta.base, &obj.delta.changes)?;
if expected_delta_id != obj.delta.id {
return Err(ArcError::CorruptObject(format!(
"delta id mismatch: expected {}, found {}",
expected_delta_id, obj.delta.id
)));
}
if obj.commit.delta != obj.delta.id {
return Err(ArcError::CorruptObject(format!(
"commit references delta {}, but object contains delta {}",
obj.commit.delta, obj.delta.id
)));
}
let expected_commit_id = compute_legacy_commit_id(
&obj.commit.parents,
&obj.delta.id,
&obj.commit.message,
&obj.commit.author,
obj.commit.timestamp,
)?;
if expected_commit_id != obj.commit.id {
return Err(ArcError::CorruptObject(format!(
"commit id mismatch: expected {}, found {}",
expected_commit_id, obj.commit.id
)));
}
if obj.commit.id != *id {
return Err(ArcError::CorruptObject(format!(
"commit id does not match expected id: expected {}, found {}",
id, obj.commit.id
)));
}
if obj.delta.base != obj.commit.parents.first().cloned() {
return Err(ArcError::CorruptObject(format!(
"delta base {:?} does not match first parent {:?}",
obj.delta.base,
obj.commit.parents.first()
)));
}
Ok(())
}
/// Recompute a delta id the way the legacy msgpack writer did: SHA-256 over
/// the rmp-serde encoding of (base, changes). Required to verify objects
/// written before the bincode migration, whose ids were derived from the
/// msgpack byte stream.
fn compute_legacy_delta_id(base: &Option<CommitId>, changes: &[FileChange]) -> Result<DeltaId> {
    let hashable = legacy::DeltaForHash { base, changes };
    // Serialization failures surface as HashError, matching compute_delta_id.
    let bytes = rmp_serde::to_vec(&hashable)
        .map_err(|e| ArcError::HashError(e.to_string()))?;
    Ok(DeltaId(sha256_hex(&bytes)))
}

/// Recompute a commit id with the legacy msgpack hashing scheme: SHA-256
/// over the rmp-serde encoding of the commit's hashable fields.
fn compute_legacy_commit_id(
    parents: &[CommitId],
    delta: &DeltaId,
    message: &str,
    author: &Option<Signature>,
    timestamp: i64,
) -> Result<CommitId> {
    let hashable = legacy::CommitForHash {
        parents,
        delta,
        message,
        author,
        timestamp,
    };
    // Serialization failures surface as HashError, matching compute_commit_id.
    let bytes = rmp_serde::to_vec(&hashable)
        .map_err(|e| ArcError::HashError(e.to_string()))?;
    Ok(CommitId(sha256_hex(&bytes)))
}
fn sha256_hex(bytes: &[u8]) -> String { fn sha256_hex(bytes: &[u8]) -> String {
@ -72,7 +262,7 @@ struct CommitForHash<'a> {
pub fn compute_delta_id(base: &Option<CommitId>, changes: &[FileChange]) -> Result<DeltaId> { pub fn compute_delta_id(base: &Option<CommitId>, changes: &[FileChange]) -> Result<DeltaId> {
debug!(3, "computing delta id (base: {:?})", base); debug!(3, "computing delta id (base: {:?})", base);
let hashable = DeltaForHash { base, changes }; let hashable = DeltaForHash { base, changes };
let bytes = rmp_serde::to_vec(&hashable) let bytes = bincode::serialize(&hashable)
.map_err(|e| crate::error::ArcError::HashError(e.to_string()))?; .map_err(|e| crate::error::ArcError::HashError(e.to_string()))?;
Ok(DeltaId(sha256_hex(&bytes))) Ok(DeltaId(sha256_hex(&bytes)))
} }
@ -92,7 +282,7 @@ pub fn compute_commit_id(
author, author,
timestamp, timestamp,
}; };
let bytes = rmp_serde::to_vec(&hashable) let bytes = bincode::serialize(&hashable)
.map_err(|e| crate::error::ArcError::HashError(e.to_string()))?; .map_err(|e| crate::error::ArcError::HashError(e.to_string()))?;
Ok(CommitId(sha256_hex(&bytes))) Ok(CommitId(sha256_hex(&bytes)))
} }

View file

@ -66,8 +66,16 @@ pub fn materialize_committed_tree(repo: &Repository, head: &CommitId) -> Result<
debug!(3, "materializing tree at commit {}", head.0); debug!(3, "materializing tree at commit {}", head.0);
let history = load_linear_history(repo, head)?; let history = load_linear_history(repo, head)?;
let mut tree = BTreeMap::new(); let mut tree = BTreeMap::new();
let mut expected_base: Option<crate::model::CommitId> = None;
for obj in &history { for obj in &history {
apply_delta(&mut tree, &obj.delta); if obj.delta.base != expected_base {
return Err(crate::error::ArcError::CorruptObject(format!(
"delta chain broken at commit {}",
obj.commit.id.0
)));
}
apply_delta(&mut tree, &obj.delta)?;
expected_base = Some(obj.commit.id.clone());
} }
debug!(3, "materialized tree with {} file(s)", tree.len()); debug!(3, "materialized tree with {} file(s)", tree.len());
Ok(tree) Ok(tree)
@ -93,14 +101,20 @@ pub fn load_linear_history(repo: &Repository, head: &CommitId) -> Result<Vec<Com
Ok(chain) Ok(chain)
} }
pub fn apply_delta(tree: &mut FileTree, delta: &Delta) { pub fn apply_delta(tree: &mut FileTree, delta: &Delta) -> crate::error::Result<()> {
for change in &delta.changes { for change in &delta.changes {
match &change.kind { match &change.kind {
FileChangeKind::Add { content } | FileChangeKind::Modify { content } => { FileChangeKind::Add { content } | FileChangeKind::Modify { content } => match content {
if let FileContentDelta::Full { bytes } = content { FileContentDelta::Full { bytes } => {
tree.insert(change.path.clone(), bytes.clone()); tree.insert(change.path.clone(), bytes.clone());
} }
FileContentDelta::Patch { format, .. } => {
return Err(crate::error::ArcError::UnsupportedDelta(format!(
"patch format '{}' on file '{}'",
format, change.path
)));
} }
},
FileChangeKind::Delete => { FileChangeKind::Delete => {
tree.remove(&change.path); tree.remove(&change.path);
} }
@ -111,6 +125,7 @@ pub fn apply_delta(tree: &mut FileTree, delta: &Delta) {
} }
} }
} }
Ok(())
} }
pub fn detect_changes(committed: &FileTree, worktree: &FileTree) -> Vec<FileChange> { pub fn detect_changes(committed: &FileTree, worktree: &FileTree) -> Vec<FileChange> {

154
tests/check.rs Normal file
View file

@ -0,0 +1,154 @@
use std::process::Command;
use tempfile::TempDir;
/// Build a `Command` pointing at the compiled `arc` test binary.
fn arc_cmd() -> Command {
    let binary = env!("CARGO_BIN_EXE_arc");
    let mut command = Command::new(binary);
    // Disable ANSI colors so string assertions match plain output.
    command.env("NO_COLOR", "1");
    command
}
/// Create a temporary directory and initialize a fresh arc repository in it.
///
/// Panics if `arc init` cannot be spawned or exits with a failure status,
/// so later test steps never operate on a half-initialized repository.
fn init_repo() -> TempDir {
    let dir = TempDir::new().unwrap();
    let output = arc_cmd()
        .arg("init")
        .current_dir(dir.path())
        .output()
        .expect("failed to init");
    // The exit status was previously ignored; a failed init would surface
    // later as a confusing downstream assertion instead of failing here.
    assert!(
        output.status.success(),
        "arc init failed: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    dir
}
/// Write `content` to `name` inside the repo and commit it with message `msg`.
///
/// Panics with the commit's stderr when `arc commit` fails, so test output
/// shows *why* a setup commit failed rather than a bare assertion message.
fn commit_file(dir: &TempDir, name: &str, content: &str, msg: &str) {
    std::fs::write(dir.path().join(name), content).unwrap();
    let output = arc_cmd()
        .args(["commit", msg])
        .current_dir(dir.path())
        .output()
        .expect("failed to commit");
    assert!(
        output.status.success(),
        "arc commit failed: {}",
        String::from_utf8_lossy(&output.stderr)
    );
}
/// `arc check` on a healthy single-commit repository exits 0 and reports ok.
#[test]
fn check_clean_repo_succeeds() {
    let repo = init_repo();
    commit_file(&repo, "a.txt", "hello\n", "initial");

    let result = arc_cmd()
        .arg("check")
        .current_dir(repo.path())
        .output()
        .expect("failed to run check");

    assert!(result.status.success());
    let text = String::from_utf8_lossy(&result.stdout);
    assert!(text.contains("repository ok"));
}
/// `arc check` walks a multi-commit history and reports the commit count.
#[test]
fn check_multi_commit_repo() {
    let repo = init_repo();
    // Three commits, including one that rewrites an existing file.
    for (name, content, msg) in [
        ("a.txt", "hello\n", "first"),
        ("b.txt", "world\n", "second"),
        ("a.txt", "updated\n", "third"),
    ] {
        commit_file(&repo, name, content, msg);
    }

    let result = arc_cmd()
        .arg("check")
        .current_dir(repo.path())
        .output()
        .expect("failed to run check");

    assert!(result.status.success());
    let text = String::from_utf8_lossy(&result.stdout);
    assert!(text.contains("3 commit(s)"));
}
/// Overwriting a stored commit object with garbage must make `arc check` fail.
#[test]
fn check_detects_corrupt_commit_file() {
    let repo = init_repo();
    commit_file(&repo, "a.txt", "hello\n", "initial");

    // Locate the single stored commit object and clobber its contents.
    let commits_dir = repo.path().join(".arc").join("commits");
    let mut paths: Vec<_> = std::fs::read_dir(&commits_dir)
        .unwrap()
        .flatten()
        .map(|entry| entry.path())
        .collect();
    assert_eq!(paths.len(), 1);
    std::fs::write(paths.pop().unwrap(), b"corrupted data").unwrap();

    let result = arc_cmd()
        .arg("check")
        .current_dir(repo.path())
        .output()
        .expect("failed to run check");
    assert!(!result.status.success());
}
/// Deleting every commit object leaves HEAD dangling; `arc check` must fail
/// and report the problem.
#[test]
fn check_detects_missing_commit_from_ref() {
    let repo = init_repo();
    commit_file(&repo, "a.txt", "hello\n", "initial");

    // Remove all stored commit objects so the ref points at nothing.
    let commits_dir = repo.path().join(".arc").join("commits");
    for entry in std::fs::read_dir(&commits_dir).unwrap().flatten() {
        std::fs::remove_file(entry.path()).unwrap();
    }

    let output = arc_cmd()
        .arg("check")
        .current_dir(repo.path())
        .output()
        .expect("failed to run check");
    assert!(!output.status.success());
    // Diagnostics may be routed to stdout or stderr; search both streams so
    // the test does not depend on where `arc check` prints its report.
    let combined = format!(
        "{}{}",
        String::from_utf8_lossy(&output.stdout),
        String::from_utf8_lossy(&output.stderr)
    );
    assert!(combined.contains("missing commit") || combined.contains("error"));
}
/// `arc check` succeeds on a repository that carries a bookmark and a tag.
#[test]
fn check_with_bookmarks_and_tags() {
    let repo = init_repo();
    commit_file(&repo, "a.txt", "hello\n", "initial");

    // Create one bookmark and one tag, failing fast if either command
    // errors — previously their exit status was ignored, so the test could
    // pass vacuously without any refs actually existing.
    for args in [["mark", "add", "feature"], ["tag", "add", "v1"]] {
        let out = arc_cmd()
            .args(args)
            .current_dir(repo.path())
            .output()
            .expect("failed");
        assert!(
            out.status.success(),
            "`arc {}` failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&out.stderr)
        );
    }

    let output = arc_cmd()
        .arg("check")
        .current_dir(repo.path())
        .output()
        .expect("failed to run check");
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(stdout.contains("repository ok"));
}
/// A freshly initialized repository with no commits still passes `arc check`.
#[test]
fn check_empty_repo() {
    let repo = init_repo();

    let result = arc_cmd()
        .arg("check")
        .current_dir(repo.path())
        .output()
        .expect("failed to run check");

    assert!(result.status.success());
    let text = String::from_utf8_lossy(&result.stdout);
    assert!(text.contains("repository ok"));
}