Skip to contents
# ---- setup, include=FALSE ----------------------------------------------------

if (requireNamespace("pkgload", quietly = TRUE)) {
  pkgload::load_all(".")
} else {
  library(stamp)
}
##  Loading stamp

Let’s initialize a lightweight project and walk through the most common workflows: formats, saving/loading, sidecars, versions/lineage, primary-key helpers, and retention. The examples use temporary directories so you can run them locally without touching your real project.

1. Initialize a project

st_init() prepares a small internal state directory (by default .stamp/) to hold temporary files, logs, sidecars, and version snapshots. Use a temporary directory for vignette examples so saves and snapshots are isolated.

# Use a private temp dir so the vignette is reproducible locally
tdir <- fs::path_temp("stamp-vignette")
fs::dir_create(tdir)
st_init(tdir)
##  stamp initialized
##   root: /tmp/RtmpA8AdT4/stamp-vignette
##   state: /tmp/RtmpA8AdT4/stamp-vignette/.stamp
# Inspect created structure
fs::path(tdir, ".stamp") |>
  fs::dir_tree(recurse = TRUE, all = TRUE)
## /tmp/RtmpA8AdT4/stamp-vignette/.stamp
## ├── logs
## └── temp

Note: the default state directory is .stamp/ (you can override it via st_init(state_dir = "_stamp")).

2. Options (st_opts())

Global behavior is controlled via st_opts(). Typical options you will use:

  • meta_format: how sidecars are written. Allowed: "json", "qs2", or "both".
  • default_format: which format to use when none is inferred from a path.
  • versioning: controls whether saves create version snapshots ("content" vs "timestamp" vs "off").

Example:

# show defaults
st_opts(.get = TRUE)
## $force_on_code_change
## [1] TRUE
## 
## $retain_versions
## [1] Inf
## 
## $versioning
## [1] "content"
## 
## $meta_format
## [1] "json"
## 
## $usetz
## [1] FALSE
## 
## $timeformat
## [1] "%Y%m%d%H%M%S"
## 
## $code_hash
## [1] TRUE
## 
## $default_format
## [1] "qs2"
## 
## $verify_on_load
## [1] FALSE
## 
## $store_file_hash
## [1] FALSE
## 
## $verbose
## [1] TRUE
## 
## $timezone
## [1] "UTC"
## 
## $require_pk_on_load
## [1] FALSE
## 
## $warn_missing_pk_on_load
## [1] TRUE
# write both JSON and QS2 sidecars
st_opts(meta_format = "both")
##  stamp options updated
##   meta_format = "both"
st_opts("meta_format", .get = TRUE)
## [1] "both"

Use versioning to control when a version snapshot is recorded. The default "content" mode records a new version only when content/code changed; "timestamp" forces a version on every save (useful for audit trails).

3. Paths and format registry

st_path() wraps a path string and optionally carries an explicit format hint. Format inference also maps known extensions (via an internal registry).

p1 <- st_path("data/iris.qs2")
p2 <- st_path("data/mtcars.fst", format = "fst")
p1
## <st_path> data/iris.qs2 [format=qs2]
st_formats()  # built-in handlers: csv, fst, json, parquet, qs, qs2, rds
## [1] "csv"     "fst"     "json"    "parquet" "qs"      "qs2"     "rds"

You can extend the registry with st_register_format() to add a format that is not built in (e.g. Feather). The registry will also map file extensions to the new format if you provide them.

4. Save & load (atomic, with sidecar metadata)

Use st_save() and st_load() for robust writes. st_save() performs an atomic write (temp file then move), writes sidecar metadata, and — depending on versioning — records a version snapshot.

x <- data.frame(a = 1:3, b = letters[1:3])
outdir <- fs::path_temp("stamp-output")
fs::dir_create(outdir)

res <- st_save(x, fs::path(outdir, "example.qs2"), metadata = list(description = "toy"))
##  Saved [qs2] → /tmp/RtmpA8AdT4/stamp-output/example.qs2 @ version
##   721284d1c3c6e9cc
res$path
## /tmp/RtmpA8AdT4/stamp-output/example.qs2
# load back (format auto-detected)
y <- st_load(res$path)
## Warning: No primary key recorded for /tmp/RtmpA8AdT4/stamp-output/example.qs2.
##  You can add one with `st_add_pk()`.
##  Loaded [qs2] ←
## /tmp/RtmpA8AdT4/stamp-output/example.qs2
identical(x, y)
## [1] TRUE

st_save() accepts additional arguments useful for provenance:

  • code: a function/expression whose hash is recorded (via st_hash_code).
  • parents: optional list of parent descriptors (list(list(path=…, version_id=…), …)) to record provenance.
  • code_label: a short human label for the producing code.

4.1 Loading specific versions

The version argument in st_load() allows you to load historical versions of artifacts. This is useful for comparing changes over time or recovering from mistakes.

# Create multiple versions by modifying and saving
v_path <- fs::path(outdir, "versioned.qs2")

# Version 1
v1 <- data.frame(x = 1:3, y = c("a", "b", "c"))
st_save(v1, v_path, code_label = "initial")
##  Saved [qs2] → /tmp/RtmpA8AdT4/stamp-output/versioned.qs2 @ version
##   0498beb6e7e7ed9e
Sys.sleep(0.15)  # ensure distinct timestamps

# Version 2
v2 <- data.frame(x = 1:5, y = c("a", "b", "c", "d", "e"))
st_save(v2, v_path, code_label = "added rows")
##  Saved [qs2] → /tmp/RtmpA8AdT4/stamp-output/versioned.qs2 @ version
##   558ff723d2db4f42
Sys.sleep(0.15)

# Version 3
v3 <- data.frame(x = 1:5, y = c("a", "b", "c", "d", "e"), z = 10:14)
st_save(v3, v_path, code_label = "added column z")
##  Saved [qs2] → /tmp/RtmpA8AdT4/stamp-output/versioned.qs2 @ version
##   ee013c6c616ca8c8
# Check available versions
versions <- st_versions(v_path)
print(versions[, .(version_id, created_at, size_bytes)])
##          version_id                  created_at size_bytes
##              <char>                      <char>      <num>
## 1: ee013c6c616ca8c8 2025-12-22T11:01:22.388789Z        287
## 2: 558ff723d2db4f42 2025-12-22T11:01:22.166413Z        262
## 3: 0498beb6e7e7ed9e 2025-12-22T11:01:21.885867Z        261
# Load latest (default)
current <- st_load(v_path)
## Warning: No primary key recorded for /tmp/RtmpA8AdT4/stamp-output/versioned.qs2.
##  You can add one with `st_add_pk()`.
##  Loaded [qs2] ←
## /tmp/RtmpA8AdT4/stamp-output/versioned.qs2
nrow(current)  # 5 rows, 3 columns
## [1] 5
# Load previous version (version = -1)
previous <- st_load(v_path, version = -1)
##  Loaded ← /tmp/RtmpA8AdT4/stamp-output/versioned.qs2 @
## 558ff723d2db4f42 [qs2]
nrow(previous)  # 5 rows, 2 columns (before adding z)
## [1] 5
# Load two versions back (version = -2)
older <- st_load(v_path, version = -2)
##  Loaded ← /tmp/RtmpA8AdT4/stamp-output/versioned.qs2 @
## 0498beb6e7e7ed9e [qs2]
nrow(older)  # 3 rows, 2 columns (original)
## [1] 3
# Load specific version by ID
vid <- versions$version_id[1]  # newest version (st_versions() lists newest first)
specific <- st_load(v_path, version = vid)
##  Loaded ← /tmp/RtmpA8AdT4/stamp-output/versioned.qs2 @
## ee013c6c616ca8c8 [qs2]
identical(specific, older)
## [1] FALSE

The version argument supports several modes:

  • NULL (default): loads the current artifact file
  • 0: same as NULL, loads latest version
  • Negative integers (-1, -2, etc.): load versions relative to latest
  • Character string: specific version ID (from st_versions())
  • "select", "pick", or "choose": interactive menu (in interactive sessions)
# Interactive menu to choose a version (only works in interactive R sessions)
# This will display a menu with formatted timestamps and file sizes
selected <- st_load(v_path, version = "select")

# The menu looks like:
# ℹ Select a version to load from versioned.qs2:
#   Latest version is [1]
# 
# Available versions:
# 
# 1: [1] 2025-11-26 10:30:15 (0.12 MB) - 20251126T103015Z
# 2: [2] 2025-11-26 10:30:10 (0.10 MB) - 20251126T103010Z
# 3: [3] 2025-11-26 10:30:05 (0.08 MB) - 20251126T103005Z
# 
# Selection: _

5. Sidecars (quick metadata)

Sidecars live in an stmeta/ sibling directory next to the artifact and contain metadata such as path, format, created_at (UTC), size_bytes, content_hash, code_hash, code_label, and parents (a quick view).

sc <- st_read_sidecar(res$path)
str(sc)
## List of 11
##  $ path        : chr "/tmp/RtmpA8AdT4/stamp-output/example.qs2"
##  $ format      : chr "qs2"
##  $ created_at  : chr "2025-12-22T11:01:21.669211Z"
##  $ size_bytes  : int 256
##  $ content_hash: chr "99235ac79dea7ab0"
##  $ code_hash   : NULL
##  $ file_hash   : NULL
##  $ code_label  : NULL
##  $ parents     : list()
##  $ attrs       : list()
##  $ description : chr "toy"

The sidecar is intended for quick inspection and for storing metadata even when a version snapshot may not be recorded (e.g., when versioning = "content" and nothing changed). For reproducible lineage and rebuilds, st_save() writes a snapshot under .stamp/versions/ that includes committed copies of the artifact and its parents.json.

6. Versions, lineage, and inspection

The package maintains a simple catalog of versions. Useful functions:

  • st_versions(path) — list recorded versions for an artifact.
  • st_latest(path) — get the latest version id.
  • st_load_version(path, version_id) — load a specific recorded version.
  • st_info(path) — quick inspection: returns current sidecar, catalog info, snapshot dir (if present), and parents (from snapshot or sidecar fallback).
  • st_lineage(path, depth=1) — walk immediate or recursive parents.

Example that demonstrates parents and lineage (parents passed to st_save):

# upstream artifact
in_path <- fs::path(outdir, "upstream.qs")
st_save(data.frame(id=1:3), in_path)
##  Saved [qs] → /tmp/RtmpA8AdT4/stamp-output/upstream.qs @ version
##   b7e6fd54b59f3a38
in_vid <- st_latest(in_path)

# derived artifact recording parent info
out_path <- fs::path(outdir, "derived.qs")
parents <- list(list(path = in_path, version_id = in_vid))
st_save(data.frame(id=1:3, v=10), out_path, parents = parents, code_label = "multiply")
##  Saved [qs] → /tmp/RtmpA8AdT4/stamp-output/derived.qs @ version
##   1fe557c0644ab0fb
st_info(out_path)$sidecar
## $path
## [1] "/tmp/RtmpA8AdT4/stamp-output/derived.qs"
## 
## $format
## [1] "qs"
## 
## $created_at
## [1] "2025-12-22T11:01:22.898703Z"
## 
## $size_bytes
## [1] 150
## 
## $content_hash
## [1] "a6f9da7f2b465601"
## 
## $code_hash
## NULL
## 
## $file_hash
## NULL
## 
## $code_label
## [1] "multiply"
## 
## $parents
##                                       path       version_id
## 1 /tmp/RtmpA8AdT4/stamp-output/upstream.qs b7e6fd54b59f3a38
## 
## $attrs
## list()
st_lineage(out_path, depth = 1)
##   level                              child_path    child_version
## 1     1 /tmp/RtmpA8AdT4/stamp-output/derived.qs 1fe557c0644ab0fb
##                                parent_path   parent_version
## 1 /tmp/RtmpA8AdT4/stamp-output/upstream.qs b7e6fd54b59f3a38

Notes on behavior: the sidecar always contains parents for quick inspection. However, in the default versioning = "content" mode, a new committed snapshot (and its parents.json) is created only when content or code changed. The example above deliberately saves both the upstream and the derived artifact so that a snapshot is recorded.

7. Primary-key helpers (optional)

You can record a primary-key (pk) for an artifact in its sidecar to make it easier to identify rows later. Helpers:

  • st_pk() — normalize/validate a pk spec.
  • st_add_pk(path, keys) — record a pk in the artifact sidecar (optionally validate against on-disk content).
  • st_inspect_pk(path) — read pk from sidecar.
  • st_with_pk(df, keys) and st_get_pk() — in-memory helpers.

Example:

st_add_pk(out_path, keys = c("id"))
##  stamp options updated
##   require_pk_on_load = "FALSE"
## Warning: No primary key recorded for /tmp/RtmpA8AdT4/stamp-output/derived.qs.
##  You can add one with `st_add_pk()`.
##  Loaded [qs] ← /tmp/RtmpA8AdT4/stamp-output/derived.qs
##  Recorded primary key for /tmp/RtmpA8AdT4/stamp-output/derived.qs --> id
##  stamp options updated
##   require_pk_on_load = "FALSE"
st_inspect_pk(out_path)
## [1] "id"
# load and filter by pk using st_filter
df <- st_load(out_path)
##  Loaded [qs] ←
## /tmp/RtmpA8AdT4/stamp-output/derived.qs
st_filter(df, list(id = 1))
##   id  v
## 1  1 10

8. Retention / pruning

To control disk usage, st_prune_versions() prunes older version snapshots according to a retention policy. The simplest call applies the default project policy; you can also pass policy or use dry_run = TRUE to preview.

# dry-run to preview deletions for this artifact
st_prune_versions(path = out_path, policy = 5, dry_run = TRUE)

# apply retention (non-dry)
st_prune_versions(path = out_path, policy = list(n = 5, days = 30), dry_run = FALSE)

9. Tips and conventions