Skip to contents
# ---- setup, include=FALSE ----------------------------------------------------

# Load the in-development sources when pkgload is available; otherwise attach
# the installed copy of stamp.
if (!requireNamespace("pkgload", quietly = TRUE)) {
  library(stamp)
} else {
  pkgload::load_all(".")
}
##  Loading stamp

Let’s initialize a lightweight project and walk through the most common workflows: formats, saving/loading, sidecars, versions/lineage, primary-key helpers, and retention. The examples use temporary directories so you can run them locally without touching your real project.

1. Initialize a project

st_init() prepares a small internal state directory (by default .stamp/) to hold temporary files, logs, sidecars, and version snapshots. Use a temporary directory for vignette examples so saves and snapshots are isolated.

# Use a private temp dir so the vignette is reproducible locally.
# `tdir` is the project root; the example output folder is created under it.
tdir <- fs::path_temp("stamp-vignette")
fs::dir_create(tdir)
# Initialize stamp with `tdir` as the project root
st_init(tdir)
##  stamp initialized
##   alias: default
##   root: /tmp/RtmpuomfRe/stamp-vignette
##   state: /tmp/RtmpuomfRe/stamp-vignette/.stamp
# Show everything st_init() created under the state directory
fs::dir_tree(fs::path(tdir, ".stamp"), recurse = TRUE, all = TRUE)
## /tmp/RtmpuomfRe/stamp-vignette/.stamp
## ├── logs
## └── temp

Note: the default state directory is .stamp/; you can override it via st_init(state_dir = "_stamp").

2. Options (st_opts())

Global behavior is controlled via st_opts(). Typical options you will use:

  • meta_format: how sidecars are written. Allowed: "json", "qs2", or "both".
  • default_format: which format to use when none is inferred from a path.
  • versioning: controls whether saves create version snapshots ("content" vs "timestamp" vs "off").

Example:

# Show all options with their current (default) values
st_opts(.get = TRUE)
## $force_on_code_change
## [1] TRUE
## 
## $retain_versions
## [1] Inf
## 
## $versioning
## [1] "content"
## 
## $meta_format
## [1] "json"
## 
## $usetz
## [1] FALSE
## 
## $timeformat
## [1] "%Y%m%d%H%M%S"
## 
## $code_hash
## [1] TRUE
## 
## $default_format
## [1] "qs2"
## 
## $verify_on_load
## [1] FALSE
## 
## $data_folder
## [1] ".st_data"
## 
## $store_file_hash
## [1] FALSE
## 
## $verbose
## [1] TRUE
## 
## $timezone
## [1] "UTC"
## 
## $require_pk_on_load
## [1] FALSE
## 
## $warn_missing_pk_on_load
## [1] TRUE
# Write both JSON and QS2 sidecars
st_opts(meta_format = "both")
##  stamp options updated
##   meta_format = "both"
# Read a single option back to confirm the change
st_opts("meta_format", .get = TRUE)
## [1] "both"

Use versioning to control when a version snapshot is recorded. The default "content" mode records a new version only when content/code changed; "timestamp" forces a version on every save (useful for audit trails).

3. Paths and format registry

st_path() wraps a path string and optionally carries an explicit format hint. Format inference also maps known extensions (via an internal registry).

# Format inferred from the file extension
p1 <- st_path("data/iris.qs2")
# Format given explicitly via the `format` hint
p2 <- st_path("data/mtcars.fst", format = "fst")
p1
## <st_path> data/iris.qs2 [format=qs2]
st_formats()  # built-in handlers: csv, fst, json, parquet, qs2, rds
## [1] "csv"     "fst"     "json"    "parquet" "qs2"     "rds"

You can extend the registry with st_register_format() to add a format that is not built in. The registry will also map file extensions if you provide them.

4. Save & load (atomic, with sidecar metadata)

Use st_save() and st_load() for robust writes. st_save() performs an atomic write (temp file then move), writes sidecar metadata, and — depending on versioning — records a version snapshot.

# A small data frame to round-trip through st_save() / st_load()
x <- data.frame(a = 1:3, b = letters[1:3])

# Keep the example artifacts in a dedicated folder under the project root
outdir <- fs::path(tdir, "stamp-output")
fs::dir_create(outdir)

# Save with an extra metadata field; keep the result for the steps below
res <- st_save(
  x,
  fs::path(outdir, "example.qs2"),
  metadata = list(description = "toy")
)
##  Saved [qs2] → /tmp/RtmpuomfRe/stamp-vignette/stamp-output/example.qs2 @
##   version f237f26e6fdaff82
res$path
## [1] "/tmp/RtmpuomfRe/stamp-vignette/stamp-output/example.qs2"
# Load the artifact back from the path returned by st_save()
# (format auto-detected)
y <- st_load(res$path)
## Warning: No primary key recorded for
## /tmp/RtmpuomfRe/stamp-vignette/stamp-output/example.qs2.
##  You can add one with `st_add_pk()`.
##  Loaded [qs2] ←
## /tmp/RtmpuomfRe/stamp-vignette/stamp-output/example.qs2
identical(x, y)
## [1] TRUE

st_save() accepts additional arguments useful for provenance:

  • code: a function/expression whose hash is recorded (via st_hash_code).
  • parents: optional list of parent descriptors (list(list(path=…, version_id=…), …)) to record provenance.
  • code_label: a short human label for the producing code.

4.1 Loading specific versions

The version argument in st_load() allows you to load historical versions of artifacts. This is useful for comparing changes over time or recovering from mistakes.

# Switch to timestamp versioning for this example so each save below
# records its own version
st_opts(versioning = "timestamp")  # Force version on every save
##  stamp options updated
##   versioning = "timestamp"
# Create multiple versions by modifying and saving to the same path
v_path <- fs::path(outdir, "versioned.qs2")

# Version 1: 3 rows, 2 columns
v1 <- data.frame(x = 1:3, y = c("a", "b", "c"))
st_save(v1, v_path, code_label = "initial")
##  Saved [qs2] → /tmp/RtmpuomfRe/stamp-vignette/stamp-output/versioned.qs2 @
##   version fdb85ccf0014c20f
Sys.sleep(1.1)  # ensure distinct timestamps on all platforms

# Version 2: two more rows (5 rows, 2 columns)
v2 <- data.frame(x = 1:5, y = c("a", "b", "c", "d", "e"))
st_save(v2, v_path, code_label = "added rows")
##  Saved [qs2] → /tmp/RtmpuomfRe/stamp-vignette/stamp-output/versioned.qs2 @
##   version 2988c8d66a5ef9b1
Sys.sleep(1.1)  # same reason: keep the timestamps distinct

# Version 3: adds column z (5 rows, 3 columns)
v3 <- data.frame(x = 1:5, y = c("a", "b", "c", "d", "e"), z = 10:14)
st_save(v3, v_path, code_label = "added column z")
##  Saved [qs2] → /tmp/RtmpuomfRe/stamp-vignette/stamp-output/versioned.qs2 @
##   version f2b8db5546cdc965
# List the recorded versions for the artifact (alias = NULL: auto-detect)
versions <- st_versions(v_path, alias = NULL)
if (nrow(versions) == 0) {
  cat("Warning: No versions were created. Skipping version loading examples.\n")
} else {
  print(versions[, .(version_id, created_at, size_bytes)])

  # Load latest (default)
  current <- st_load(v_path, alias = NULL)
  cat(sprintf("Current: %d rows, %d columns\n", nrow(current), ncol(current)))

  # Load previous version (version = -1) when available. A failed load yields
  # NULL, which is the "snapshot missing" signal; this avoids flipping a flag
  # with `<<-` from inside the error handler.
  if (nrow(versions) >= 2) {
    previous <- tryCatch(
      st_load(v_path, version = -1, alias = NULL),
      error = function(e) NULL
    )
    if (!is.null(previous)) {
      cat(sprintf("Previous (v-1): %d rows, %d columns\n", nrow(previous), ncol(previous)))
    } else {
      cat("Previous (v-1): snapshot not available; skipping.\n")
    }
  }
}
##          version_id                  created_at size_bytes
##              <char>                      <char>      <num>
## 1: f2b8db5546cdc965 2026-01-28T15:50:34.329705Z        287
## 2: 2988c8d66a5ef9b1 2026-01-28T15:50:33.181683Z        262
## 3: fdb85ccf0014c20f 2026-01-28T15:50:31.965345Z        261
## Warning: No primary key recorded for
## /tmp/RtmpuomfRe/stamp-vignette/stamp-output/versioned.qs2.
##  You can add one with `st_add_pk()`.
##  Loaded [qs2] ←
## /tmp/RtmpuomfRe/stamp-vignette/stamp-output/versioned.qs2
## Current: 5 rows, 3 columns
##  Loaded ← stamp-output/versioned.qs2 @
## 2988c8d66a5ef9b1 [qs2]
## Previous (v-1): 5 rows, 2 columns
  # Load two versions back (version = -2) when available, else oldest by ID.
  # `older` stays NULL when no version exists or the load fails; NULL is the
  # "snapshot not available" signal (no `<<-` flags needed).
  older <- NULL
  if (nrow(versions) >= 3) {
    older <- tryCatch(
      st_load(v_path, version = -2, alias = NULL),
      error = function(e) NULL
    )
  } else if (nrow(versions) >= 1) {
    # The last row of `versions` is taken as the oldest entry
    oldest_id <- versions$version_id[nrow(versions)]
    older <- tryCatch(
      st_load(v_path, version = oldest_id, alias = NULL),
      error = function(e) NULL
    )
  }
##  Loaded ← stamp-output/versioned.qs2 @
## fdb85ccf0014c20f [qs2]
  if (!is.null(older)) {
    cat(sprintf("Older: %d rows, %d columns\n", nrow(older), ncol(older)))
  } else {
    cat("Older: snapshot not available; skipping.\n")
  }
## Older: 3 rows, 2 columns
  # Load specific version by ID (oldest)
  vid <- versions$version_id[nrow(versions)]
  specific <- tryCatch(
    st_load(v_path, version = vid, alias = NULL),
    error = function(e) NULL
  )
##  Loaded ← stamp-output/versioned.qs2 @
## fdb85ccf0014c20f [qs2]
  # Compare only when both loads succeeded
  if (!is.null(specific) && !is.null(older)) {
    cat(sprintf("Specific version matches older: %s\n", identical(specific, older)))
  }
## Specific version matches older: TRUE
# Restore the default versioning mode ("content") for the remaining sections
st_opts(versioning = "content")
##  stamp options updated
##   versioning = "content"

The version argument supports several modes:

  • NULL (default): loads the current artifact file
  • 0: same as NULL, loads latest version
  • Negative integers (-1, -2, etc.): load versions relative to latest
  • Character string: specific version ID (from st_versions())
  • "select", "pick", or "choose": interactive menu (in interactive sessions)
# Interactive menu to choose a version (only works in interactive R sessions).
# This will display a menu with formatted timestamps and file sizes; the
# result of the load is assigned to `selected`.
selected <- st_load(v_path, version = "select")

# The menu looks like:
# ℹ Select a version to load from versioned.qs2:
#   Latest version is [1]
# 
# Available versions:
# 
# 1: [1] 2025-11-26 10:30:15 (0.12 MB) - 20251126T103015Z
# 2: [2] 2025-11-26 10:30:10 (0.10 MB) - 20251126T103010Z
# 3: [3] 2025-11-26 10:30:05 (0.08 MB) - 20251126T103005Z
# 
# Selection: _

5. Sidecars (quick metadata)

Sidecars live in an stmeta/ sibling directory next to the artifact and contain metadata such as path, format, created_at (UTC), size_bytes, content_hash, code_hash, code_label, and parents (a quick view).

# Read the sidecar metadata of the artifact saved earlier and show its structure
sc <- st_read_sidecar(res$path)
str(sc)
## List of 11
##  $ path        : chr "/tmp/RtmpuomfRe/stamp-vignette/stamp-output/example.qs2"
##  $ format      : chr "qs2"
##  $ created_at  : chr "2026-01-28T15:50:31.660943Z"
##  $ size_bytes  : int 256
##  $ content_hash: chr "99235ac79dea7ab0"
##  $ code_hash   : NULL
##  $ file_hash   : NULL
##  $ code_label  : NULL
##  $ parents     : list()
##  $ attrs       : list()
##  $ description : chr "toy"

The sidecar is intended for quick inspection and for storing metadata even when a version snapshot may not be recorded (e.g., when versioning = "content" and nothing changed). For reproducible lineage and rebuilds, st_save() writes a snapshot under .stamp/versions/ that includes committed copies of the artifact and its parents.json.

6. Versions, lineage, and inspection

The package maintains a simple catalog of versions. Useful functions:

  • st_versions(path) — list recorded versions for an artifact.
  • st_latest(path) — get the latest version id.
  • st_load_version(path, version_id) — load a specific recorded version.
  • st_info(path) — quick inspection: returns current sidecar, catalog info, snapshot dir (if present), and parents (from snapshot or sidecar fallback).
  • st_lineage(path, depth=1) — walk immediate or recursive parents.

Example that demonstrates parents and lineage (parents passed to st_save):

# Upstream artifact: save it, then capture its latest version id
in_path <- fs::path(outdir, "upstream.qs")
st_save(data.frame(id = 1:3), in_path)
##  Saved [qs2] → /tmp/RtmpuomfRe/stamp-vignette/stamp-output/upstream.qs @
##   version 110bd70835013513
in_vid <- st_latest(in_path)

# Derived artifact: record the upstream path and version as its parent
out_path <- fs::path(outdir, "derived.qs")
parents <- list(
  list(path = in_path, version_id = in_vid)
)
st_save(
  data.frame(id = 1:3, v = 10),
  out_path,
  parents = parents,
  code_label = "multiply"
)
##  Saved [qs2] → /tmp/RtmpuomfRe/stamp-vignette/stamp-output/derived.qs @
##   version da659559a434d2c9
st_info(out_path)$sidecar
## $path
## [1] "/tmp/RtmpuomfRe/stamp-vignette/stamp-output/derived.qs"
## 
## $format
## [1] "qs2"
## 
## $created_at
## [1] "2026-01-28T15:50:34.848972Z"
## 
## $size_bytes
## [1] 256
## 
## $content_hash
## [1] "a6f9da7f2b465601"
## 
## $code_hash
## NULL
## 
## $file_hash
## NULL
## 
## $code_label
## [1] "multiply"
## 
## $parents
##                                                      path       version_id
## 1 /tmp/RtmpuomfRe/stamp-vignette/stamp-output/upstream.qs 110bd70835013513
## 
## $attrs
## list()
st_lineage(out_path, depth = 1)
##   level                                             child_path    child_version
## 1     1 /tmp/RtmpuomfRe/stamp-vignette/stamp-output/derived.qs da659559a434d2c9
##                                               parent_path   parent_version
## 1 /tmp/RtmpuomfRe/stamp-vignette/stamp-output/upstream.qs 110bd70835013513

Notes on behavior: The sidecar always contains parents for quick inspection. However, in the default versioning = "content" mode a new committed snapshot (and its parents.json) is only created when content or code changes. The vignette code above deliberately saves upstream and derived artifacts so a snapshot is recorded.

7. Primary-key helpers (optional)

You can record a primary-key (pk) for an artifact in its sidecar to make it easier to identify rows later. Helpers:

  • st_pk() — normalize/validate a pk spec.
  • st_add_pk(path, keys) — record a pk in the artifact sidecar (optionally validate against on-disk content).
  • st_inspect_pk(path) — read pk from sidecar.
  • st_with_pk(df, keys) and st_get_pk() — in-memory helpers.

Example:

st_add_pk(out_path, keys = c("id"))
##  stamp options updated
##   require_pk_on_load = "FALSE"
## Warning: No primary key recorded for
## /tmp/RtmpuomfRe/stamp-vignette/stamp-output/derived.qs.
##  You can add one with `st_add_pk()`.
##  Loaded [qs2] ← /tmp/RtmpuomfRe/stamp-vignette/stamp-output/derived.qs
##  Recorded primary key for
##   /tmp/RtmpuomfRe/stamp-vignette/stamp-output/derived.qs --> id
##  stamp options updated
##   require_pk_on_load = "FALSE"
st_inspect_pk(out_path)
## [1] "id"
# Load the artifact (no missing-pk warning is shown now that a key is
# recorded), then filter by pk using st_filter
df <- st_load(out_path)
##  Loaded [qs2] ←
## /tmp/RtmpuomfRe/stamp-vignette/stamp-output/derived.qs
st_filter(df, list(id = 1))
##   id  v
## 1  1 10

8. Retention / pruning

To control disk usage, st_prune_versions() prunes older version snapshots according to a retention policy. The simplest call applies the default project policy; you can also pass policy or use dry_run = TRUE to preview.

# Dry run: preview the deletions for this artifact (dry_run = TRUE)
st_prune_versions(path = out_path, policy = 5, dry_run = TRUE)

# Apply retention for real (dry_run = FALSE), here with a list-style policy
st_prune_versions(path = out_path, policy = list(n = 5, days = 30), dry_run = FALSE)

9. Tips and conventions