Skip to contents

{stamp} makes heavy use of the {digest} package, which generates hashes of R objects using a variety of algorithms.

fastest

Let’s use the Reporter-Partner trade data at commodity level for the USA between 2015 and 2020:

library(tradestatistics)
# US commodity-level trade data (table "yrpc") for the years x1:x2.
# x1 and x2 are defined elsewhere; the download log below shows the range is
# 2015 to 2020.
x <- ots_create_tidy_data(years = x1:x2,
                         reporters = "usa",
                         table = "yrpc")
#> Downloading data for the combination 2015, usa, all...
#> Downloading data for the combination 2016, usa, all...
#> Downloading data for the combination 2017, usa, all...
#> Downloading data for the combination 2018, usa, all...
#> Downloading data for the combination 2019, usa, all...
#> Downloading data for the combination 2020, usa, all...

It is a huge dataset with 1,828,772 observations.

# Benchmark stamp_get() on `x`, once per hashing algorithm.
# Each named argument is one benchmarked expression; its name is the label
# used in the results. `times` (the repetition count passed to
# microbenchmark) is defined elsewhere.
bench <- microbenchmark::microbenchmark(
  times = times,
  md5 = stamp_get(x, algo = "md5"),
  sha1 = stamp_get(x, algo = "sha1"),
  crc32 = stamp_get(x, algo = "crc32"),
  sha256 = stamp_get(x, algo = "sha256"),
  sha512 = stamp_get(x, algo = "sha512"),
  xxhash32 = stamp_get(x, algo = "xxhash32"),
  xxhash64 = stamp_get(x, algo = "xxhash64"),
  murmur32 = stamp_get(x, algo = "murmur32"),
  spookyhash = stamp_get(x, algo = "spookyhash"),
  blake3 = stamp_get(x, algo = "blake3")
)
# Plot the distribution of timings per algorithm: an interactive highcharter
# boxplot when that optional package is available, a base boxplot otherwise.
if (requireNamespace("highcharter")) {
  # microbenchmark stores `time` in nanoseconds; convert to milliseconds so
  # the plotted values agree with the series label below.
  bench_ms <- data.frame(expr = bench$expr, time = bench$time / 1e6)

  hc_dt <- highcharter::data_to_boxplot(bench_ms,
                                        time,
                                        expr,
                                        add_outliers = FALSE,
                                        name = "Time in milliseconds")

  # Inverted chart: algorithms on the vertical axis, time on the horizontal.
  highcharter::highchart() |>
    highcharter::hc_xAxis(type = "category") |>
    highcharter::hc_chart(inverted = TRUE) |>
    highcharter::hc_add_series_list(hc_dt)
} else {
  boxplot(bench, outline = FALSE)
}
#> Loading required namespace: highcharter
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
# Collect the raw benchmark timings in a data.table.
res <- as.data.table(bench)
# Median timing for each benchmarked expression.
med <- res[, list(median = median(time)), by = "expr"]
# Label of the expression with the lowest median timing.
minmed <- as.character(med[["expr"]][which.min(med[["median"]])])

shortest

# Hash algorithms to compare.
algos <- c(
  "md5", "sha1", "crc32", "sha256", "sha512",
  "xxhash32", "xxhash64", "murmur32", "spookyhash", "blake3"
)

# A small object is enough here: only the length of each stamp is measured.
x <- 1:5

# Number of characters in the stamp produced by each algorithm.
# vapply() enforces one integer per algorithm and names the result by `algos`.
ln <- vapply(algos, \(algo) {
  st <- stamp_get(x, algo = algo)
  nchar(st$stamps)
}, integer(1))

# Shortest stamps first.
ln <- sort(ln)
ln
#>      crc32   xxhash32   murmur32   xxhash64        md5 spookyhash       sha1 
#>          8          8          8         16         32         32         40 
#>     sha256     blake3     sha512 
#>         64         64        128

# `ln` is sorted in increasing order, so its first element has the fewest
# characters. Algorithms with equal lengths are ordered by sort().
st <- ln[1L]
st
#> crc32 
#>     8