Skip to contents

This guide covers advanced usage patterns including batch certificate generation, duplicate detection with perceptual hashing, and canvas validation workflows.

Batch Certificate Generation

When certifying multiple artworks, share a database connection for efficiency:

library(artpixeltrace)
library(data.table)

# Open ONE shared connection for the whole batch
# (presumably ..dbc = "db connect", paired with ..dbd below - confirm in artcore)
cn <- artcore::..dbc()

# Get list of artworks needing certificates:
# verified pieces that do not yet have a certificate on record
artworks_to_certify <- artutils::dbArtQuery(
  "SELECT artist_uuid, art_uuid
   FROM app.artworks
   WHERE verified = TRUE AND cert_id IS NULL",
  cn = cn
)

# Generate certificates in batch
# Generate certificates in batch; each iteration yields one data.table row,
# whether the certificate succeeded or failed
results <- lapply(seq_len(nrow(artworks_to_certify)), function(i) {
  # data.table row subset: DT[i] keeps it a one-row data.table
  row <- artworks_to_certify[i]

  tryCatch(
    {
      cert <- renderCertificate(
        artist = row$artist_uuid,
        artwork = row$art_uuid,
        saveDB = TRUE,
        saveCDN = TRUE,
        cn = cn
      )
      # `:=` returns the modified data.table, so this tagged table is the
      # value of the tryCatch expression on success
      cert[, status := "success"]
    },
    error = function(e) {
      # On failure, return a stub row carrying the identifying UUIDs
      # and the error message so the batch can continue
      data.table(
        artist_uuid = row$artist_uuid,
        art_uuid = row$art_uuid,
        status = "failed",
        error = conditionMessage(e)
      )
    }
  )
})

# Release the shared connection once the whole batch is done
artcore::..dbd(cn)

# Combine results; fill = TRUE pads columns absent from failed rows with NA
all_certs <- rbindlist(results, fill = TRUE)
all_certs[, .N, by = status]
#>     status  N
#> 1: success 47
#> 2:  failed  3

Duplicate Detection with Perceptual Hashing

Perceptual hashes enable finding visually similar images. This is useful for:

  • Detecting duplicate uploads
  • Finding unauthorized copies
  • Verifying artwork authenticity

Computing and Storing Hashes

# Compute hash for a new upload
new_image <- "path/to/uploaded/image.png"
hash <- image_phash(new_image)

# Store hash in database.
# $1/$2/$3 are bound from `params` (parameterized query - no string
# interpolation). `artwork_uuid` is assumed to be defined by the caller.
artutils::dbArtQuery(
  "UPDATE app.artworks
   SET art_hash = $1, art_binary = $2
   WHERE art_uuid = $3",
  params = list(hash$art_hash, hash$art_binary, artwork_uuid)
)

Finding Similar Images

Use Hamming distance on the binary representation to find similar images:

# Helper function for Hamming distance
# Hamming distance between two equal-length strings: the number of
# positions at which the characters differ.
#
# Compares characters directly rather than via as.numeric(), which
# returned NA (with warnings) for any non-digit character and therefore
# broke on non-binary hash encodings. Also rejects unequal lengths,
# which previously recycled silently and produced a wrong answer.
#
# @param bin1,bin2 Single character strings of equal length.
# @return Integer count of differing positions.
hamming_distance <- function(bin1, bin2) {
  stopifnot(
    is.character(bin1), length(bin1) == 1L,
    is.character(bin2), length(bin2) == 1L,
    nchar(bin1) == nchar(bin2)
  )
  v1 <- strsplit(bin1, "")[[1]]
  v2 <- strsplit(bin2, "")[[1]]
  sum(v1 != v2)
}

# Compute hash for query image
query_hash <- image_phash("query_image.png")

# Get all stored hashes
stored_hashes <- artutils::dbArtQuery(
  "SELECT art_uuid, art_binary FROM app.artworks WHERE art_binary IS NOT NULL"
)

# Find similar images (distance < 10 is very similar)
stored_hashes[, distance := sapply(art_binary, function(b) {
  hamming_distance(query_hash$art_binary, b)
})]

similar <- stored_hashes[distance < 10]
similar[order(distance)]
#>                               art_uuid distance
#> 1: 99xxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx        0
#> 2: 99yyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy        3
#> 3: 99zzzzzz-zzzz-zzzz-zzzz-zzzzzzzzzzzz        7

Threshold Guidelines

Hamming Distance Interpretation
0 Identical or near-identical
1-5 Very similar (likely same artwork with minor edits)
6-10 Similar (possibly related or derivative)
11-20 Somewhat similar (may share visual elements)
21+ Different images

Canvas Validation Workflow

Before accepting artwork uploads, validate the Procreate canvas:

Step 1: Check Signature Exists

# Resolve the storage URL of the Procreate canvas for this artwork
canvas_url <- artutils::pathArtCanvas(artist, artwork)

# Reject unsigned canvases before doing any further work
if (!isCanvasSigned(canvas_url)) {
  stop("Artwork must be signed in Procreate before submission")
}

Step 2: Extract Canvas Metadata

The canvas contains creation metadata that can be validated:

# Download canvas to temp location
# Download canvas to temp location.
# mode = "wb" is required: .procreate files are zip archives, and the
# platform-dependent default transfer mode corrupts binary downloads
# on Windows.
canvas_path <- tempfile(fileext = ".procreate")
download.file(canvas_url, canvas_path, mode = "wb")

# Extract only Document.archive (the metadata member) for inspection
tmpdir <- tempfile()
zip::unzip(canvas_path, exdir = tmpdir, files = "Document.archive")

# Parse metadata (requires plist parsing)
# Contains: brush strokes, layers, creation time, etc.

Step 3: Compare Hash Against Registered Artwork

# Get the display image
# Get the display image
image_url <- artutils::pathArtImage(artist, artwork)

# Compute current hash
current_hash <- image_phash(image_url)

# Get stored hash from database
stored <- artutils::dbArtQuery(
  "SELECT art_hash FROM app.artworks WHERE art_uuid = $1",
  params = list(artwork)
)

# Guard against a missing row: comparing a zero-length value inside `if`
# would error with "argument is of length zero"
if (nrow(stored) == 0L) {
  stop("No stored hash found for artwork: ", artwork)
}

# identical() is NA-safe; `!=` would error inside `if` when either hash is NA
if (!identical(current_hash$art_hash, stored$art_hash)) {
  warning("Image hash mismatch - artwork may have been modified")
}

Custom Certificate Timestamps

By default, certificates use the current time. For backdating corrections or specific dates:

# Use specific timestamp
# Use a specific timestamp instead of the current time.
# Passing tz = "UTC" keeps the certificate time unambiguous.
cert <- renderCertificate(
  artist = artist,
  artwork = artwork,
  new_utc = as.POSIXct("2023-06-15 14:30:00", tz = "UTC")
)

Keeping Intermediate Files

For debugging or custom processing, preserve intermediate files:

# Create persistent output directory
# Create a persistent output directory (one folder per artwork UUID)
outdir <- fs::path("~/certificates", artwork)
fs::dir_create(outdir)

# clean = FALSE preserves intermediate render artifacts in `outdir`
cert <- renderCertificate(
  artist = artist,
  artwork = artwork,
  outdir = outdir,
  clean = FALSE # Keep intermediate files
)

# Directory now contains:
# - CERT-000-000-123.pdf (final certificate)
# - CERT-000-000-123.jpeg (preview image)
# - CERT-000-000-123.tex (LaTeX source)
# - templ-000-000-123.png (template image)
# - frame-000-000-123.png (framed artwork)
# - sign-000-000-123.png (signature image)

Error Handling

Robust error handling for production pipelines:

# Validate and certify a single artwork, never throwing: every outcome
# is reported as list(success = , cert_id = / error = ).
#
# @param artist Artist UUID.
# @param artwork Artwork UUID.
# @param cn Shared database connection.
# @return list with `success` (logical) plus `cert_id` on success
#   or `error` (character message) on failure.
process_artwork <- function(artist, artwork, cn) {
  tryCatch(
    {
      # Cheap validation first: unsigned canvases are rejected before
      # any expensive certificate rendering happens
      canvas <- artutils::pathArtCanvas(artist, artwork)
      if (isCanvasSigned(canvas)) {
        cert <- renderCertificate(
          artist = artist,
          artwork = artwork,
          saveDB = TRUE,
          saveCDN = TRUE,
          cn = cn
        )
        list(success = TRUE, cert_id = cert$cert_id)
      } else {
        list(success = FALSE, error = "Canvas not signed")
      }
    },
    error = function(e) list(success = FALSE, error = conditionMessage(e))
  )
}

Performance Considerations

Image Processing

  • image_phash() loads images into memory - monitor RAM for large batches
  • Processing is CPU-bound; consider parallel processing for large datasets

Certificate Generation

  • LaTeX compilation is slow (~2-5 seconds per certificate)
  • CDN uploads add network latency
  • Share database connections in batch operations

Optimization Tips

# Process in batches of 50-100 to balance memory and efficiency
# Process in batches of 50-100 to balance memory and efficiency.
# NOTE(review): batch_size is illustrative only - the loop below walks
# every row; wire it into a chunking scheme before relying on it.
batch_size <- 50

# Preallocate the results list - never grow a list with c() inside a loop
results <- vector("list", nrow(artworks))

for (i in seq_len(nrow(artworks))) {
  # artworks[i, col] is data.table syntax: extracts one scalar per call
  results[[i]] <- process_artwork(
    artworks[i, artist_uuid],
    artworks[i, art_uuid],
    cn
  )

  # Progress logging every 10 artworks
  if (i %% 10 == 0) {
    rdstools::log_inf(sprintf("Processed %d/%d", i, nrow(artworks)))
  }
}

Next Steps