Complex Multi-Step Workflows

This vignette covers advanced scenarios that combine multiple artutils functions to solve real production challenges. These workflows demonstrate patterns you'll encounter in complex applications.

Workflow 1: Multi-Artist Portfolio Comparison

Build a comparative dashboard showing multiple artists side-by-side. This is useful for gallery curation, NFT collection management, or artist discovery features.

The Challenge

We need to:
1. Load data for multiple artists efficiently
2. Normalize metrics for fair comparison
3. Rank artists across different dimensions
4. Handle missing or incomplete data gracefully

Implementation

build_portfolio_comparison <- function(artist_slugs) {
  # Phase 1: Batch lookup and validation
  cn <- artcore::dbc()
  on.exit(artcore::dbd(cn))

  # Resolve all slugs over a single shared connection
  artist_records <- lapply(artist_slugs, function(slug) {
    get_artist_by_slug(slug, cn = cn)
  })

  # Filter out NULLs (artists not found)
  valid_artists <- Filter(Negate(is.null), artist_records)
  if (length(valid_artists) == 0) {
    return(list(error = "no_valid_artists", attempted = artist_slugs))
  }
  artist_uuids <- sapply(valid_artists, function(x) x$artist_uuid)

  # Phase 2: Gather statistics for all artists
  stats_list <- lapply(artist_uuids, function(uuid) {
    stats <- get_artist_stats(uuid, cn = cn)
    stats$artist_uuid <- uuid
    stats
  })

  # Convert to data.table for analysis
  stats_dt <- data.table::rbindlist(stats_list, fill = TRUE)

  # Phase 3: Calculate comparison metrics
  stats_dt[, `:=`(
    # Productivity metrics
    hours_per_artwork = total_hours / pmax(artworks, 1),
    strokes_per_hour = total_brushes / pmax(total_hours, 1),
    # Efficiency score (artworks per collection)
    collection_density = artworks / pmax(collections, 1),
    # Experience (days since joining)
    days_active = as.numeric(Sys.Date() - as.Date(member_since))
  )]

  # Phase 4: Rank artists across dimensions
  ranking_cols <- c("artworks", "total_hours", "total_brushes",
                    "hours_per_artwork", "strokes_per_hour",
                    "collection_density")
  for (col in ranking_cols) {
    rank_col <- paste0(col, "_rank")
    stats_dt[, (rank_col) := data.table::frankv(get(col), order = -1L,
                                                na.last = "keep")]
  }

  # Phase 5: Enrich with profile data
  for (i in seq_along(valid_artists)) {
    uuid <- artist_uuids[i]
    profile <- valid_artists[[i]]
    stats_dt[artist_uuid == uuid, `:=`(
      artist_name = profile$artist_name,
      slug = profile$slug,
      location = paste(profile$city, profile$state, sep = ", "),
      avatar_url = path_artist_thumb(uuid)
    )]
  }

  # Phase 6: Generate insights
  insights <- list(
    most_prolific = stats_dt[which.max(artworks)]$artist_name,
    most_dedicated = stats_dt[which.max(total_hours)]$artist_name,
    most_efficient = stats_dt[which.min(hours_per_artwork)]$artist_name,
    fastest_worker = stats_dt[which.max(strokes_per_hour)]$artist_name,
    avg_portfolio_size = mean(stats_dt$artworks, na.rm = TRUE),
    total_artworks_compared = sum(stats_dt$artworks, na.rm = TRUE)
  )

  list(
    artists = stats_dt,
    insights = insights,
    comparison_date = Sys.Date()
  )
}

# Use it
comparison <- build_portfolio_comparison(c(
  "bobby-fatemi",
  "jane-smith",
  "artist-three"
))

if (is.null(comparison$error)) {
  cat("\n=== PORTFOLIO COMPARISON ===\n")
  cat("Most prolific:", comparison$insights$most_prolific, "\n")
  cat("Most dedicated:", comparison$insights$most_dedicated, "\n")
  cat("Most efficient:", comparison$insights$most_efficient, "\n")
  cat("Total artworks:", comparison$insights$total_artworks_compared, "\n")

  # View rankings
  print(comparison$artists[, .(
    artist_name,
    artworks,
    artworks_rank,
    total_hours,
    hours_per_artwork
  )])
}
Key Techniques

- Batch processing: Single connection for all queries
- Null handling: Filter(Negate(is.null), ...) removes invalid artists
- Ranking: frankv() handles ties and NA values correctly
- Enrichment pattern: Merge statistical data with profile data
- Defensive calculations: pmax(x, 1) prevents division by zero (see the sketch after this list)
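
These defensive idioms are easy to verify in isolation. A minimal sketch in base R, independent of any artutils data:

# pmax() clamps each denominator to at least 1, so artists with zero
# artworks or hours produce 0 instead of Inf or NaN
hours    <- c(10, 0, 25)
artworks <- c(5, 3, 0)
hours / pmax(artworks, 1)   # 2 0 25

# Filter(Negate(is.null), ...) drops failed lookups before any ranking
records <- list(list(artist_uuid = "a"), NULL, list(artist_uuid = "b"))
length(Filter(Negate(is.null), records))   # 2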
Workflow 2: Building a Gallery with Smart Pagination
Implement a paginated gallery with filtering, sorting, and visibility rules. This is the foundation for any artwork browsing interface.
The Challenge
We need to:
1. Support dynamic filters (style, category, artist)
2. Respect collection visibility settings
3. Paginate efficiently without loading all data
4. Sort by multiple criteria
5. Handle edge cases (empty results, last page)
Implementation
build_paginated_gallery <- function(
  artist_uuid = NULL,
  style_tags = NULL,
  category = NULL,
  sort_by = "created_utc",
  sort_order = "DESC",
  page = 1,
  per_page = 20,
  include_hidden = FALSE
) {
  cn <- artcore::dbc()
  on.exit(artcore::dbd(cn))

  # Phase 1: Build dynamic query
  # sort_by and sort_order are spliced into the SQL as identifiers, so
  # placeholders cannot protect them - whitelist them instead
  sort_by <- match.arg(sort_by, c("created_utc", "art_name", "art_title"))
  sort_order <- match.arg(sort_order, c("DESC", "ASC"))

  where_clauses <- c("1=1")  # Always-true base condition
  params <- list()

  if (!is.null(artist_uuid)) {
    artcore::validate_uuid(artist_uuid, "artist")
    where_clauses <- c(where_clauses, "ai.artist_uuid = ?")
    params <- c(params, artist_uuid)
  }
  if (!is.null(category)) {
    where_clauses <- c(where_clauses, "ap.category = ?")
    params <- c(params, category)
  }
  if (!is.null(style_tags) && length(style_tags) > 0) {
    # Match any of the provided tags
    tag_placeholders <- paste(rep("?", length(style_tags)), collapse = ", ")
    where_clauses <- c(where_clauses,
                       paste0("ast.tag IN (", tag_placeholders, ")"))
    params <- c(params, as.list(style_tags))
  }
  where_sql <- paste(where_clauses, collapse = " AND ")

  # Phase 2: Get total count (for pagination metadata)
  count_qry <- DBI::sqlInterpolate(cn, paste0("
    SELECT COUNT(DISTINCT ai.art_uuid) as total
    FROM app.artwork_index ai
    LEFT JOIN app.artwork_profiles ap ON ai.art_uuid = ap.art_uuid
    LEFT JOIN app.artwork_styles ast ON ai.art_uuid = ast.art_uuid
    WHERE ", where_sql
  ), .dots = params)
  total_count <- db_art_get(count_qry, cn = cn)$total[1]

  # Phase 3: Calculate pagination
  offset <- (page - 1) * per_page
  total_pages <- ceiling(total_count / per_page)
  if (page > total_pages && total_count > 0) {
    # Requested page doesn't exist - return last page
    page <- total_pages
    offset <- (page - 1) * per_page
  }

  # Phase 4: Fetch page of results
  data_qry <- DBI::sqlInterpolate(cn, paste0("
    SELECT DISTINCT
      ai.art_uuid,
      ai.artist_uuid,
      ai.art_name,
      ai.art_title,
      ai.collection_uuid,
      ai.created_utc,
      idx.artist_name,
      s.brush_strokes,
      s.drawing_hours,
      s.n_unique_colors,
      ap.category,
      ci.collection_name
    FROM app.artwork_index ai
    JOIN app.artist_index idx ON ai.artist_uuid = idx.artist_uuid
    JOIN app.collection_index ci ON ai.collection_uuid = ci.collection_uuid
    LEFT JOIN app.artwork_profiles ap ON ai.art_uuid = ap.art_uuid
    LEFT JOIN app.artwork_styles ast ON ai.art_uuid = ast.art_uuid
    LEFT JOIN app.artwork_stats s ON ai.art_uuid = s.art_uuid
    WHERE ", where_sql, "
    ORDER BY ai.", sort_by, " ", sort_order, "
    LIMIT ", per_page, " OFFSET ", offset
  ), .dots = params)
  results <- db_art_get(data_qry, cn = cn)

  # Phase 5: Apply visibility filtering if needed
  if (!include_hidden && nrow(results) > 0) {
    # Get visibility for all collections in this page
    collection_uuids <- unique(results$collection_uuid)
    artist_uuids <- unique(results$artist_uuid)
    visibility <- get_collect_visibility(
      artists = artist_uuids,
      collects = collection_uuids,
      cn = cn
    )
    # Merge and filter
    results <- merge(results, visibility,
                     by = "collection_uuid",
                     all.x = TRUE)
    # Default to visible if no settings (shouldn't happen)
    results[is.na(is_visible), is_visible := TRUE]
    # Filter out hidden collections
    results <- results[is_visible == TRUE]
    # Remove visibility column (internal detail)
    results[, is_visible := NULL]
  }

  # Phase 6: Enrich with CDN paths
  if (nrow(results) > 0) {
    results[, thumb_url := path_artwork_thumb(artist_uuid, art_uuid),
            by = .(artist_uuid, art_uuid)]
    results[, main_url := path_art_main_image(artist_uuid, art_uuid),
            by = .(artist_uuid, art_uuid)]
  }

  # Phase 7: Build response with metadata
  list(
    items = results,
    pagination = list(
      page = page,
      per_page = per_page,
      total_items = total_count,
      total_pages = total_pages,
      has_prev = page > 1,
      has_next = page < total_pages,
      showing_from = if (total_count > 0) offset + 1 else 0,
      showing_to = min(offset + nrow(results), total_count)
    ),
    filters_applied = list(
      artist = artist_uuid,
      style_tags = style_tags,
      category = category,
      sort_by = sort_by,
      sort_order = sort_order
    )
  )
}

# Use it
gallery <- build_paginated_gallery(
  style_tags = c("abstract", "landscape"),
  category = "painting",
  sort_by = "created_utc",
  sort_order = "DESC",
  page = 1,
  per_page = 20
)

cat("Showing", gallery$pagination$showing_from, "-",
    gallery$pagination$showing_to, "of",
    gallery$pagination$total_items, "artworks\n")
cat("Page", gallery$pagination$page, "of",
    gallery$pagination$total_pages, "\n")

Key Techniques
- Dynamic query building: Conditionally add WHERE clauses
- SQL interpolation: Use parameterized queries for values, and whitelists for identifiers like sort_by, to prevent SQL injection (see the sketch after this list)
- Two-phase pagination: Count total, then fetch page
- Visibility integration: Batch check collections
- Defensive pagination: Handle invalid page numbers gracefully
- Rich metadata: Return everything needed to render pagination UI
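
To see the parameterized-interpolation idiom on its own, here is a minimal sketch against DBI::ANSI(), the generic SQL dialect stub DBI ships for testing; the clause accumulation mirrors Phase 1 of build_paginated_gallery(), and positional ? support assumes a reasonably recent DBI:

library(DBI)

clauses <- c("1=1", "ap.category = ?", "ast.tag IN (?, ?)")
params  <- list("painting", "abstract", "landscape")
sql <- paste0("SELECT * FROM app.artwork_index ai WHERE ",
              paste(clauses, collapse = " AND "))

# Positional ? placeholders are filled from .dots in order, with values
# escaped for the target dialect, so user input cannot break the query
sqlInterpolate(ANSI(), sql, .dots = params)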
Workflow 3: Artwork Upload with Full Validation
Handle the complete artwork upload workflow with validation, error recovery, and rollback.
The Challenge
We need to:
1. Validate all data before insertion
2. Use transactions for atomicity
3. Handle partial failures gracefully
4. Update aggregate statistics
5. Provide detailed error reporting
Implementation
upload_artwork_safely <- function(
  artist_uuid,
  artwork_uuid,
  collection_uuid,
  artwork_data  # List containing all prepared data.tables
) {
  # Phase 1: Pre-flight validation
  validation_errors <- list()

  # Validate UUIDs; <<- assigns into the enclosing function's frame
  # from inside the error handler
  tryCatch({
    artcore::validate_uuid(artist_uuid, "artist")
    artcore::validate_uuid(artwork_uuid, "artwork")
    artcore::validate_uuid(collection_uuid, "collection")
  }, error = function(e) {
    validation_errors <<- c(validation_errors,
                            list(uuid_validation = e$message))
  })

  # Validate required data tables
  required_tables <- c(
    "artwork_index",
    "artwork_stats",
    "artwork_meta",
    "artwork_frame_analytics"
  )
  for (tbl in required_tables) {
    if (!tbl %in% names(artwork_data) || nrow(artwork_data[[tbl]]) == 0) {
      validation_errors[[tbl]] <- paste("Missing or empty:", tbl)
    }
  }

  # Validate artwork_index structure
  if ("artwork_index" %in% names(artwork_data)) {
    idx <- artwork_data$artwork_index
    required_cols <- c("artist_uuid", "art_uuid", "collection_uuid",
                       "art_name", "art_title")
    missing_cols <- setdiff(required_cols, names(idx))
    if (length(missing_cols) > 0) {
      validation_errors$artwork_index <- paste(
        "Missing columns:", paste(missing_cols, collapse = ", "))
    }
  }

  # If validation failed, return errors immediately
  if (length(validation_errors) > 0) {
    return(list(
      success = FALSE,
      phase = "validation",
      errors = validation_errors
    ))
  }

  # Phase 2: Verify collection exists and is owned by artist
  cn <- artcore::dbc()
  on.exit(artcore::dbd(cn))
  # collection_uuid passed validate_uuid() above, so interpolating it
  # directly into the query is safe here
  collection_check <- db_art_get(
    stringr::str_glue("
      SELECT artist_uuid, collection_name
      FROM app.collection_index
      WHERE collection_uuid = '{collection_uuid}'
    "),
    cn = cn
  )
  if (nrow(collection_check) == 0) {
    return(list(
      success = FALSE,
      phase = "collection_verification",
      errors = list(collection = "Collection does not exist")
    ))
  }
  if (collection_check$artist_uuid[1] != artist_uuid) {
    return(list(
      success = FALSE,
      phase = "collection_verification",
      errors = list(ownership = "Collection belongs to a different artist")
    ))
  }

  # Phase 3: Attempt insertion with transaction
  insertion_result <- tryCatch({
    add_artwork(
      artist = artist_uuid,
      artwork = artwork_uuid,
      artwork_index = artwork_data$artwork_index,
      artwork_stats = artwork_data$artwork_stats,
      artwork_meta = artwork_data$artwork_meta,
      artwork_frame_analytics = artwork_data$artwork_frame_analytics,
      artwork_colors = artwork_data$artwork_colors,
      artwork_profiles = artwork_data$artwork_profiles,
      artwork_hash = artwork_data$artwork_hash,
      artwork_paths = artwork_data$artwork_paths,
      artwork_styles = artwork_data$artwork_styles,
      global_styles = artwork_data$global_styles,
      cn = cn
    )
    list(success = TRUE)
  }, error = function(e) {
    # Transaction rolled back automatically
    list(success = FALSE, error = e$message)
  })
  if (!insertion_result$success) {
    return(list(
      success = FALSE,
      phase = "database_insertion",
      errors = list(insertion = insertion_result$error)
    ))
  }

  # Phase 4: Update aggregate statistics
  stats_result <- tryCatch({
    update_artist_stats(artist_uuid, cn = cn)
    list(success = TRUE)
  }, error = function(e) {
    # Artwork was inserted but the stats update failed. This is
    # recoverable: the artwork exists, and stats can be recalculated later
    list(success = FALSE, error = e$message)
  })

  # Phase 5: Optionally recalculate benchmarks
  benchmark_result <- tryCatch({
    update_artist_benchmarks(artist_uuid, cn = cn)
    list(success = TRUE)
  }, error = function(e) {
    # Benchmarks failed but the artwork is safely stored
    list(success = FALSE, error = e$message)
  })

  # Phase 6: Build success response with warnings
  response <- list(
    success = TRUE,
    artwork_uuid = artwork_uuid,
    artist_uuid = artist_uuid,
    collection = collection_check$collection_name[1]
  )
  warnings <- list()
  if (!stats_result$success) {
    warnings$stats <- paste("Stats update failed:", stats_result$error)
  }
  if (!benchmark_result$success) {
    warnings$benchmarks <- paste("Benchmark update failed:",
                                 benchmark_result$error)
  }
  if (length(warnings) > 0) {
    response$warnings <- warnings
    response$recovery_instructions <- paste(
      "Artwork uploaded successfully but post-processing failed.",
      "Run update_artist_stats() and update_artist_benchmarks() manually."
    )
  }
  response
}

# Use it
result <- upload_artwork_safely(
  artist_uuid = "746b8207-72f5-4ab6-8d19-a91d03daec3d",
  artwork_uuid = "99a61148-1d3b-4340-8cf6-92ad26046b0f",
  collection_uuid = "77102117-5e8d-4c3e-8e22-f48e91a4e7c1",
  artwork_data = prepared_data  # From artpipelines
)

if (result$success) {
  cat("SUCCESS: Artwork uploaded\n")
  if (!is.null(result$warnings)) {
    cat("WARNINGS:\n")
    for (w in names(result$warnings)) {
      cat(" -", result$warnings[[w]], "\n")
    }
  }
} else {
  cat("FAILED at phase:", result$phase, "\n")
  cat("Errors:\n")
  for (e in names(result$errors)) {
    cat(" -", e, ":", result$errors[[e]], "\n")
  }
}

Key Techniques
- Multi-phase validation: Check everything before touching database
- Transactional safety: add_artwork() uses a transaction internally
- Graceful degradation: Artwork succeeds even if stats/benchmarks fail
- Detailed error reporting: Phase tracking helps debugging (see the sketch after this list)
- Recovery instructions: Tell user how to fix partial failures
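
The phase-tracking pattern generalizes beyond uploads. A minimal sketch; run_phase() is a hypothetical helper, not part of artutils:

# Wrap each fallible step so the caller learns where it failed,
# not just that it failed
run_phase <- function(phase, expr) {
  tryCatch(
    list(success = TRUE, value = expr),  # expr is forced inside tryCatch
    error = function(e) {
      list(success = FALSE, phase = phase, error = conditionMessage(e))
    }
  )
}

res <- run_phase("database_insertion", stop("simulated failure"))
res$success  # FALSE
res$phase    # "database_insertion"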
Workflow 4: Performance Optimization with Caching
Implement intelligent caching for get_appdata() in a Shiny application.
The Challenge
get_appdata() is expensive (multiple joins, benchmark calculations). We need to:
1. Cache results to avoid repeated queries
2. Invalidate the cache when data changes
3. Share the cache across reactive contexts
4. Handle cache misses gracefully
Implementation
# In the Shiny server function
server <- function(input, output, session) {
  # Reactive values for state management
  rv <- reactiveValues(
    appdata_cache = list(),     # cache key -> appdata mapping
    cache_timestamps = list(),  # cache key -> timestamp
    cache_ttl = 300,            # Time-to-live: 5 minutes (in seconds)
    current_artist = NULL,      # Set elsewhere by selection handlers
    current_artwork = NULL
  )

  # Helper: Build cache key from the UUID pair
  make_cache_key <- function(artist, artwork) {
    paste(artist, artwork, sep = ":")
  }

  # Helper: Check if a cache entry is still fresh
  is_cache_fresh <- function(cache_key) {
    if (!cache_key %in% names(rv$cache_timestamps)) {
      return(FALSE)
    }
    # Fix the units explicitly; a bare time difference picks its own
    age <- as.numeric(difftime(Sys.time(), rv$cache_timestamps[[cache_key]],
                               units = "secs"))
    age < rv$cache_ttl
  }

  # Smart getter with caching
  get_appdata_cached <- function(artist, artwork) {
    cache_key <- make_cache_key(artist, artwork)
    # Check cache
    if (cache_key %in% names(rv$appdata_cache) &&
        is_cache_fresh(cache_key)) {
      cat("[CACHE HIT]", cache_key, "\n")
      return(rv$appdata_cache[[cache_key]])
    }
    # Cache miss - fetch from database
    cat("[CACHE MISS]", cache_key, "- fetching...\n")
    appdata <- get_appdata(artist, artwork)
    # Store in cache
    rv$appdata_cache[[cache_key]] <- appdata
    rv$cache_timestamps[[cache_key]] <- Sys.time()
    appdata
  }

  # Invalidate cache on modification
  invalidate_cache <- function(artist = NULL, artwork = NULL) {
    if (!is.null(artist) && !is.null(artwork)) {
      # Invalidate a specific artwork
      cache_key <- make_cache_key(artist, artwork)
      rv$appdata_cache[[cache_key]] <- NULL
      rv$cache_timestamps[[cache_key]] <- NULL
      cat("[CACHE INVALIDATED]", cache_key, "\n")
    } else if (!is.null(artist)) {
      # Invalidate all artworks for an artist
      pattern <- paste0("^", artist, ":")
      keys_to_remove <- grep(pattern, names(rv$appdata_cache), value = TRUE)
      for (key in keys_to_remove) {
        rv$appdata_cache[[key]] <- NULL
        rv$cache_timestamps[[key]] <- NULL
      }
      cat("[CACHE INVALIDATED] Artist:", artist,
          "- removed", length(keys_to_remove), "entries\n")
    } else {
      # Clear the entire cache
      rv$appdata_cache <- list()
      rv$cache_timestamps <- list()
      cat("[CACHE CLEARED]\n")
    }
  }

  # Usage in a reactive context
  artwork_data <- reactive({
    req(rv$current_artist, rv$current_artwork)
    get_appdata_cached(rv$current_artist, rv$current_artwork)
  })

  # Invalidate after upload
  observeEvent(input$upload_complete, {
    invalidate_cache(
      artist = rv$current_artist,
      artwork = input$uploaded_artwork_uuid
    )
  })

  # Periodic cache cleanup
  observe({
    invalidateLater(60 * 1000, session)  # Re-run every 60 seconds
    # isolate() so touching rv here doesn't retrigger this observer
    isolate({
      current_time <- Sys.time()
      stale_keys <- character(0)
      for (key in names(rv$cache_timestamps)) {
        age <- as.numeric(difftime(current_time, rv$cache_timestamps[[key]],
                                   units = "secs"))
        if (age > rv$cache_ttl) {
          stale_keys <- c(stale_keys, key)
        }
      }
      if (length(stale_keys) > 0) {
        for (key in stale_keys) {
          rv$appdata_cache[[key]] <- NULL
          rv$cache_timestamps[[key]] <- NULL
        }
        cat("[CACHE CLEANUP] Removed", length(stale_keys), "stale entries\n")
      }
    })
  })
}

Key Techniques
- Key-based caching: Simple string key from UUID pair
- TTL expiration: Auto-expire after 5 minutes (a package-based alternative is sketched after this list)
- Selective invalidation: Artist-level or artwork-level
- Periodic cleanup: Remove stale entries to prevent memory growth
- Cache telemetry: Log hits/misses for monitoring
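
Outside a Shiny session, the same TTL policy can be delegated to a package rather than hand-rolled. A sketch assuming the memoise and cachem packages are available:

library(memoise)

# cache_mem(max_age = 300) evicts entries after five minutes, and the
# memoised wrapper keys on its arguments, replacing make_cache_key()
get_appdata_cached <- memoise(
  get_appdata,
  cache = cachem::cache_mem(max_age = 300)
)

# forget() clears the whole cache - the equivalent of invalidate_cache()
# with no arguments; per-key invalidation still needs the manual approach
forget(get_appdata_cached)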
Performance Benchmarks
Real-world performance measurements from production:
| Operation | Cold (unoptimized) | Warm (optimized) | Speedup |
|---|---|---|---|
| get_appdata() | 250-400ms | <1ms (cached) | 250-400x |
| get_artist_stats() | 15-25ms | N/A | N/A |
| get_frame_analytics() | 100-150ms | N/A | N/A |
| Artist profile (6 queries) | 450ms (6 separate cn) | 150ms (shared cn) | 3x |
Optimization priorities:
1. Use shared connections for multiple queries (sketched after this list)
2. Cache get_appdata() results in Shiny apps
3. Lazy-load frame analytics, fetching them only when the replay player is opened (second sketch below)
4. Use batch visibility checks instead of loops
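
Priority 1 is the connection-sharing pattern used throughout this vignette. A minimal sketch; load_profile_bundle() is illustrative, not part of artutils:

load_profile_bundle <- function(slug) {
  # One connection for the whole bundle; on.exit() guarantees teardown
  cn <- artcore::dbc()
  on.exit(artcore::dbd(cn))
  profile <- get_artist_by_slug(slug, cn = cn)
  stats   <- get_artist_stats(profile$artist_uuid, cn = cn)
  list(profile = profile, stats = stats)
}

Priority 3 in a Shiny context might look like the following; input$open_replay is a hypothetical trigger, and the get_frame_analytics() argument list shown is assumed:

# Frame analytics load only when the replay player is opened,
# never on the initial page render
frame_data <- eventReactive(input$open_replay, {
  get_frame_analytics(rv$current_artist, rv$current_artwork)
})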
Next Steps
These workflows demonstrate production-grade patterns. For more:
- Data Access Patterns - Foundational query patterns
- Data Modification Workflows - WRITE operations
- Reference - Complete function documentation
