
Complex Multi-Step Workflows

This vignette covers advanced scenarios that combine multiple artutils functions to solve real production challenges. These workflows demonstrate patterns you’ll encounter in complex applications.

Workflow 1: Multi-Artist Portfolio Comparison

Build a comparative dashboard showing multiple artists side-by-side. This is useful for gallery curation, NFT collection management, or artist discovery features.

The Challenge

We need to:

  1. Load data for multiple artists efficiently
  2. Normalize metrics for fair comparison
  3. Rank artists across different dimensions
  4. Handle missing or incomplete data gracefully

Implementation

build_portfolio_comparison <- function(artist_slugs) {
  # Phase 1: Batch lookup and validation
  cn <- artcore::dbc()
  on.exit(artcore::dbd(cn))

  # Resolve all slugs over a single shared connection
  artist_records <- lapply(artist_slugs, function(slug) {
    get_artist_by_slug(slug, cn = cn)
  })

  # Filter out NULLs (artists not found)
  valid_artists <- Filter(Negate(is.null), artist_records)
  if (length(valid_artists) == 0) {
    return(list(error = "no_valid_artists", attempted = artist_slugs))
  }

  artist_uuids <- sapply(valid_artists, function(x) x$artist_uuid)

  # Phase 2: Gather statistics for all artists
  stats_list <- lapply(artist_uuids, function(uuid) {
    stats <- get_artist_stats(uuid, cn = cn)
    stats$artist_uuid <- uuid
    stats
  })

  # Convert to data.table for analysis
  stats_dt <- data.table::rbindlist(stats_list, fill = TRUE)

  # Phase 3: Calculate comparison metrics
  stats_dt[, `:=`(
    # Productivity metrics
    hours_per_artwork = total_hours / pmax(artworks, 1),
    strokes_per_hour = total_brushes / pmax(total_hours, 1),

    # Efficiency score (artworks per collection)
    collection_density = artworks / pmax(collections, 1),

    # Experience (days since joining)
    days_active = as.numeric(Sys.Date() - as.Date(member_since))
  )]

  # Phase 4: Rank artists across dimensions
  ranking_cols <- c("artworks", "total_hours", "total_brushes",
                    "hours_per_artwork", "strokes_per_hour", "collection_density")

  for (col in ranking_cols) {
    rank_col <- paste0(col, "_rank")
    stats_dt[, (rank_col) := data.table::frankv(get(col), order = -1L, na.last = "keep")]
  }

  # Phase 5: Enrich with profile data
  for (i in seq_along(valid_artists)) {
    uuid <- artist_uuids[i]
    profile <- valid_artists[[i]]

    stats_dt[artist_uuid == uuid, `:=`(
      artist_name = profile$artist_name,
      slug = profile$slug,
      location = paste(profile$city, profile$state, sep = ", "),
      avatar_url = path_artist_thumb(uuid)
    )]
  }

  # Phase 6: Generate insights
  insights <- list(
    most_prolific = stats_dt[which.max(artworks)]$artist_name,
    most_dedicated = stats_dt[which.max(total_hours)]$artist_name,
    most_efficient = stats_dt[which.min(hours_per_artwork)]$artist_name,
    fastest_worker = stats_dt[which.max(strokes_per_hour)]$artist_name,
    avg_portfolio_size = mean(stats_dt$artworks, na.rm = TRUE),
    total_artworks_compared = sum(stats_dt$artworks, na.rm = TRUE)
  )

  list(
    artists = stats_dt,
    insights = insights,
    comparison_date = Sys.Date()
  )
}

# Use it
comparison <- build_portfolio_comparison(c(
  "bobby-fatemi",
  "jane-smith",
  "artist-three"
))

if (is.null(comparison$error)) {
  cat("\n=== PORTFOLIO COMPARISON ===\n")
  cat("Most prolific:", comparison$insights$most_prolific, "\n")
  cat("Most dedicated:", comparison$insights$most_dedicated, "\n")
  cat("Most efficient:", comparison$insights$most_efficient, "\n")
  cat("Total artworks:", comparison$insights$total_artworks_compared, "\n")

  # View rankings
  print(comparison$artists[, .(
    artist_name,
    artworks,
    artworks_rank,
    total_hours,
    hours_per_artwork
  )])
}

Key Techniques

  1. Batch processing: Single connection for all queries
  2. Null handling: Filter(Negate(is.null), ...) removes invalid artists
  3. Ranking: frankv() handles ties and NA values correctly
  4. Enrichment pattern: Merge statistical data with profile data
  5. Defensive calculations: pmax(x, 1) prevents division by zero
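Techniques 2, 3, and 5 can be exercised in isolation on toy data. This is a minimal sketch with made-up records; only data.table is required:

```r
library(data.table)

# Simulated lookup results where the second slug failed to resolve
records <- list(
  list(artist_uuid = "a1", artworks = 12, total_hours = 30),
  NULL,  # slug not found
  list(artist_uuid = "a3", artworks = 5, total_hours = 50)
)

# Drop the NULLs before binding, as in Phase 1 above
valid <- Filter(Negate(is.null), records)
dt <- rbindlist(valid, fill = TRUE)

# Rank descending, keep NAs unranked, and guard the division with pmax()
dt[, artworks_rank := frankv(artworks, order = -1L, na.last = "keep")]  # a1 -> 1, a3 -> 2
dt[, hours_per_artwork := total_hours / pmax(artworks, 1)]
```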

Workflow 2: Paginated Artwork Gallery

Implement a paginated gallery with filtering, sorting, and visibility rules. This is the foundation for any artwork-browsing interface.

The Challenge

We need to:

  1. Support dynamic filters (style, category, artist)
  2. Respect collection visibility settings
  3. Paginate efficiently without loading all data
  4. Sort by multiple criteria
  5. Handle edge cases (empty results, last page)

Implementation

build_paginated_gallery <- function(
  artist_uuid = NULL,
  style_tags = NULL,
  category = NULL,
  sort_by = "created_utc",
  sort_order = "DESC",
  page = 1,
  per_page = 20,
  include_hidden = FALSE
) {
  cn <- artcore::dbc()
  on.exit(artcore::dbd(cn))

  # Whitelist sort inputs -- these are pasted directly into the SQL below,
  # so they must never reach the query unchecked
  sort_by <- match.arg(sort_by, c("created_utc", "art_name", "art_title"))
  sort_order <- match.arg(toupper(sort_order), c("DESC", "ASC"))

  # Phase 1: Build dynamic query (sqlInterpolate() binds named ?placeholders)
  where_clauses <- c("1=1")  # Always-true base condition
  params <- list()

  if (!is.null(artist_uuid)) {
    artcore::validate_uuid(artist_uuid, "artist")
    where_clauses <- c(where_clauses, "ai.artist_uuid = ?artist_uuid")
    params$artist_uuid <- artist_uuid
  }

  if (!is.null(category)) {
    where_clauses <- c(where_clauses, "ap.category = ?category")
    params$category <- category
  }

  if (!is.null(style_tags) && length(style_tags) > 0) {
    # Match any of the provided tags; generate one named placeholder per tag
    tag_names <- paste0("tag", seq_along(style_tags))
    tag_placeholders <- paste0("?", tag_names, collapse = ", ")
    where_clauses <- c(where_clauses,
                       paste0("ast.tag IN (", tag_placeholders, ")"))
    params[tag_names] <- as.list(style_tags)
  }

  where_sql <- paste(where_clauses, collapse = " AND ")

  # Phase 2: Get total count (for pagination metadata)
  count_qry <- DBI::sqlInterpolate(cn, paste0("
    SELECT COUNT(DISTINCT ai.art_uuid) as total
    FROM app.artwork_index ai
    LEFT JOIN app.artwork_profiles ap ON ai.art_uuid = ap.art_uuid
    LEFT JOIN app.artwork_styles ast ON ai.art_uuid = ast.art_uuid
    WHERE ", where_sql
  ), .dots = params)

  total_count <- db_art_get(count_qry, cn = cn)$total[1]

  # Phase 3: Calculate pagination
  offset <- (page - 1) * per_page
  total_pages <- ceiling(total_count / per_page)

  if (page > total_pages && total_count > 0) {
    # Requested page doesn't exist - return last page
    page <- total_pages
    offset <- (page - 1) * per_page
  }

  # Phase 4: Fetch page of results
  data_qry <- DBI::sqlInterpolate(cn, paste0("
    SELECT DISTINCT
      ai.art_uuid,
      ai.artist_uuid,
      ai.art_name,
      ai.art_title,
      ai.collection_uuid,
      ai.created_utc,
      idx.artist_name,
      s.brush_strokes,
      s.drawing_hours,
      s.n_unique_colors,
      ap.category,
      ci.collection_name
    FROM app.artwork_index ai
    JOIN app.artist_index idx ON ai.artist_uuid = idx.artist_uuid
    JOIN app.collection_index ci ON ai.collection_uuid = ci.collection_uuid
    LEFT JOIN app.artwork_profiles ap ON ai.art_uuid = ap.art_uuid
    LEFT JOIN app.artwork_styles ast ON ai.art_uuid = ast.art_uuid
    LEFT JOIN app.artwork_stats s ON ai.art_uuid = s.art_uuid
    WHERE ", where_sql, "
    ORDER BY ai.", sort_by, " ", sort_order, "
    LIMIT ", per_page, " OFFSET ", offset
  ), .dots = params)

  results <- db_art_get(data_qry, cn = cn)

  # Phase 5: Apply visibility filtering if needed
  if (!include_hidden && nrow(results) > 0) {
    # Get visibility for all collections in this page
    collection_uuids <- unique(results$collection_uuid)
    artist_uuids <- unique(results$artist_uuid)

    visibility <- get_collect_visibility(
      artists = artist_uuids,
      collects = collection_uuids,
      cn = cn
    )

    # Merge and filter
    results <- merge(results, visibility,
                     by = "collection_uuid",
                     all.x = TRUE)

    # Default to visible if no settings (shouldn't happen)
    results[is.na(is_visible), is_visible := TRUE]

    # Filter out hidden collections
    results <- results[is_visible == TRUE]

    # Remove visibility column (internal detail)
    results[, is_visible := NULL]
  }

  # Phase 6: Enrich with CDN paths
  if (nrow(results) > 0) {
    results[, thumb_url := path_artwork_thumb(artist_uuid, art_uuid),
            by = .(artist_uuid, art_uuid)]
    results[, main_url := path_art_main_image(artist_uuid, art_uuid),
            by = .(artist_uuid, art_uuid)]
  }

  # Phase 7: Build response with metadata
  list(
    items = results,
    pagination = list(
      page = page,
      per_page = per_page,
      total_items = total_count,
      total_pages = total_pages,
      has_prev = page > 1,
      has_next = page < total_pages,
      showing_from = offset + 1,
      showing_to = min(offset + nrow(results), total_count)
    ),
    filters_applied = list(
      artist = artist_uuid,
      style_tags = style_tags,
      category = category,
      sort_by = sort_by,
      sort_order = sort_order
    )
  )
}

# Use it
gallery <- build_paginated_gallery(
  style_tags = c("abstract", "landscape"),
  category = "painting",
  sort_by = "created_utc",
  sort_order = "DESC",
  page = 1,
  per_page = 20
)

cat("Showing", gallery$pagination$showing_from, "-",
    gallery$pagination$showing_to, "of",
    gallery$pagination$total_items, "artworks\n")
cat("Page", gallery$pagination$page, "of",
    gallery$pagination$total_pages, "\n")

Key Techniques

  1. Dynamic query building: Conditionally add WHERE clauses
  2. SQL interpolation: Use parameterized queries to prevent SQL injection
  3. Two-phase pagination: Count total, then fetch page
  4. Visibility integration: Batch check collections
  5. Defensive pagination: Handle invalid page numbers gracefully
  6. Rich metadata: Return everything needed to render pagination UI
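The Phase 3 arithmetic (offset computation plus last-page clamping) generalizes to a small standalone helper; paginate() below is illustrative, not an artutils function:

```r
# Compute pagination metadata, clamping out-of-range requests to the last page
paginate <- function(total_count, page, per_page) {
  total_pages <- max(ceiling(total_count / per_page), 1)
  page <- min(max(page, 1), total_pages)
  offset <- (page - 1) * per_page
  list(
    page = page,
    offset = offset,
    total_pages = total_pages,
    has_prev = page > 1,
    has_next = page < total_pages
  )
}

paginate(95, page = 99, per_page = 20)  # clamped to page 5, offset 80
```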

Workflow 3: Artwork Upload with Full Validation

Handle the complete artwork upload workflow with validation, error recovery, and rollback.

The Challenge

We need to:

  1. Validate all data before insertion
  2. Use transactions for atomicity
  3. Handle partial failures gracefully
  4. Update aggregate statistics
  5. Provide detailed error reporting

Implementation

upload_artwork_safely <- function(
  artist_uuid,
  artwork_uuid,
  collection_uuid,
  artwork_data  # List containing all prepared data.tables
) {
  # Phase 1: Pre-flight validation
  validation_errors <- list()

  # Validate UUIDs
  tryCatch({
    artcore::validate_uuid(artist_uuid, "artist")
    artcore::validate_uuid(artwork_uuid, "artwork")
    artcore::validate_uuid(collection_uuid, "collection")
  }, error = function(e) {
    validation_errors <<- c(validation_errors,
                            list(uuid_validation = e$message))
  })

  # Validate required data tables
  required_tables <- c(
    "artwork_index",
    "artwork_stats",
    "artwork_meta",
    "artwork_frame_analytics"
  )

  for (table in required_tables) {
    if (!table %in% names(artwork_data) || nrow(artwork_data[[table]]) == 0) {
      validation_errors[[table]] <- paste("Missing or empty:", table)
    }
  }

  # Validate artwork_index structure
  if ("artwork_index" %in% names(artwork_data)) {
    idx <- artwork_data$artwork_index
    required_cols <- c("artist_uuid", "art_uuid", "collection_uuid",
                       "art_name", "art_title")
    missing_cols <- setdiff(required_cols, names(idx))
    if (length(missing_cols) > 0) {
      validation_errors$artwork_index <- paste("Missing columns:",
                                               paste(missing_cols, collapse = ", "))
    }
  }

  # If validation failed, return errors immediately
  if (length(validation_errors) > 0) {
    return(list(
      success = FALSE,
      phase = "validation",
      errors = validation_errors
    ))
  }

  # Phase 2: Verify collection exists and is owned by artist
  cn <- artcore::dbc()
  on.exit(artcore::dbd(cn))

  collection_check <- db_art_get(
    stringr::str_glue("
      SELECT artist_uuid, collection_name
      FROM app.collection_index
      WHERE collection_uuid = '{collection_uuid}'
    "),
    cn = cn
  )

  if (nrow(collection_check) == 0) {
    return(list(
      success = FALSE,
      phase = "collection_verification",
      errors = list(collection = "Collection does not exist")
    ))
  }

  if (collection_check$artist_uuid[1] != artist_uuid) {
    return(list(
      success = FALSE,
      phase = "collection_verification",
      errors = list(ownership = "Collection belongs to different artist")
    ))
  }

  # Phase 3: Attempt insertion with transaction
  insertion_result <- tryCatch({
    add_artwork(
      artist = artist_uuid,
      artwork = artwork_uuid,
      artwork_index = artwork_data$artwork_index,
      artwork_stats = artwork_data$artwork_stats,
      artwork_meta = artwork_data$artwork_meta,
      artwork_frame_analytics = artwork_data$artwork_frame_analytics,
      artwork_colors = artwork_data$artwork_colors,
      artwork_profiles = artwork_data$artwork_profiles,
      artwork_hash = artwork_data$artwork_hash,
      artwork_paths = artwork_data$artwork_paths,
      artwork_styles = artwork_data$artwork_styles,
      global_styles = artwork_data$global_styles,
      cn = cn
    )
    list(success = TRUE)
  }, error = function(e) {
    # Transaction rolled back automatically
    list(success = FALSE, error = e$message)
  })

  if (!insertion_result$success) {
    return(list(
      success = FALSE,
      phase = "database_insertion",
      errors = list(insertion = insertion_result$error)
    ))
  }

  # Phase 4: Update aggregate statistics
  stats_result <- tryCatch({
    update_artist_stats(artist_uuid, cn = cn)
    list(success = TRUE)
  }, error = function(e) {
    # Artwork was inserted but stats update failed
    # This is recoverable - artwork exists, stats can be recalculated later
    list(success = FALSE, error = e$message)
  })

  # Phase 5: Optionally recalculate benchmarks
  benchmark_result <- tryCatch({
    update_artist_benchmarks(artist_uuid, cn = cn)
    list(success = TRUE)
  }, error = function(e) {
    # Benchmarks failed but artwork is safely stored
    list(success = FALSE, error = e$message)
  })

  # Phase 6: Build success response with warnings
  response <- list(
    success = TRUE,
    artwork_uuid = artwork_uuid,
    artist_uuid = artist_uuid,
    collection = collection_check$collection_name[1]
  )

  warnings <- list()
  if (!stats_result$success) {
    warnings$stats <- paste("Stats update failed:", stats_result$error)
  }
  if (!benchmark_result$success) {
    warnings$benchmarks <- paste("Benchmark update failed:", benchmark_result$error)
  }

  if (length(warnings) > 0) {
    response$warnings <- warnings
    response$recovery_instructions <- paste(
      "Artwork uploaded successfully but post-processing failed.",
      "Run update_artist_stats() and update_artist_benchmarks() manually."
    )
  }

  response
}

# Use it
result <- upload_artwork_safely(
  artist_uuid = "746b8207-72f5-4ab6-8d19-a91d03daec3d",
  artwork_uuid = "99a61148-1d3b-4340-8cf6-92ad26046b0f",
  collection_uuid = "77102117-5e8d-4c3e-8e22-f48e91a4e7c1",
  artwork_data = prepared_data  # From artpipelines
)

if (result$success) {
  cat("SUCCESS: Artwork uploaded\n")
  if (!is.null(result$warnings)) {
    cat("WARNINGS:\n")
    for (w in names(result$warnings)) {
      cat("  -", result$warnings[[w]], "\n")
    }
  }
} else {
  cat("FAILED at phase:", result$phase, "\n")
  cat("Errors:\n")
  for (e in names(result$errors)) {
    cat("  -", e, ":", result$errors[[e]], "\n")
  }
}

Key Techniques

  1. Multi-phase validation: Check everything before touching database
  2. Transactional safety: add_artwork() uses transaction internally
  3. Graceful degradation: Artwork succeeds even if stats/benchmarks fail
  4. Detailed error reporting: Phase tracking helps debugging
  5. Recovery instructions: Tell user how to fix partial failures
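The phase-tracked failure shape used throughout the function can be factored into a tiny helper; run_phase() is a sketch, not part of artutils:

```r
# Evaluate one phase, converting any error into the standard failure payload
run_phase <- function(phase, expr) {
  tryCatch(
    list(success = TRUE, value = expr),
    error = function(e) {
      list(success = FALSE, phase = phase,
           errors = stats::setNames(list(conditionMessage(e)), phase))
    }
  )
}

res <- run_phase("validation", stop("malformed artist uuid"))
res$success  # FALSE
res$phase    # "validation"
```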

Workflow 4: Performance Optimization with Caching

Implement intelligent caching for get_appdata() in a Shiny application.

The Challenge

get_appdata() is expensive (multiple joins, benchmark calculations). We need to:

  1. Cache results to avoid repeated queries
  2. Invalidate cache when data changes
  3. Share cache across reactive contexts
  4. Handle cache misses gracefully

Implementation

# In Shiny server function
server <- function(input, output, session) {
  # Reactive values for state management
  rv <- reactiveValues(
    appdata_cache = list(),  # UUID -> appdata mapping
    cache_timestamps = list(),  # UUID -> timestamp
    cache_ttl = 300  # Time-to-live: 5 minutes
  )

  # Helper: Get cache key
  make_cache_key <- function(artist, artwork) {
    paste(artist, artwork, sep = ":")
  }

  # Helper: Check if cache is fresh
  is_cache_fresh <- function(cache_key) {
    if (!cache_key %in% names(rv$cache_timestamps)) {
      return(FALSE)
    }
    # Force seconds; a bare difftime subtraction picks its own units
    age <- as.numeric(difftime(Sys.time(), rv$cache_timestamps[[cache_key]],
                               units = "secs"))
    age < rv$cache_ttl
  }

  # Smart getter with caching
  get_appdata_cached <- function(artist, artwork) {
    cache_key <- make_cache_key(artist, artwork)

    # Check cache
    if (cache_key %in% names(rv$appdata_cache) &&
        is_cache_fresh(cache_key)) {
      cat("[CACHE HIT]", cache_key, "\n")
      return(rv$appdata_cache[[cache_key]])
    }

    # Cache miss - fetch from database
    cat("[CACHE MISS]", cache_key, "- fetching...\n")
    appdata <- get_appdata(artist, artwork)

    # Store in cache
    rv$appdata_cache[[cache_key]] <- appdata
    rv$cache_timestamps[[cache_key]] <- Sys.time()

    appdata
  }

  # Invalidate cache on modification
  invalidate_cache <- function(artist = NULL, artwork = NULL) {
    if (!is.null(artist) && !is.null(artwork)) {
      # Invalidate specific artwork
      cache_key <- make_cache_key(artist, artwork)
      rv$appdata_cache[[cache_key]] <- NULL
      rv$cache_timestamps[[cache_key]] <- NULL
      cat("[CACHE INVALIDATED]", cache_key, "\n")
    } else if (!is.null(artist)) {
      # Invalidate all artworks for artist
      pattern <- paste0("^", artist, ":")
      keys_to_remove <- grep(pattern, names(rv$appdata_cache), value = TRUE)
      for (key in keys_to_remove) {
        rv$appdata_cache[[key]] <- NULL
        rv$cache_timestamps[[key]] <- NULL
      }
      cat("[CACHE INVALIDATED] Artist:", artist,
          "- removed", length(keys_to_remove), "entries\n")
    } else {
      # Clear entire cache
      rv$appdata_cache <- list()
      rv$cache_timestamps <- list()
      cat("[CACHE CLEARED]\n")
    }
  }

  # Usage in reactive context
  artwork_data <- reactive({
    req(rv$current_artist, rv$current_artwork)
    get_appdata_cached(rv$current_artist, rv$current_artwork)
  })

  # Invalidate after upload
  observeEvent(input$upload_complete, {
    invalidate_cache(
      artist = rv$current_artist,
      artwork = input$uploaded_artwork_uuid
    )
  })

  # Periodic cache cleanup (re-runs every 60 seconds)
  observe({
    invalidateLater(60 * 1000, session)

    # isolate() so cache writes don't re-trigger this observer immediately
    isolate({
      current_time <- Sys.time()
      stale_keys <- character(0)

      for (key in names(rv$cache_timestamps)) {
        age <- as.numeric(difftime(current_time, rv$cache_timestamps[[key]],
                                   units = "secs"))
        if (age > rv$cache_ttl) {
          stale_keys <- c(stale_keys, key)
        }
      }

      if (length(stale_keys) > 0) {
        for (key in stale_keys) {
          rv$appdata_cache[[key]] <- NULL
          rv$cache_timestamps[[key]] <- NULL
        }
        cat("[CACHE CLEANUP] Removed", length(stale_keys), "stale entries\n")
      }
    })
  })
}

Key Techniques

  1. Key-based caching: Simple string key from UUID pair
  2. TTL expiration: Auto-expire after 5 minutes
  3. Selective invalidation: Artist-level or artwork-level
  4. Periodic cleanup: Remove stale entries to prevent memory growth
  5. Cache telemetry: Log hits/misses for monitoring
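The same TTL logic works outside Shiny; the closure below keeps values and timestamps in plain environments. A minimal sketch, with all names illustrative:

```r
# A tiny TTL cache: one environment for values, one for timestamps
make_ttl_cache <- function(ttl_secs = 300) {
  store  <- new.env(parent = emptyenv())
  stamps <- new.env(parent = emptyenv())

  function(key, compute) {
    ts <- get0(key, envir = stamps, ifnotfound = NULL)
    fresh <- !is.null(ts) &&
      as.numeric(difftime(Sys.time(), ts, units = "secs")) < ttl_secs
    if (fresh) {
      return(get(key, envir = store))  # cache hit
    }
    value <- compute()                 # cache miss: compute and record
    assign(key, value, envir = store)
    assign(key, Sys.time(), envir = stamps)
    value
  }
}

get_cached <- make_ttl_cache(ttl_secs = 300)
get_cached("a1:b2", function() "expensive result")   # miss: computes
get_cached("a1:b2", function() stop("not reached"))  # hit: returns cached value
```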

Performance Benchmarks

Real-world performance measurements from production:

| Operation                  | Cold (no cache)   | Warm (with cache)     | Speedup  |
|----------------------------|-------------------|-----------------------|----------|
| get_appdata()              | 250-400ms         | <1ms                  | 250-400x |
| get_artist_stats()         | 15-25ms           | N/A                   | N/A      |
| get_frame_analytics()      | 100-150ms         | N/A                   | N/A      |
| Artist profile (6 queries) | 150ms (shared cn) | 450ms (6 separate cn) | 3x       |

Optimization priorities:

  1. Use shared connections for multiple queries
  2. Cache get_appdata() results in Shiny apps
  3. Lazy-load frame analytics (only when the replay player is opened)
  4. Use batch visibility checks instead of loops

Next Steps

These workflows demonstrate production-grade patterns. For more: