Overview

This guide covers advanced use cases for artbenchmark:

  • Batch processing multiple artworks
  • Custom metric analysis using raster data
  • Performance optimization for large portfolios
  • Integration with artutils benchmarking system
  • Understanding extended metrics

Batch Processing Artworks

Processing Multiple Artworks

When processing an artist’s portfolio, batch operations compute metrics for every artwork in one pass and collect the results into a single table:

library(data.table)
library(artbenchmark)

# Sample artwork data from upload process
artwork_files <- data.table(
    art_uuid = c(
        "99a61148-1d3b-4340-8cf6-92ad26046b0f",
        "99b72259-2e4c-5451-9dg7-a02e14fc7e1g",
        "99c83360-3f5d-6562-0eh8-b13f25gd8f2h"
    ),
    img_path = c(
        "/uploads/artist1/artwork1/main.png",
        "/uploads/artist1/artwork2/main.png",
        "/uploads/artist1/artwork3/main.png"
    ),
    file_sz = c(1847293, 2103847, 1654982),
    draw_mins = c(145, 189, 132),
    strokes = c(52340, 67823, 48291)
)

# Compute metrics for all artworks
metrics_list <- lapply(seq_len(nrow(artwork_files)), function(i) {
    row <- artwork_files[i]

    metrics <- calc_art_metrics(
        img_path = row$img_path,
        file_sz = row$file_sz,
        draw_mins = row$draw_mins,
        strokes = row$strokes
    )

    # Add artwork identifier
    metrics[, art_uuid := row$art_uuid]

    metrics
})

# Combine into single data.table
all_metrics <- rbindlist(metrics_list)

# Add common identifiers
all_metrics[, `:=`(
    artist_uuid = "746b8207-72f5-4ab6-8d19-a91d03daec3d",
    created_utc = Sys.time()
)]

Error Handling in Batch Operations

Robust batch processing handles failures gracefully:

safe_calc_metrics <- function(artwork_row) {
    tryCatch(
        {
            metrics <- calc_art_metrics(
                img_path = artwork_row$img_path,
                file_sz = artwork_row$file_sz,
                draw_mins = artwork_row$draw_mins,
                strokes = artwork_row$strokes
            )
            metrics[, art_uuid := artwork_row$art_uuid]
            list(success = TRUE, data = metrics, error = NULL)
        },
        error = function(e) {
            list(
                success = FALSE,
                data = NULL,
                error = paste0(artwork_row$art_uuid, ": ", e$message)
            )
        }
    )
}

# Process with error tracking
results <- lapply(seq_len(nrow(artwork_files)), function(i) {
    safe_calc_metrics(artwork_files[i])
})

# Extract successful results
successful <- results[sapply(results, function(x) x$success)]
all_metrics <- rbindlist(lapply(successful, function(x) x$data))

# Log failures
failures <- results[!sapply(results, function(x) x$success)]
if (length(failures) > 0) {
    lapply(failures, function(x) message(x$error))
}

Custom Metric Analysis

Working with Raw Raster Data

For custom analysis beyond standard metrics, extract raster data directly:

# Extract raster data
rast_data <- get_image_rast("/path/to/artwork.png")

# Structure:
# - colors: data.table with col (hex) and N (pixel count)
# - total_px: Integer, total pixels in image
# - colored_px: Integer, pixels with color
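
To confirm what you are working with, here is a quick inspection of the extracted object (a minimal sketch, assuming rast_data from the call above):

# Peek at the extracted raster data
str(rast_data, max.level = 1)
head(rast_data$colors[order(-N)])  # most common colors first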

Custom Metric Examples

Color Distribution Analysis

# Get color frequency distribution
colors <- rast_data$colors

# Find colors used more than 100 times
frequent_colors <- colors[N > 100]

# Calculate color concentration
# (what % of pixels use the top 10 colors?)
top_10_colors <- colors[order(-N)][1:10]
concentration <- sum(top_10_colors$N) / rast_data$colored_px * 100

print(paste0("Top 10 colors account for ", round(concentration, 2), "% of pixels"))

Palette Complexity Score

# Custom complexity metric combining multiple factors
palette_complexity <- function(rast_data, strokes) {
    n_colors <- nrow(rast_data$colors)
    coverage <- rast_data$colored_px / rast_data$total_px
    colors_per_stroke <- n_colors / strokes

    # Weighted complexity score (custom formula)
    complexity <- (n_colors * 0.4) + (coverage * 100 * 0.3) + (colors_per_stroke * 10000 * 0.3)

    data.table(
        metric_key = "palette_complexity",
        value = complexity
    )
}

custom_metric <- palette_complexity(rast_data, strokes = 50000)

Color Dominance Analysis

# Identify if artwork has a dominant color theme
analyze_color_dominance <- function(rast_data) {
    colors <- rast_data$colors

    # Convert hex to RGB for hue analysis (rgb_vals would feed the hue
    # classification below; it is unused in this simplified example)
    rgb_vals <- col2rgb(colors$col)

    # Simple hue classification (would be more sophisticated in production)
    # Count pixels by rough hue category
    # (Red, Orange, Yellow, Green, Blue, Purple, Neutral)

    # For this example, just show the concept:
    top_color <- colors[which.max(N)]
    top_color_pct <- (top_color$N / rast_data$colored_px) * 100

    data.table(
        dominant_color = top_color$col,
        dominance_pct = top_color_pct,
        has_dominant_theme = top_color_pct > 25
    )
}

dominance <- analyze_color_dominance(rast_data)

Understanding Extended Metrics

Extended metrics are computed but not part of the core 16-metric framework. They provide deeper insights for research and future feature development.

Accessing Extended Metrics

# Get list of extended metric keys
extended_keys <- get_extended_metrics()
print(extended_keys)
#> [1] "count_dom_colors"  "q25_color_freq"    "q50_color_freq"
#> [4] "ave_bits_psec"     "ave_strokes_psec"  "ave_colors_psec"
#> [7] "ave_bits_pstroke"  "ave_bits_pcolor"

# Filter extended metrics from computation results
metrics <- calc_art_metrics(img_path, file_sz, draw_mins, strokes)
extended_metrics <- metrics[metric_key %in% extended_keys]

What Extended Metrics Measure

Color Frequency Quantiles

# q25_color_freq and q50_color_freq describe the distribution of
# how often individual colors appear
# Lower quartile (q25) = usage level of the rarer colors
# Median (q50) = typical color usage

# A high q50 suggests colors are reused heavily across the canvas
# A low q50 suggests usage is spread thinly across many colors
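
As a rough illustration, these quantiles can be derived directly from the raster color counts. The sketch below is illustrative only; artbenchmark's internal computation may differ:

# Sketch: deriving color-frequency quantiles from raster data
# (illustrative; not necessarily artbenchmark's internal formula)
freqs <- rast_data$colors$N
quantile(freqs, probs = c(0.25, 0.50))  # cf. q25_color_freq, q50_color_freq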

Time-Based Productivity

# ave_bits_psec = File size growth rate (bytes per second)
# ave_strokes_psec = Mark-making speed
# ave_colors_psec = Color introduction rate

# These complement the core "ave_bpm" metric from Procreate stats
# Useful for detecting unusual creation patterns
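
A minimal sketch of how these rates relate to the raw inputs, assuming draw_mins is converted to seconds (illustrative formulas; the package's own definitions may differ):

# Sketch: time-based productivity rates (illustrative formulas)
draw_secs <- draw_mins * 60
file_sz / draw_secs                  # cf. ave_bits_psec
strokes / draw_secs                  # cf. ave_strokes_psec
nrow(rast_data$colors) / draw_secs   # cf. ave_colors_psec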

Complexity Ratios

# ave_bits_pstroke = File complexity per stroke
# ave_bits_pcolor = File complexity per color

# High bits-per-stroke = detailed, complex strokes
# Low bits-per-stroke = simple, efficient mark-making

# High bits-per-color = rich, blended color usage
# Low bits-per-color = limited palette
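
The ratios follow the same pattern (again illustrative; the exact formulas live inside artbenchmark):

# Sketch: complexity ratios (illustrative formulas)
file_sz / strokes                  # cf. ave_bits_pstroke
file_sz / nrow(rast_data$colors)   # cf. ave_bits_pcolor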

Integration with artutils

artbenchmark is designed to work seamlessly with the artutils benchmarking system.

Typical Integration Flow

# 1. Compute metrics with artbenchmark
metrics <- calc_art_metrics(img_path, file_sz, draw_mins, strokes)

# 2. Add identifiers
metrics[, `:=`(
    art_uuid = artwork_uuid,
    artist_uuid = artist_uuid,
    created_utc = Sys.time()
)]

# 3. Insert into database via artutils
# artutils::insert_artwork_metrics(metrics, cn)

# 4. Update artist benchmarks (percentile calculations)
# artutils::update_artist_benchmarks(artist_uuid, cn)

Portfolio-Relative Benchmarking

The metrics computed here feed into portfolio-relative percentile calculations:

# After inserting multiple artworks, artutils computes percentiles
# within the artist's portfolio:

# For each metric:
# - Sort all artwork values for that artist
# - Calculate percentile rank (0-100) for each artwork
# - Store percentile in artist_benchmarks table

# Example result (from artutils):
# art_uuid        metric_key         value  percentile
# 99xxx...     canvas_coverage      91.2         85
# 99xxx...     n_unique_colors    9876.0         72
# 99xxx...     ave_blend_rate        0.81        91

# Percentile 85 = this artwork's coverage exceeds 85% of the artist's portfolio
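
The percentile step itself belongs to artutils, but conceptually it is a ranked position within the portfolio. A minimal data.table sketch (illustrative; not artutils' actual implementation):

# Sketch: portfolio-relative percentile ranks
# (illustrative; artutils' implementation may differ)
all_metrics[, percentile := round(
    100 * (frank(value) - 1) / pmax(.N - 1, 1)
), by = metric_key]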

Performance Considerations

Image Processing Overhead

The main performance bottleneck is image resizing and raster extraction:

# Benchmark: Single artwork processing time
system.time({
    metrics <- calc_art_metrics(img_path, file_sz, draw_mins, strokes)
})
#   user  system elapsed
#  1.234   0.123   1.357

# ~1.3 seconds per artwork on typical hardware
# Dominated by ImageMagick operations (image resize, raster conversion)

Optimization Strategies

Parallel Processing

# For large portfolios, process artworks in parallel
library(parallel)

# Detect cores
n_cores <- detectCores() - 1

# Parallel processing
cl <- makeCluster(n_cores)
clusterEvalQ(cl, {
    library(artbenchmark)
    library(data.table)
})

# Workers need a copy of the input table
clusterExport(cl, "artwork_files")

metrics_list <- parLapply(cl, seq_len(nrow(artwork_files)), function(i) {
    row <- artwork_files[i]
    metrics <- calc_art_metrics(row$img_path, row$file_sz, row$draw_mins, row$strokes)
    metrics[, art_uuid := row$art_uuid]
    metrics
})

stopCluster(cl)
all_metrics <- rbindlist(metrics_list)

Caching Raster Data

If you process the same artworks repeatedly (e.g., during testing), cache the raster data so each image is only read and converted once:

# Extract raster once
rast_cache <- get_image_rast(img_path)

# Reuse for different metric calculations
# (custom analysis functions that accept rast_data directly)
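
One way to structure this is a small memoising wrapper around get_image_rast. The sketch below uses an environment as the cache; cached_rast and .rast_cache are hypothetical helpers, not part of artbenchmark:

# Sketch: memoising wrapper (cached_rast and .rast_cache are
# hypothetical helpers, not part of artbenchmark)
.rast_cache <- new.env(parent = emptyenv())

cached_rast <- function(img_path) {
    if (is.null(.rast_cache[[img_path]])) {
        .rast_cache[[img_path]] <- get_image_rast(img_path)
    }
    .rast_cache[[img_path]]
}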

Validation and Quality Checks

Sanity Check Computed Metrics

validate_metrics <- function(metrics, strokes, draw_mins) {
    issues <- list()

    # Inputs should be positive
    if (strokes <= 0 || draw_mins <= 0) {
        issues <- c(issues, "Non-positive strokes or draw time")
    }

    # Canvas coverage should be 0-100
    coverage <- metrics[metric_key == "canvas_coverage", value]
    if (coverage < 0 || coverage > 100) {
        issues <- c(issues, "Invalid canvas coverage")
    }

    # Unique colors should be reasonable
    n_colors <- metrics[metric_key == "n_unique_colors", value]
    if (n_colors < 10 || n_colors > 100000) {
        issues <- c(issues, "Unusual color count")
    }

    # Blend rate should be 0-1
    blend_rate <- metrics[metric_key == "ave_blend_rate", value]
    if (blend_rate < 0 || blend_rate > 1) {
        issues <- c(issues, "Invalid blend rate")
    }

    if (length(issues) > 0) {
        return(list(valid = FALSE, issues = issues))
    }

    list(valid = TRUE, issues = NULL)
}

validation <- validate_metrics(metrics, strokes, draw_mins)
if (!validation$valid) {
    warning("Metric validation failed: ", paste(validation$issues, collapse = ", "))
}

Next Steps

For production deployment, see the artutils package for database integration and benchmark calculations.