Overview

This guide covers advanced use cases for artbenchmark:

  • Batch processing multiple artworks
  • Custom metric analysis using raster data
  • Performance optimization for large portfolios
  • Integration with artutils benchmarking system
  • Understanding extended metrics

Batch Processing Artworks

Processing Multiple Artworks

When processing an artist’s portfolio, batch operations compute metrics for every artwork in one pass and collect the results into a single table:

library(data.table)
library(artbenchmark)

# Sample artwork data from upload process
artwork_files <- data.table(
    art_uuid = c(
        "99a61148-1d3b-4340-8cf6-92ad26046b0f",
        "99b72259-2e4c-5451-9dg7-a02e14fc7e1g",
        "99c83360-3f5d-6562-0eh8-b13f25gd8f2h"
    ),
    img_path = c(
        "/uploads/artist1/artwork1/main.png",
        "/uploads/artist1/artwork2/main.png",
        "/uploads/artist1/artwork3/main.png"
    ),
    file_sz = c(1847293, 2103847, 1654982),
    draw_mins = c(145, 189, 132),
    strokes = c(52340, 67823, 48291)
)

# Compute metrics for all artworks
metrics_list <- lapply(seq_len(nrow(artwork_files)), function(i) {
    row <- artwork_files[i]

    metrics <- calc_art_metrics(
        img_path = row$img_path,
        file_sz = row$file_sz,
        draw_mins = row$draw_mins,
        strokes = row$strokes
    )

    # Add artwork identifier
    metrics[, art_uuid := row$art_uuid]

    metrics
})

# Combine into single data.table
all_metrics <- rbindlist(metrics_list)

# Add common identifiers
all_metrics[, `:=`(
    artist_uuid = "746b8207-72f5-4ab6-8d19-a91d03daec3d",
    created_utc = Sys.time()
)]

Error Handling in Batch Operations

Robust batch processing handles failures gracefully:

safe_calc_metrics <- function(artwork_row) {
    tryCatch(
        {
            metrics <- calc_art_metrics(
                img_path = artwork_row$img_path,
                file_sz = artwork_row$file_sz,
                draw_mins = artwork_row$draw_mins,
                strokes = artwork_row$strokes
            )
            metrics[, art_uuid := artwork_row$art_uuid]
            list(success = TRUE, data = metrics, error = NULL)
        },
        error = function(e) {
            list(
                success = FALSE,
                data = NULL,
                error = paste0(artwork_row$art_uuid, ": ", e$message)
            )
        }
    )
}

# Process with error tracking
results <- lapply(seq_len(nrow(artwork_files)), function(i) {
    safe_calc_metrics(artwork_files[i])
})

# Extract successful results
successful <- results[sapply(results, function(x) x$success)]
all_metrics <- rbindlist(lapply(successful, function(x) x$data))

# Log failures
failures <- results[!sapply(results, function(x) x$success)]
if (length(failures) > 0) {
    lapply(failures, function(x) message(x$error))
}

Custom Metric Analysis

Working with Raw Raster Data

For custom analysis beyond standard metrics, extract raster data directly:

# Extract raster data
rast_data <- get_image_rast("/path/to/artwork.png")

# Structure:
# - colors: data.table with col (hex) and N (pixel count)
# - total_px: Integer, total pixels in image
# - colored_px: Integer, pixels with color
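
To confirm what you are working with, here is a quick inspection of the extracted object (a minimal sketch, assuming rast_data from the call above):

# Peek at the extracted raster data
str(rast_data, max.level = 1)
head(rast_data$colors[order(-N)])  # most common colors first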

Custom Metric Examples

Color Distribution Analysis

# Get color frequency distribution
colors <- rast_data$colors

# Find colors used more than 100 times
frequent_colors <- colors[N > 100]

# Calculate color concentration
# (what % of pixels use the top 10 colors?)
top_10_colors <- colors[order(-N)][1:10]
concentration <- sum(top_10_colors$N) / rast_data$colored_px * 100

print(paste0("Top 10 colors account for ", round(concentration, 2), "% of pixels"))

Palette Complexity Score

# Custom complexity metric combining multiple factors
palette_complexity <- function(rast_data, strokes) {
    n_colors <- nrow(rast_data$colors)
    coverage <- rast_data$colored_px / rast_data$total_px
    colors_per_stroke <- n_colors / strokes

    # Weighted complexity score (custom formula)
    complexity <- (n_colors * 0.4) + (coverage * 100 * 0.3) + (colors_per_stroke * 10000 * 0.3)

    data.table(
        metric_key = "palette_complexity",
        value = complexity
    )
}

custom_metric <- palette_complexity(rast_data, strokes = 50000)

Color Dominance Analysis

# Identify if artwork has a dominant color theme
analyze_color_dominance <- function(rast_data) {
    colors <- rast_data$colors

    # Convert hex to RGB for hue analysis (rgb_vals would feed the hue
    # classification below; it is unused in this simplified example)
    rgb_vals <- col2rgb(colors$col)

    # Simple hue classification (would be more sophisticated in production)
    # Count pixels by rough hue category
    # (Red, Orange, Yellow, Green, Blue, Purple, Neutral)

    # For this example, just show the concept:
    top_color <- colors[which.max(N)]
    top_color_pct <- (top_color$N / rast_data$colored_px) * 100

    data.table(
        dominant_color = top_color$col,
        dominance_pct = top_color_pct,
        has_dominant_theme = top_color_pct > 25
    )
}

dominance <- analyze_color_dominance(rast_data)

Understanding Extended Metrics

Extended metrics are computed but not part of the core 16-metric framework. They provide deeper insights for research and future feature development.

Accessing Extended Metrics

# Get list of extended metric keys
extended_keys <- get_extended_metrics()
print(extended_keys)
#> [1] "count_dom_colors"  "q25_color_freq"    "q50_color_freq"
#> [4] "ave_bits_psec"     "ave_strokes_psec"  "ave_colors_psec"
#> [7] "ave_bits_pstroke"  "ave_bits_pcolor"

# Filter extended metrics from computation results
metrics <- calc_art_metrics(img_path, file_sz, draw_mins, strokes)
extended_metrics <- metrics[metric_key %in% extended_keys]

What Extended Metrics Measure

Color Frequency Quantiles

# q25_color_freq and q50_color_freq describe the distribution of
# how often individual colors appear
# Lower quartile (q25) = usage level of the rarer colors
# Median (q50) = typical color usage

# A high q50 suggests colors are reused heavily across the canvas
# A low q50 suggests usage is spread thinly across many colors
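
As a rough illustration, these quantiles can be derived directly from the raster color counts. The sketch below is illustrative only; artbenchmark's internal computation may differ:

# Sketch: deriving color-frequency quantiles from raster data
# (illustrative; not necessarily artbenchmark's internal formula)
freqs <- rast_data$colors$N
quantile(freqs, probs = c(0.25, 0.50))  # cf. q25_color_freq, q50_color_freq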

Time-Based Productivity

# ave_bits_psec = File size growth rate (bytes per second)
# ave_strokes_psec = Mark-making speed
# ave_colors_psec = Color introduction rate

# These complement the core "ave_bpm" metric from Procreate stats
# Useful for detecting unusual creation patterns
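
A minimal sketch of how these rates relate to the raw inputs, assuming draw_mins is converted to seconds (illustrative formulas; the package's own definitions may differ):

# Sketch: time-based productivity rates (illustrative formulas)
draw_secs <- draw_mins * 60
file_sz / draw_secs                  # cf. ave_bits_psec
strokes / draw_secs                  # cf. ave_strokes_psec
nrow(rast_data$colors) / draw_secs   # cf. ave_colors_psec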

Complexity Ratios

# ave_bits_pstroke = File complexity per stroke
# ave_bits_pcolor = File complexity per color

# High bits-per-stroke = detailed, complex strokes
# Low bits-per-stroke = simple, efficient mark-making

# High bits-per-color = rich, blended color usage
# Low bits-per-color = limited palette
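
The ratios follow the same pattern (again illustrative; the exact formulas live inside artbenchmark):

# Sketch: complexity ratios (illustrative formulas)
file_sz / strokes                  # cf. ave_bits_pstroke
file_sz / nrow(rast_data$colors)   # cf. ave_bits_pcolor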

Integration with artutils

artbenchmark is designed to work seamlessly with the artutils benchmarking system.

Typical Integration Flow

# 1. Compute metrics with artbenchmark
metrics <- calc_art_metrics(img_path, file_sz, draw_mins, strokes)

# 2. Add identifiers
metrics[, `:=`(
    art_uuid = artwork_uuid,
    artist_uuid = artist_uuid,
    created_utc = Sys.time()
)]

# 3. Insert into database via artutils
# artutils::insert_artwork_metrics(metrics, cn)

# 4. Update artist benchmarks (percentile calculations)
# artutils::update_artist_benchmarks(artist_uuid, cn)

Portfolio-Relative Benchmarking

The metrics computed here feed into portfolio-relative percentile calculations:

# After inserting multiple artworks, artutils computes percentiles
# within the artist's portfolio:

# For each metric:
# - Sort all artwork values for that artist
# - Calculate percentile rank (0-100) for each artwork
# - Store percentile in artist_benchmarks table

# Example result (from artutils):
# art_uuid        metric_key         value  percentile
# 99xxx...     canvas_coverage      91.2         85
# 99xxx...     n_unique_colors    9876.0         72
# 99xxx...     ave_blend_rate        0.81        91

# Percentile 85 = this artwork's coverage exceeds 85% of the artist's portfolio
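
The percentile step itself belongs to artutils, but conceptually it is a ranked position within the portfolio. A minimal data.table sketch (illustrative; not artutils' actual implementation):

# Sketch: portfolio-relative percentile ranks
# (illustrative; artutils' implementation may differ)
all_metrics[, percentile := round(
    100 * (frank(value) - 1) / pmax(.N - 1, 1)
), by = metric_key]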

Performance Considerations

Image Processing Overhead

The main performance bottleneck is image resizing and raster extraction:

# Benchmark: Single artwork processing time
system.time({
    metrics <- calc_art_metrics(img_path, file_sz, draw_mins, strokes)
})
#   user  system elapsed
#  1.234   0.123   1.357

# ~1.3 seconds per artwork on typical hardware
# Dominated by ImageMagick operations (image resize, raster conversion)

Optimization Strategies

Parallel Processing

# For large portfolios, process artworks in parallel
library(parallel)

# Detect cores
n_cores <- detectCores() - 1

# Parallel processing
cl <- makeCluster(n_cores)
clusterEvalQ(cl, {
    library(artbenchmark)
    library(data.table)
})

# Workers need a copy of the input table
clusterExport(cl, "artwork_files")

metrics_list <- parLapply(cl, seq_len(nrow(artwork_files)), function(i) {
    row <- artwork_files[i]
    metrics <- calc_art_metrics(row$img_path, row$file_sz, row$draw_mins, row$strokes)
    metrics[, art_uuid := row$art_uuid]
    metrics
})

stopCluster(cl)
all_metrics <- rbindlist(metrics_list)

Caching Raster Data

If you process the same artworks repeatedly (e.g., during testing), cache the raster data so each image is only read and converted once:

# Extract raster once
rast_cache <- get_image_rast(img_path)

# Reuse for different metric calculations
# (custom analysis functions that accept rast_data directly)
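
One way to structure this is a small memoising wrapper around get_image_rast. The sketch below uses an environment as the cache; cached_rast and .rast_cache are hypothetical helpers, not part of artbenchmark:

# Sketch: memoising wrapper (cached_rast and .rast_cache are
# hypothetical helpers, not part of artbenchmark)
.rast_cache <- new.env(parent = emptyenv())

cached_rast <- function(img_path) {
    if (is.null(.rast_cache[[img_path]])) {
        .rast_cache[[img_path]] <- get_image_rast(img_path)
    }
    .rast_cache[[img_path]]
}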

Validation and Quality Checks

Sanity Check Computed Metrics

validate_metrics <- function(metrics, strokes, draw_mins) {
    issues <- list()

    # Inputs should be positive
    if (strokes <= 0 || draw_mins <= 0) {
        issues <- c(issues, "Non-positive strokes or draw time")
    }

    # Canvas coverage should be 0-100
    coverage <- metrics[metric_key == "canvas_coverage", value]
    if (coverage < 0 || coverage > 100) {
        issues <- c(issues, "Invalid canvas coverage")
    }

    # Unique colors should be reasonable
    n_colors <- metrics[metric_key == "n_unique_colors", value]
    if (n_colors < 10 || n_colors > 100000) {
        issues <- c(issues, "Unusual color count")
    }

    # Blend rate should be 0-1
    blend_rate <- metrics[metric_key == "ave_blend_rate", value]
    if (blend_rate < 0 || blend_rate > 1) {
        issues <- c(issues, "Invalid blend rate")
    }

    if (length(issues) > 0) {
        return(list(valid = FALSE, issues = issues))
    }

    list(valid = TRUE, issues = NULL)
}

validation <- validate_metrics(metrics, strokes, draw_mins)
if (!validation$valid) {
    warning("Metric validation failed: ", paste(validation$issues, collapse = ", "))
}

Next Steps

For production deployment, see the artutils package for database integration and benchmark calculations.