From 72006670f23bc72da6f7db6b207805a1d2c0356d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 04:31:43 +0000 Subject: [PATCH 1/5] Initial plan From 362d14c86246c38195e427971136e4d644450e84 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 04:47:48 +0000 Subject: [PATCH 2/5] Add memory profiling support to profile data model - Parse memory profiling data (small_v, big_v, nodes, dup_count) from Rprof output - Extend sample_types table to include memory sample types - Add memory columns to samples table - Update validate_profile() to support memory profiling data - Update ds_to_rprof / write_rprof for memory profiling roundtrip - Update pprof read/write to handle memory sample types - Add sample memory profiling data file (memory.out) - Add comprehensive tests for memory profiling Co-authored-by: krlmlr <1741643+krlmlr@users.noreply.github.com> --- R/api.R | 37 +++++++++++++++--- R/pprof-from-ds.R | 7 +++- R/pprof-to-ds.R | 38 +++++++++++++++++-- R/rprof-from-ds.R | 34 +++++++++++++++-- R/rprof-read.R | 25 +++++++++++- R/rprof-to-ds.R | 23 ++++++++++- R/rprof-write.R | 11 ++++++ inst/samples/rprof/memory.out | 14 +++++++ tests/testthat/test-rprof-memory.R | 61 ++++++++++++++++++++++++++++++ 9 files changed, 234 insertions(+), 16 deletions(-) create mode 100644 inst/samples/rprof/memory.out create mode 100644 tests/testthat/test-rprof-memory.R diff --git a/R/api.R b/R/api.R index 402ec90..461c1bd 100644 --- a/R/api.R +++ b/R/api.R @@ -64,18 +64,31 @@ validate_profile <- function(x) { stopifnot(undotted(names(x$sample_types)) == c("type", "unit")) stopifnot(is.character(x$sample_types$type)) stopifnot(is.character(x$sample_types$unit)) - #' It is currently restricted to one row with values `"samples"` and `"count"`, + #' The first row must have values `"samples"` and `"count"`, #' respectively. - stopifnot(nrow(x$sample_types) == 1) - stopifnot(x$sample_types$type == "samples") - stopifnot(x$sample_types$unit == "count") + stopifnot(nrow(x$sample_types) >= 1) + stopifnot(x$sample_types$type[[1]] == "samples") + stopifnot(x$sample_types$unit[[1]] == "count") + #' Additional rows may describe memory profiling data. + has_memory <- nrow(x$sample_types) > 1 + if (has_memory) { + stopifnot(nrow(x$sample_types) == 5) + stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count")) + stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count")) + } #' #' The `samples` table has two columns, `value` (integer) and `locations` #' (list). + #' When memory profiling data is present, the table also has integer columns + #' `small_v`, `big_v`, `nodes`, and `dup_count`. #' Additional columns with a leading dot in the name are allowed #' after the required columns. - stopifnot(undotted(names(x$samples)) == c("value", "locations")) + expected_sample_cols <- c("value", "locations") + if (has_memory) { + expected_sample_cols <- c("value", "locations", "small_v", "big_v", "nodes", "dup_count") + } + stopifnot(undotted(names(x$samples)) == expected_sample_cols) stopifnot(is.integer(x$samples$value)) stopifnot(is.list(x$samples$locations)) #' The `value` column describes the number of consecutive samples for the @@ -91,6 +104,20 @@ validate_profile <- function(x) { stopifnot(unlist(map(x$samples$locations, "[[", "location_id")) %in% x$locations$location_id) #' The locations are listed in inner-first order, i.e., the first location #' corresponds to the innermost entry of the stack trace. + #' When memory profiling data is present, the `small_v`, `big_v`, `nodes`, + + #' and `dup_count` columns contain integer memory statistics per sample. + #' All memory values must be nonnegative. + if (has_memory) { + stopifnot(is.integer(x$samples$small_v)) + stopifnot(is.integer(x$samples$big_v)) + stopifnot(is.integer(x$samples$nodes)) + stopifnot(is.integer(x$samples$dup_count)) + stopifnot(x$samples$small_v >= 0L) + stopifnot(x$samples$big_v >= 0L) + stopifnot(x$samples$nodes >= 0L) + stopifnot(x$samples$dup_count >= 0L) + } #' #' The `locations` table has three integer columns, `location_id`, diff --git a/R/pprof-from-ds.R b/R/pprof-from-ds.R index 8f3d105..37f7bfe 100644 --- a/R/pprof-from-ds.R +++ b/R/pprof-from-ds.R @@ -33,9 +33,14 @@ add_sample_types_to_msg <- function(sample_types, msg) { } add_samples_to_msg <- function(samples, msg) { + has_memory <- "small_v" %in% names(samples) msg$sample <- lapply(split_rows(samples), function(s) { s_msg <- RProtoBuf::new(perftools.profiles.Sample) - s_msg$value <- s$value + if (has_memory) { + s_msg$value <- c(s$value, s$small_v, s$big_v, s$nodes, s$dup_count) + } else { + s_msg$value <- s$value + } s_msg$location_id <- s$locations[[1]]$location_id s_msg }) diff --git a/R/pprof-to-ds.R b/R/pprof-to-ds.R index 14c28fd..071029e 100644 --- a/R/pprof-to-ds.R +++ b/R/pprof-to-ds.R @@ -27,15 +27,47 @@ get_sample_types_from_msg <- function(msg) { sample_types$type <- msg$string_table[sample_types$type + 1] sample_types$unit <- msg$string_table[sample_types$unit + 1] - sample_types[1, ] + # Check if memory profiling types are present + mem_types <- c("small_v", "big_v", "nodes", "dup_count") + has_memory <- all(mem_types %in% sample_types$type) + + if (has_memory) { + # Keep first row (samples/count) and the memory rows + keep <- sample_types$type %in% c("samples", mem_types) + sample_types[keep, ] + } else { + sample_types[1, ] + } } get_samples_from_msg <- function(msg) { + # Determine which value indices to keep + n_types <- length(msg$sample_type) + all_types <- map(msg$sample_type, function(st) { + msg$string_table[as.integer(st$type) + 1] + }) + all_types <- unlist(all_types) + + mem_types <- c("small_v", "big_v", "nodes", "dup_count") + has_memory <- all(mem_types %in% all_types) + + if (has_memory) { + mem_indices <- match(mem_types, all_types) + } + samples <- map(msg$sample, function(s) { - tibble::tibble( - value = as.integer(s$value[[1]]), + values <- as.integer(s$value) + row <- tibble::tibble( + value = values[[1]], locations = list(tibble::tibble(location_id = as.integer(s$location_id))) ) + if (has_memory) { + row$small_v <- values[[mem_indices[[1]]]] + row$big_v <- values[[mem_indices[[2]]]] + row$nodes <- values[[mem_indices[[3]]]] + row$dup_count <- values[[mem_indices[[4]]]] + } + row }) samples <- tibble::as_tibble(do.call(rbind, samples)) samples diff --git a/R/rprof-from-ds.R b/R/rprof-from-ds.R index 1ed8442..1c565cf 100644 --- a/R/rprof-from-ds.R +++ b/R/rprof-from-ds.R @@ -1,6 +1,8 @@ ds_to_rprof <- function(ds) { validate_profile(ds) + has_memory <- nrow(ds$sample_types) > 1 + . <- ds$locations . <- merge(., ds$functions[c("function_id", "system_name", "filename")], by = "function_id", sort = FALSE, all.x = TRUE) . <- .[-1L] @@ -18,9 +20,15 @@ ds_to_rprof <- function(ds) { flat_locations <- . files <- paste0("#File ", unique_files$file_id, ": ", unique_files$filename) + + # Expand samples by value (repeat count) + sample_idx <- rep(seq_len(nrow(ds$samples)), ds$samples$value) + traces <- map_chr( - rep(ds$samples$locations, ds$samples$value), - function(loc) { + seq_along(sample_idx), + function(i) { + si <- sample_idx[[i]] + loc <- ds$samples$locations[[si]] . <- flat_locations[match(loc$location_id, flat_locations$location_id), ] stopifnot(.$location_id == loc$location_id) funs <- paste0( @@ -31,9 +39,27 @@ ds_to_rprof <- function(ds) { } ) + header <- if (has_memory) { + "memory profiling: line profiling: sample.interval=20000" + } else { + "line profiling: sample.interval=20000" + } + + # Build memory data for roundtrip compatibility + memory <- NULL + if (has_memory) { + memory <- tibble::tibble( + small_v = ds$samples$small_v[sample_idx], + big_v = ds$samples$big_v[sample_idx], + nodes = ds$samples$nodes[sample_idx], + dup_count = ds$samples$dup_count[sample_idx] + ) + } + tibble::lst( - header = "line profiling: sample.interval=20000", + header, files, - traces + traces, + memory ) } diff --git a/R/rprof-read.R b/R/rprof-read.R index 9821131..42552be 100644 --- a/R/rprof-read.R +++ b/R/rprof-read.R @@ -35,9 +35,32 @@ read_rprof_ll <- function(path) { header <- 1L files <- grep("^#File ", lines) traces <- setdiff(seq_along(lines), c(header, files)) + + memory_profiling <- startsWith(lines[header], "memory profiling:") + + # Strip memory data prefix from trace lines + memory <- NULL + trace_lines <- lines[traces] + if (memory_profiling && length(trace_lines) > 0) { + mem_rx <- "^:([0-9]+):([0-9]+):([0-9]+):([0-9]+):" + mem_matches <- regmatches(trace_lines, regexec(mem_rx, trace_lines)) + has_mem <- vapply(mem_matches, length, integer(1)) > 0 + if (any(has_mem)) { + memory <- tibble::tibble( + small_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 2L)), + big_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 3L)), + nodes = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 4L)), + dup_count = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 5L)) + ) + # Strip the memory prefix from trace lines + trace_lines <- sub(mem_rx, "", trace_lines) + } + } + list( header = lines[header], files = lines[files], - traces = lines[traces] + traces = trace_lines, + memory = memory ) } diff --git a/R/rprof-to-ds.R b/R/rprof-to-ds.R index eec30cd..cdc7500 100644 --- a/R/rprof-to-ds.R +++ b/R/rprof-to-ds.R @@ -19,10 +19,17 @@ rprof_to_ds <- function(rprof) { } get_sample_types_from_rprof <- function(rprof) { - tibble::tibble( + types <- tibble::tibble( type = "samples", unit = "count" ) + if (!is.null(rprof$memory)) { + types <- rbind(types, tibble::tibble( + type = c("small_v", "big_v", "nodes", "dup_count"), + unit = c("cells", "cells", "bytes", "count") + )) + } + types } get_flat_rprof_from_rprof <- function(rprof) { @@ -150,7 +157,19 @@ add_samples_to_flat_rprof <- function(flat_rprof) { .$locations <- map(.$locations, tibble::as_tibble, rownames = NULL) .$value <- 1L - . <- .[c("value", "locations")] + + memory <- flat_rprof$rprof$memory + if (!is.null(memory)) { + # Memory data is indexed by trace line (sample), match by sample_id + mem <- memory[.$sample_id, , drop = FALSE] + .$small_v <- mem$small_v + .$big_v <- mem$big_v + .$nodes <- mem$nodes + .$dup_count <- mem$dup_count + . <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")] + } else { + . <- .[c("value", "locations")] + } flat_rprof$samples <- . diff --git a/R/rprof-write.R b/R/rprof-write.R index 8e2432b..e0de9a3 100644 --- a/R/rprof-write.R +++ b/R/rprof-write.R @@ -2,6 +2,17 @@ #' @export write_rprof <- function(x, path) { rprof <- ds_to_rprof(x) + # Add memory prefix to traces when writing to file + if (!is.null(rprof$memory)) { + rprof$traces <- paste0( + ":", rprof$memory$small_v, + ":", rprof$memory$big_v, + ":", rprof$memory$nodes, + ":", rprof$memory$dup_count, ":", + rprof$traces + ) + } + rprof$memory <- NULL writeLines(unlist(rprof, use.names = FALSE), path) invisible(x) } diff --git a/inst/samples/rprof/memory.out b/inst/samples/rprof/memory.out new file mode 100644 index 0000000..3830a0f --- /dev/null +++ b/inst/samples/rprof/memory.out @@ -0,0 +1,14 @@ +memory profiling: line profiling: sample.interval=20000 +:341536:1289015:34667640:1635:"" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" +:293305:449462:23670472:6514:"anyDuplicated" "[.data.frame" "[" +:369435:615462:36216432:10044:"get" "Ops.data.frame" +:327669:523462:29319360:9780:"[<-.data.frame" "[<-" +:285807:435462:22434496:9769:"any" ".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" +:363324:605462:35221424:10234:"as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" +:322360:515462:28440888:9864:"" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" +:281229:429462:21686672:9852:"Ops.data.frame" +:358636:597462:34433280:10212:"deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" +:318714:513462:27870752:9993:"%in%" "Ops.data.frame" +:279206:429462:21346248:10022:"[.data.frame" "[" +:356880:597462:34140176:10248:".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" +:317207:513462:27611920:9989:"do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" diff --git a/tests/testthat/test-rprof-memory.R b/tests/testthat/test-rprof-memory.R new file mode 100644 index 0000000..0f329dd --- /dev/null +++ b/tests/testthat/test-rprof-memory.R @@ -0,0 +1,61 @@ +test_that("read memory profiling data", { + ds <- read_inst_rprof("rprof/memory.out") + expect_error(validate_profile(ds), NA) + + # sample_types should have 5 rows + + expect_equal(nrow(ds$sample_types), 5) + expect_equal(ds$sample_types$type, c("samples", "small_v", "big_v", "nodes", "dup_count")) + expect_equal(ds$sample_types$unit, c("count", "cells", "cells", "bytes", "count")) + + # samples should have memory columns + expect_true("small_v" %in% names(ds$samples)) + expect_true("big_v" %in% names(ds$samples)) + expect_true("nodes" %in% names(ds$samples)) + expect_true("dup_count" %in% names(ds$samples)) + + # memory values should be nonnegative integers + expect_true(is.integer(ds$samples$small_v)) + expect_true(is.integer(ds$samples$big_v)) + expect_true(is.integer(ds$samples$nodes)) + expect_true(is.integer(ds$samples$dup_count)) + expect_true(all(ds$samples$small_v >= 0)) + expect_true(all(ds$samples$big_v >= 0)) + expect_true(all(ds$samples$nodes >= 0)) + expect_true(all(ds$samples$dup_count >= 0)) +}) + +test_that("non-memory profiles have no memory columns", { + ds <- read_inst_rprof("rprof/1.out") + expect_error(validate_profile(ds), NA) + + expect_equal(nrow(ds$sample_types), 1) + expect_false("small_v" %in% names(ds$samples)) + expect_false("big_v" %in% names(ds$samples)) +}) + +test_that("memory profiling roundtrip via rprof", { + ds <- read_inst_rprof("rprof/memory.out") + + # Convert to rprof and back + ds1 <- rprof_to_ds(ds_to_rprof(ds)) + expect_error(validate_profile(ds1), NA) + + # roundtrip again + ds2 <- rprof_to_ds(ds_to_rprof(ds1)) + expect_error(validate_profile(ds2), NA) + + expect_identical(strip_dots(ds1), strip_dots(ds2)) +}) + +test_that("memory profiling write and read back", { + ds <- read_inst_rprof("rprof/memory.out") + + path <- tempfile("profiler_mem", fileext = ".out") + write_rprof(ds, path) + + ds1 <- read_rprof(path) + expect_error(validate_profile(ds1), NA) + + expect_identical(strip_dots(ds), strip_dots(ds1)) +}) From f9acc663b902c524afb2d862816cf0e235209c0d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 04:50:17 +0000 Subject: [PATCH 3/5] Fix documentation comment line break in api.R Co-authored-by: krlmlr <1741643+krlmlr@users.noreply.github.com> --- R/api.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/api.R b/R/api.R index 461c1bd..d636fdb 100644 --- a/R/api.R +++ b/R/api.R @@ -105,7 +105,6 @@ validate_profile <- function(x) { #' The locations are listed in inner-first order, i.e., the first location #' corresponds to the innermost entry of the stack trace. #' When memory profiling data is present, the `small_v`, `big_v`, `nodes`, - #' and `dup_count` columns contain integer memory statistics per sample. #' All memory values must be nonnegative. if (has_memory) { From ebed6a6eae36746837d95b2c5b7c38eca89ccecd Mon Sep 17 00:00:00 2001 From: Copilot Date: Fri, 13 Feb 2026 04:59:11 +0000 Subject: [PATCH 4/5] chore: Auto-update from GitHub Actions Run: https://github.com/r-prof/profile/actions/runs/21975108397 --- man/validate_profile.Rd | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/man/validate_profile.Rd b/man/validate_profile.Rd index ec6115b..2e0436e 100644 --- a/man/validate_profile.Rd +++ b/man/validate_profile.Rd @@ -48,11 +48,14 @@ that is accepted by \code{\link[=package_version]{package_version()}}. The \code{sample_types} table has two character columns, \code{type} and \code{unit}. Additional columns with a leading dot in the name are allowed after the required columns. -It is currently restricted to one row with values \code{"samples"} and \code{"count"}, +The first row must have values \code{"samples"} and \code{"count"}, respectively. +Additional rows may describe memory profiling data. The \code{samples} table has two columns, \code{value} (integer) and \code{locations} (list). +When memory profiling data is present, the table also has integer columns +\code{small_v}, \code{big_v}, \code{nodes}, and \code{dup_count}. Additional columns with a leading dot in the name are allowed after the required columns. The \code{value} column describes the number of consecutive samples for the @@ -63,6 +66,9 @@ For each \code{location_id} value a corresponding observation in the \code{locat table must exist. The locations are listed in inner-first order, i.e., the first location corresponds to the innermost entry of the stack trace. +When memory profiling data is present, the \code{small_v}, \code{big_v}, \code{nodes}, +and \code{dup_count} columns contain integer memory statistics per sample. +All memory values must be nonnegative. The \code{locations} table has three integer columns, \code{location_id}, \code{function_id}, and \code{line}. From f4636daf615b78044a2b22beb2971ae50aec27a1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 15:08:19 +0000 Subject: [PATCH 5/5] Make data model type-stable: always include memory columns sample_types always has 5 rows and samples always has 6 columns (value, locations, small_v, big_v, nodes, dup_count). When no memory profiling data is present, memory columns are all NA_integer_. Co-authored-by: krlmlr <1741643+krlmlr@users.noreply.github.com> --- R/api.R | 55 ++++++++++++------------------ R/pprof-from-ds.R | 11 ++++-- R/pprof-to-ds.R | 35 ++++++------------- R/rprof-from-ds.R | 2 +- R/rprof-to-ds.R | 20 +++++------ tests/testthat/test-rprof-memory.R | 24 ++++++++----- 6 files changed, 65 insertions(+), 82 deletions(-) diff --git a/R/api.R b/R/api.R index d636fdb..f280796 100644 --- a/R/api.R +++ b/R/api.R @@ -64,31 +64,19 @@ validate_profile <- function(x) { stopifnot(undotted(names(x$sample_types)) == c("type", "unit")) stopifnot(is.character(x$sample_types$type)) stopifnot(is.character(x$sample_types$unit)) - #' The first row must have values `"samples"` and `"count"`, - #' respectively. - stopifnot(nrow(x$sample_types) >= 1) - stopifnot(x$sample_types$type[[1]] == "samples") - stopifnot(x$sample_types$unit[[1]] == "count") - #' Additional rows may describe memory profiling data. - has_memory <- nrow(x$sample_types) > 1 - if (has_memory) { - stopifnot(nrow(x$sample_types) == 5) - stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count")) - stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count")) - } + #' It always has five rows describing the sample count and memory profiling + #' data types. + stopifnot(nrow(x$sample_types) == 5) + stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count")) + stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count")) #' - #' The `samples` table has two columns, `value` (integer) and `locations` - #' (list). - #' When memory profiling data is present, the table also has integer columns - #' `small_v`, `big_v`, `nodes`, and `dup_count`. + #' The `samples` table has six columns: `value` (integer), `locations` + #' (list), and integer columns `small_v`, `big_v`, `nodes`, and `dup_count` + #' for memory profiling data. #' Additional columns with a leading dot in the name are allowed #' after the required columns. - expected_sample_cols <- c("value", "locations") - if (has_memory) { - expected_sample_cols <- c("value", "locations", "small_v", "big_v", "nodes", "dup_count") - } - stopifnot(undotted(names(x$samples)) == expected_sample_cols) + stopifnot(undotted(names(x$samples)) == c("value", "locations", "small_v", "big_v", "nodes", "dup_count")) stopifnot(is.integer(x$samples$value)) stopifnot(is.list(x$samples$locations)) #' The `value` column describes the number of consecutive samples for the @@ -104,19 +92,18 @@ validate_profile <- function(x) { stopifnot(unlist(map(x$samples$locations, "[[", "location_id")) %in% x$locations$location_id) #' The locations are listed in inner-first order, i.e., the first location #' corresponds to the innermost entry of the stack trace. - #' When memory profiling data is present, the `small_v`, `big_v`, `nodes`, - #' and `dup_count` columns contain integer memory statistics per sample. - #' All memory values must be nonnegative. - if (has_memory) { - stopifnot(is.integer(x$samples$small_v)) - stopifnot(is.integer(x$samples$big_v)) - stopifnot(is.integer(x$samples$nodes)) - stopifnot(is.integer(x$samples$dup_count)) - stopifnot(x$samples$small_v >= 0L) - stopifnot(x$samples$big_v >= 0L) - stopifnot(x$samples$nodes >= 0L) - stopifnot(x$samples$dup_count >= 0L) - } + #' The `small_v`, `big_v`, `nodes`, and `dup_count` columns contain integer + #' memory statistics per sample. When memory profiling data is not available, + #' these columns are all `NA`. When present, all memory values must be + #' nonnegative. + stopifnot(is.integer(x$samples$small_v)) + stopifnot(is.integer(x$samples$big_v)) + stopifnot(is.integer(x$samples$nodes)) + stopifnot(is.integer(x$samples$dup_count)) + stopifnot(is.na(x$samples$small_v) | x$samples$small_v >= 0L) + stopifnot(is.na(x$samples$big_v) | x$samples$big_v >= 0L) + stopifnot(is.na(x$samples$nodes) | x$samples$nodes >= 0L) + stopifnot(is.na(x$samples$dup_count) | x$samples$dup_count >= 0L) #' #' The `locations` table has three integer columns, `location_id`, diff --git a/R/pprof-from-ds.R b/R/pprof-from-ds.R index 37f7bfe..71c71b2 100644 --- a/R/pprof-from-ds.R +++ b/R/pprof-from-ds.R @@ -2,6 +2,8 @@ ds_to_msg <- function(ds) { validate_profile(ds) provide_proto() + has_memory <- !all(is.na(ds$samples$small_v)) + msg <- RProtoBuf::new(perftools.profiles.Profile) msg$string_table <- unique(c( @@ -13,14 +15,17 @@ ds_to_msg <- function(ds) { ds$functions$filename )) - add_sample_types_to_msg(ds$sample_types, msg) + add_sample_types_to_msg(ds$sample_types, msg, has_memory) add_samples_to_msg(ds$samples, msg) add_locations_to_msg(ds$locations, msg) add_functions_to_msg(ds$functions, msg) msg } -add_sample_types_to_msg <- function(sample_types, msg) { +add_sample_types_to_msg <- function(sample_types, msg, has_memory) { + if (!has_memory) { + sample_types <- sample_types[1, , drop = FALSE] + } sample_types$type <- match(sample_types$type, msg$string_table) - 1L sample_types$unit <- match(sample_types$unit, msg$string_table) - 1L @@ -33,7 +38,7 @@ add_sample_types_to_msg <- function(sample_types, msg) { } add_samples_to_msg <- function(samples, msg) { - has_memory <- "small_v" %in% names(samples) + has_memory <- !all(is.na(samples$small_v)) msg$sample <- lapply(split_rows(samples), function(s) { s_msg <- RProtoBuf::new(perftools.profiles.Sample) if (has_memory) { diff --git a/R/pprof-to-ds.R b/R/pprof-to-ds.R index 071029e..c52400d 100644 --- a/R/pprof-to-ds.R +++ b/R/pprof-to-ds.R @@ -16,33 +16,14 @@ msg_to_ds <- function(msg) { } get_sample_types_from_msg <- function(msg) { - sample_types <- map(msg$sample_type, function(st) { - tibble::tibble( - type = as.integer(st$type), - unit = as.integer(st$unit) - ) - }) - sample_types <- merge_rows(sample_types) - - sample_types$type <- msg$string_table[sample_types$type + 1] - sample_types$unit <- msg$string_table[sample_types$unit + 1] - - # Check if memory profiling types are present - mem_types <- c("small_v", "big_v", "nodes", "dup_count") - has_memory <- all(mem_types %in% sample_types$type) - - if (has_memory) { - # Keep first row (samples/count) and the memory rows - keep <- sample_types$type %in% c("samples", mem_types) - sample_types[keep, ] - } else { - sample_types[1, ] - } + tibble::tibble( + type = c("samples", "small_v", "big_v", "nodes", "dup_count"), + unit = c("count", "cells", "cells", "bytes", "count") + ) } get_samples_from_msg <- function(msg) { - # Determine which value indices to keep - n_types <- length(msg$sample_type) + # Determine which value indices correspond to memory types all_types <- map(msg$sample_type, function(st) { msg$string_table[as.integer(st$type) + 1] }) @@ -51,6 +32,7 @@ get_samples_from_msg <- function(msg) { mem_types <- c("small_v", "big_v", "nodes", "dup_count") has_memory <- all(mem_types %in% all_types) + mem_indices <- NULL if (has_memory) { mem_indices <- match(mem_types, all_types) } @@ -66,6 +48,11 @@ get_samples_from_msg <- function(msg) { row$big_v <- values[[mem_indices[[2]]]] row$nodes <- values[[mem_indices[[3]]]] row$dup_count <- values[[mem_indices[[4]]]] + } else { + row$small_v <- NA_integer_ + row$big_v <- NA_integer_ + row$nodes <- NA_integer_ + row$dup_count <- NA_integer_ } row }) diff --git a/R/rprof-from-ds.R b/R/rprof-from-ds.R index 1c565cf..d799b75 100644 --- a/R/rprof-from-ds.R +++ b/R/rprof-from-ds.R @@ -1,7 +1,7 @@ ds_to_rprof <- function(ds) { validate_profile(ds) - has_memory <- nrow(ds$sample_types) > 1 + has_memory <- !all(is.na(ds$samples$small_v)) . <- ds$locations . <- merge(., ds$functions[c("function_id", "system_name", "filename")], by = "function_id", sort = FALSE, all.x = TRUE) diff --git a/R/rprof-to-ds.R b/R/rprof-to-ds.R index cdc7500..2064589 100644 --- a/R/rprof-to-ds.R +++ b/R/rprof-to-ds.R @@ -19,17 +19,10 @@ rprof_to_ds <- function(rprof) { } get_sample_types_from_rprof <- function(rprof) { - types <- tibble::tibble( - type = "samples", - unit = "count" + tibble::tibble( + type = c("samples", "small_v", "big_v", "nodes", "dup_count"), + unit = c("count", "cells", "cells", "bytes", "count") ) - if (!is.null(rprof$memory)) { - types <- rbind(types, tibble::tibble( - type = c("small_v", "big_v", "nodes", "dup_count"), - unit = c("cells", "cells", "bytes", "count") - )) - } - types } get_flat_rprof_from_rprof <- function(rprof) { @@ -166,10 +159,13 @@ add_samples_to_flat_rprof <- function(flat_rprof) { .$big_v <- mem$big_v .$nodes <- mem$nodes .$dup_count <- mem$dup_count - . <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")] } else { - . <- .[c("value", "locations")] + .$small_v <- NA_integer_ + .$big_v <- NA_integer_ + .$nodes <- NA_integer_ + .$dup_count <- NA_integer_ } + . <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")] flat_rprof$samples <- . diff --git a/tests/testthat/test-rprof-memory.R b/tests/testthat/test-rprof-memory.R index 0f329dd..54d266b 100644 --- a/tests/testthat/test-rprof-memory.R +++ b/tests/testthat/test-rprof-memory.R @@ -2,19 +2,18 @@ test_that("read memory profiling data", { ds <- read_inst_rprof("rprof/memory.out") expect_error(validate_profile(ds), NA) - # sample_types should have 5 rows - + # sample_types always has 5 rows expect_equal(nrow(ds$sample_types), 5) expect_equal(ds$sample_types$type, c("samples", "small_v", "big_v", "nodes", "dup_count")) expect_equal(ds$sample_types$unit, c("count", "cells", "cells", "bytes", "count")) - # samples should have memory columns + # samples always have memory columns expect_true("small_v" %in% names(ds$samples)) expect_true("big_v" %in% names(ds$samples)) expect_true("nodes" %in% names(ds$samples)) expect_true("dup_count" %in% names(ds$samples)) - # memory values should be nonnegative integers + # memory values should be nonnegative integers (not NA for memory profiles) expect_true(is.integer(ds$samples$small_v)) expect_true(is.integer(ds$samples$big_v)) expect_true(is.integer(ds$samples$nodes)) @@ -25,13 +24,22 @@ test_that("read memory profiling data", { expect_true(all(ds$samples$dup_count >= 0)) }) -test_that("non-memory profiles have no memory columns", { +test_that("non-memory profiles have NA memory columns", { ds <- read_inst_rprof("rprof/1.out") expect_error(validate_profile(ds), NA) - expect_equal(nrow(ds$sample_types), 1) - expect_false("small_v" %in% names(ds$samples)) - expect_false("big_v" %in% names(ds$samples)) + # sample_types always has 5 rows + expect_equal(nrow(ds$sample_types), 5) + + # memory columns are always present but all NA + expect_true("small_v" %in% names(ds$samples)) + expect_true("big_v" %in% names(ds$samples)) + expect_true("nodes" %in% names(ds$samples)) + expect_true("dup_count" %in% names(ds$samples)) + expect_true(all(is.na(ds$samples$small_v))) + expect_true(all(is.na(ds$samples$big_v))) + expect_true(all(is.na(ds$samples$nodes))) + expect_true(all(is.na(ds$samples$dup_count))) }) test_that("memory profiling roundtrip via rprof", {