From 72006670f23bc72da6f7db6b207805a1d2c0356d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 13 Feb 2026 04:31:43 +0000
Subject: [PATCH 1/5] Initial plan


From 362d14c86246c38195e427971136e4d644450e84 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 13 Feb 2026 04:47:48 +0000
Subject: [PATCH 2/5] Add memory profiling support to profile data model

- Parse memory profiling data (small_v, big_v, nodes, dup_count) from Rprof output
- Extend sample_types table to include memory sample types
- Add memory columns to samples table
- Update validate_profile() to support memory profiling data
- Update ds_to_rprof / write_rprof for memory profiling roundtrip
- Update pprof read/write to handle memory sample types
- Add sample memory profiling data file (memory.out)
- Add comprehensive tests for memory profiling

Co-authored-by: krlmlr <1741643+krlmlr@users.noreply.github.com>
---
 R/api.R                            | 37 +++++++++++++++---
 R/pprof-from-ds.R                  |  7 +++-
 R/pprof-to-ds.R                    | 38 +++++++++++++++++--
 R/rprof-from-ds.R                  | 34 +++++++++++++++--
 R/rprof-read.R                     | 25 +++++++++++-
 R/rprof-to-ds.R                    | 23 ++++++++++-
 R/rprof-write.R                    | 11 ++++++
 inst/samples/rprof/memory.out      | 14 +++++++
 tests/testthat/test-rprof-memory.R | 61 ++++++++++++++++++++++++++++++
 9 files changed, 234 insertions(+), 16 deletions(-)
 create mode 100644 inst/samples/rprof/memory.out
 create mode 100644 tests/testthat/test-rprof-memory.R

diff --git a/R/api.R b/R/api.R
index 402ec90..461c1bd 100644
--- a/R/api.R
+++ b/R/api.R
@@ -64,18 +64,31 @@ validate_profile <- function(x) {
   stopifnot(undotted(names(x$sample_types)) == c("type", "unit"))
   stopifnot(is.character(x$sample_types$type))
   stopifnot(is.character(x$sample_types$unit))
-  #' It is currently restricted to one row with values `"samples"` and `"count"`,
+  #' The first row must have values `"samples"` and `"count"`,
   #' respectively.
-  stopifnot(nrow(x$sample_types) == 1)
-  stopifnot(x$sample_types$type == "samples")
-  stopifnot(x$sample_types$unit == "count")
+  stopifnot(nrow(x$sample_types) >= 1)
+  stopifnot(x$sample_types$type[[1]] == "samples")
+  stopifnot(x$sample_types$unit[[1]] == "count")
+  #' Additional rows may describe memory profiling data.
+  has_memory <- nrow(x$sample_types) > 1
+  if (has_memory) {
+    stopifnot(nrow(x$sample_types) == 5)
+    stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count"))
+    stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count"))
+  }
 
   #'
   #' The `samples` table has two columns, `value` (integer) and `locations`
   #' (list).
+  #' When memory profiling data is present, the table also has integer columns
+  #' `small_v`, `big_v`, `nodes`, and `dup_count`.
   #' Additional columns with a leading dot in the name are allowed
   #' after the required columns.
-  stopifnot(undotted(names(x$samples)) == c("value", "locations"))
+  expected_sample_cols <- c("value", "locations")
+  if (has_memory) {
+    expected_sample_cols <- c("value", "locations", "small_v", "big_v", "nodes", "dup_count")
+  }
+  stopifnot(undotted(names(x$samples)) == expected_sample_cols)
   stopifnot(is.integer(x$samples$value))
   stopifnot(is.list(x$samples$locations))
   #' The `value` column describes the number of consecutive samples for the
@@ -91,6 +104,20 @@ validate_profile <- function(x) {
   stopifnot(unlist(map(x$samples$locations, "[[", "location_id")) %in% x$locations$location_id)
   #' The locations are listed in inner-first order, i.e., the first location
   #' corresponds to the innermost entry of the stack trace.
+  #' When memory profiling data is present, the `small_v`, `big_v`, `nodes`,
+
+  #' and `dup_count` columns contain integer memory statistics per sample.
+  #' All memory values must be nonnegative.
+  if (has_memory) {
+    stopifnot(is.integer(x$samples$small_v))
+    stopifnot(is.integer(x$samples$big_v))
+    stopifnot(is.integer(x$samples$nodes))
+    stopifnot(is.integer(x$samples$dup_count))
+    stopifnot(x$samples$small_v >= 0L)
+    stopifnot(x$samples$big_v >= 0L)
+    stopifnot(x$samples$nodes >= 0L)
+    stopifnot(x$samples$dup_count >= 0L)
+  }
 
   #'
   #' The `locations` table has three integer columns, `location_id`,
diff --git a/R/pprof-from-ds.R b/R/pprof-from-ds.R
index 8f3d105..37f7bfe 100644
--- a/R/pprof-from-ds.R
+++ b/R/pprof-from-ds.R
@@ -33,9 +33,14 @@ add_sample_types_to_msg <- function(sample_types, msg) {
 }
 
 add_samples_to_msg <- function(samples, msg) {
+  has_memory <- "small_v" %in% names(samples)
   msg$sample <- lapply(split_rows(samples), function(s) {
     s_msg <- RProtoBuf::new(perftools.profiles.Sample)
-    s_msg$value <- s$value
+    if (has_memory) {
+      s_msg$value <- c(s$value, s$small_v, s$big_v, s$nodes, s$dup_count)
+    } else {
+      s_msg$value <- s$value
+    }
     s_msg$location_id <- s$locations[[1]]$location_id
     s_msg
   })
diff --git a/R/pprof-to-ds.R b/R/pprof-to-ds.R
index 14c28fd..071029e 100644
--- a/R/pprof-to-ds.R
+++ b/R/pprof-to-ds.R
@@ -27,15 +27,47 @@ get_sample_types_from_msg <- function(msg) {
   sample_types$type <- msg$string_table[sample_types$type + 1]
   sample_types$unit <- msg$string_table[sample_types$unit + 1]
 
-  sample_types[1, ]
+  # Check if memory profiling types are present
+  mem_types <- c("small_v", "big_v", "nodes", "dup_count")
+  has_memory <- all(mem_types %in% sample_types$type)
+
+  if (has_memory) {
+    # Keep first row (samples/count) and the memory rows
+    keep <- sample_types$type %in% c("samples", mem_types)
+    sample_types[keep, ]
+  } else {
+    sample_types[1, ]
+  }
 }
 
 get_samples_from_msg <- function(msg) {
+  # Determine which value indices to keep
+  n_types <- length(msg$sample_type)
+  all_types <- map(msg$sample_type, function(st) {
+    msg$string_table[as.integer(st$type) + 1]
+  })
+  all_types <- unlist(all_types)
+
+  mem_types <- c("small_v", "big_v", "nodes", "dup_count")
+  has_memory <- all(mem_types %in% all_types)
+
+  if (has_memory) {
+    mem_indices <- match(mem_types, all_types)
+  }
+
   samples <- map(msg$sample, function(s) {
-    tibble::tibble(
-      value = as.integer(s$value[[1]]),
+    values <- as.integer(s$value)
+    row <- tibble::tibble(
+      value = values[[1]],
       locations = list(tibble::tibble(location_id = as.integer(s$location_id)))
     )
+    if (has_memory) {
+      row$small_v <- values[[mem_indices[[1]]]]
+      row$big_v <- values[[mem_indices[[2]]]]
+      row$nodes <- values[[mem_indices[[3]]]]
+      row$dup_count <- values[[mem_indices[[4]]]]
+    }
+    row
   })
   samples <- tibble::as_tibble(do.call(rbind, samples))
   samples
diff --git a/R/rprof-from-ds.R b/R/rprof-from-ds.R
index 1ed8442..1c565cf 100644
--- a/R/rprof-from-ds.R
+++ b/R/rprof-from-ds.R
@@ -1,6 +1,8 @@
 ds_to_rprof <- function(ds) {
   validate_profile(ds)
 
+  has_memory <- nrow(ds$sample_types) > 1
+
   . <- ds$locations
   . <- merge(., ds$functions[c("function_id", "system_name", "filename")], by = "function_id", sort = FALSE, all.x = TRUE)
   . <- .[-1L]
@@ -18,9 +20,15 @@ ds_to_rprof <- function(ds) {
   flat_locations <- .
 
   files <- paste0("#File ", unique_files$file_id, ": ", unique_files$filename)
+
+  # Expand samples by value (repeat count)
+  sample_idx <- rep(seq_len(nrow(ds$samples)), ds$samples$value)
+
   traces <- map_chr(
-    rep(ds$samples$locations, ds$samples$value),
-    function(loc) {
+    seq_along(sample_idx),
+    function(i) {
+      si <- sample_idx[[i]]
+      loc <- ds$samples$locations[[si]]
       . <- flat_locations[match(loc$location_id, flat_locations$location_id), ]
       stopifnot(.$location_id == loc$location_id)
       funs <- paste0(
@@ -31,9 +39,27 @@ ds_to_rprof <- function(ds) {
     }
   )
 
+  header <- if (has_memory) {
+    "memory profiling: line profiling: sample.interval=20000"
+  } else {
+    "line profiling: sample.interval=20000"
+  }
+
+  # Build memory data for roundtrip compatibility
+  memory <- NULL
+  if (has_memory) {
+    memory <- tibble::tibble(
+      small_v = ds$samples$small_v[sample_idx],
+      big_v = ds$samples$big_v[sample_idx],
+      nodes = ds$samples$nodes[sample_idx],
+      dup_count = ds$samples$dup_count[sample_idx]
+    )
+  }
+
   tibble::lst(
-    header = "line profiling: sample.interval=20000",
+    header,
     files,
-    traces
+    traces,
+    memory
   )
 }
diff --git a/R/rprof-read.R b/R/rprof-read.R
index 9821131..42552be 100644
--- a/R/rprof-read.R
+++ b/R/rprof-read.R
@@ -35,9 +35,32 @@ read_rprof_ll <- function(path) {
   header <- 1L
   files <- grep("^#File ", lines)
   traces <- setdiff(seq_along(lines), c(header, files))
+
+  memory_profiling <- startsWith(lines[header], "memory profiling:")
+
+  # Strip memory data prefix from trace lines
+  memory <- NULL
+  trace_lines <- lines[traces]
+  if (memory_profiling && length(trace_lines) > 0) {
+    mem_rx <- "^:([0-9]+):([0-9]+):([0-9]+):([0-9]+):"
+    mem_matches <- regmatches(trace_lines, regexec(mem_rx, trace_lines))
+    has_mem <- vapply(mem_matches, length, integer(1)) > 0
+    if (any(has_mem)) {
+      memory <- tibble::tibble(
+        small_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 2L)),
+        big_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 3L)),
+        nodes = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 4L)),
+        dup_count = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 5L))
+      )
+      # Strip the memory prefix from trace lines
+      trace_lines <- sub(mem_rx, "", trace_lines)
+    }
+  }
+
   list(
     header = lines[header],
     files = lines[files],
-    traces = lines[traces]
+    traces = trace_lines,
+    memory = memory
   )
 }
diff --git a/R/rprof-to-ds.R b/R/rprof-to-ds.R
index eec30cd..cdc7500 100644
--- a/R/rprof-to-ds.R
+++ b/R/rprof-to-ds.R
@@ -19,10 +19,17 @@ rprof_to_ds <- function(rprof) {
 }
 
 get_sample_types_from_rprof <- function(rprof) {
-  tibble::tibble(
+  types <- tibble::tibble(
     type = "samples",
     unit = "count"
   )
+  if (!is.null(rprof$memory)) {
+    types <- rbind(types, tibble::tibble(
+      type = c("small_v", "big_v", "nodes", "dup_count"),
+      unit = c("cells", "cells", "bytes", "count")
+    ))
+  }
+  types
 }
 
 get_flat_rprof_from_rprof <- function(rprof) {
@@ -150,7 +157,19 @@ add_samples_to_flat_rprof <- function(flat_rprof) {
   .$locations <- map(.$locations, tibble::as_tibble, rownames = NULL)
 
   .$value <- 1L
-  . <- .[c("value", "locations")]
+
+  memory <- flat_rprof$rprof$memory
+  if (!is.null(memory)) {
+    # Memory data is indexed by trace line (sample), match by sample_id
+    mem <- memory[.$sample_id, , drop = FALSE]
+    .$small_v <- mem$small_v
+    .$big_v <- mem$big_v
+    .$nodes <- mem$nodes
+    .$dup_count <- mem$dup_count
+    . <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")]
+  } else {
+    . <- .[c("value", "locations")]
+  }
 
   flat_rprof$samples <- .
 
diff --git a/R/rprof-write.R b/R/rprof-write.R
index 8e2432b..e0de9a3 100644
--- a/R/rprof-write.R
+++ b/R/rprof-write.R
@@ -2,6 +2,17 @@
 #' @export
 write_rprof <- function(x, path) {
   rprof <- ds_to_rprof(x)
+  # Add memory prefix to traces when writing to file
+  if (!is.null(rprof$memory)) {
+    rprof$traces <- paste0(
+      ":", rprof$memory$small_v,
+      ":", rprof$memory$big_v,
+      ":", rprof$memory$nodes,
+      ":", rprof$memory$dup_count, ":",
+      rprof$traces
+    )
+  }
+  rprof$memory <- NULL
   writeLines(unlist(rprof, use.names = FALSE), path)
   invisible(x)
 }
diff --git a/inst/samples/rprof/memory.out b/inst/samples/rprof/memory.out
new file mode 100644
index 0000000..3830a0f
--- /dev/null
+++ b/inst/samples/rprof/memory.out
@@ -0,0 +1,14 @@
+memory profiling: line profiling: sample.interval=20000
+:341536:1289015:34667640:1635:"<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:293305:449462:23670472:6514:"anyDuplicated" "[.data.frame" "[" 
+:369435:615462:36216432:10044:"get" "Ops.data.frame" 
+:327669:523462:29319360:9780:"[<-.data.frame" "[<-" 
+:285807:435462:22434496:9769:"any" ".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:363324:605462:35221424:10234:"as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:322360:515462:28440888:9864:"<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:281229:429462:21686672:9852:"Ops.data.frame" 
+:358636:597462:34433280:10212:"deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:318714:513462:27870752:9993:"%in%" "Ops.data.frame" 
+:279206:429462:21346248:10022:"[.data.frame" "[" 
+:356880:597462:34140176:10248:".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:317207:513462:27611920:9989:"do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
diff --git a/tests/testthat/test-rprof-memory.R b/tests/testthat/test-rprof-memory.R
new file mode 100644
index 0000000..0f329dd
--- /dev/null
+++ b/tests/testthat/test-rprof-memory.R
@@ -0,0 +1,61 @@
+test_that("read memory profiling data", {
+  ds <- read_inst_rprof("rprof/memory.out")
+  expect_error(validate_profile(ds), NA)
+
+  # sample_types should have 5 rows
+
+  expect_equal(nrow(ds$sample_types), 5)
+  expect_equal(ds$sample_types$type, c("samples", "small_v", "big_v", "nodes", "dup_count"))
+  expect_equal(ds$sample_types$unit, c("count", "cells", "cells", "bytes", "count"))
+
+  # samples should have memory columns
+  expect_true("small_v" %in% names(ds$samples))
+  expect_true("big_v" %in% names(ds$samples))
+  expect_true("nodes" %in% names(ds$samples))
+  expect_true("dup_count" %in% names(ds$samples))
+
+  # memory values should be nonnegative integers
+  expect_true(is.integer(ds$samples$small_v))
+  expect_true(is.integer(ds$samples$big_v))
+  expect_true(is.integer(ds$samples$nodes))
+  expect_true(is.integer(ds$samples$dup_count))
+  expect_true(all(ds$samples$small_v >= 0))
+  expect_true(all(ds$samples$big_v >= 0))
+  expect_true(all(ds$samples$nodes >= 0))
+  expect_true(all(ds$samples$dup_count >= 0))
+})
+
+test_that("non-memory profiles have no memory columns", {
+  ds <- read_inst_rprof("rprof/1.out")
+  expect_error(validate_profile(ds), NA)
+
+  expect_equal(nrow(ds$sample_types), 1)
+  expect_false("small_v" %in% names(ds$samples))
+  expect_false("big_v" %in% names(ds$samples))
+})
+
+test_that("memory profiling roundtrip via rprof", {
+  ds <- read_inst_rprof("rprof/memory.out")
+
+  # Convert to rprof and back
+  ds1 <- rprof_to_ds(ds_to_rprof(ds))
+  expect_error(validate_profile(ds1), NA)
+
+  # roundtrip again
+  ds2 <- rprof_to_ds(ds_to_rprof(ds1))
+  expect_error(validate_profile(ds2), NA)
+
+  expect_identical(strip_dots(ds1), strip_dots(ds2))
+})
+
+test_that("memory profiling write and read back", {
+  ds <- read_inst_rprof("rprof/memory.out")
+
+  path <- tempfile("profiler_mem", fileext = ".out")
+  write_rprof(ds, path)
+
+  ds1 <- read_rprof(path)
+  expect_error(validate_profile(ds1), NA)
+
+  expect_identical(strip_dots(ds), strip_dots(ds1))
+})

From f9acc663b902c524afb2d862816cf0e235209c0d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 13 Feb 2026 04:50:17 +0000
Subject: [PATCH 3/5] Fix documentation comment line break in api.R

Co-authored-by: krlmlr <1741643+krlmlr@users.noreply.github.com>
---
 R/api.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/api.R b/R/api.R
index 461c1bd..d636fdb 100644
--- a/R/api.R
+++ b/R/api.R
@@ -105,7 +105,6 @@ validate_profile <- function(x) {
   #' The locations are listed in inner-first order, i.e., the first location
   #' corresponds to the innermost entry of the stack trace.
   #' When memory profiling data is present, the `small_v`, `big_v`, `nodes`,
-
   #' and `dup_count` columns contain integer memory statistics per sample.
   #' All memory values must be nonnegative.
   if (has_memory) {

From ebed6a6eae36746837d95b2c5b7c38eca89ccecd Mon Sep 17 00:00:00 2001
From: Copilot <Copilot@users.noreply.github.com>
Date: Fri, 13 Feb 2026 04:59:11 +0000
Subject: [PATCH 4/5] chore: Auto-update from GitHub Actions

Run: https://github.com/r-prof/profile/actions/runs/21975108397
---
 man/validate_profile.Rd | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/man/validate_profile.Rd b/man/validate_profile.Rd
index ec6115b..2e0436e 100644
--- a/man/validate_profile.Rd
+++ b/man/validate_profile.Rd
@@ -48,11 +48,14 @@ that is accepted by \code{\link[=package_version]{package_version()}}.
 The \code{sample_types} table has two character columns, \code{type} and \code{unit}.
 Additional columns with a leading dot in the name are allowed
 after the required columns.
-It is currently restricted to one row with values \code{"samples"} and \code{"count"},
+The first row must have values \code{"samples"} and \code{"count"},
 respectively.
+Additional rows may describe memory profiling data.
 
 The \code{samples} table has two columns, \code{value} (integer) and \code{locations}
 (list).
+When memory profiling data is present, the table also has integer columns
+\code{small_v}, \code{big_v}, \code{nodes}, and \code{dup_count}.
 Additional columns with a leading dot in the name are allowed
 after the required columns.
 The \code{value} column describes the number of consecutive samples for the
@@ -63,6 +66,9 @@ For each \code{location_id} value a corresponding observation in the \code{locat
 table must exist.
 The locations are listed in inner-first order, i.e., the first location
 corresponds to the innermost entry of the stack trace.
+When memory profiling data is present, the \code{small_v}, \code{big_v}, \code{nodes},
+and \code{dup_count} columns contain integer memory statistics per sample.
+All memory values must be nonnegative.
 
 The \code{locations} table has three integer columns, \code{location_id},
 \code{function_id}, and \code{line}.

From f4636daf615b78044a2b22beb2971ae50aec27a1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 13 Feb 2026 15:08:19 +0000
Subject: [PATCH 5/5] Make data model type-stable: always include memory
 columns

sample_types always has 5 rows and samples always has 6 columns
(value, locations, small_v, big_v, nodes, dup_count). When no
memory profiling data is present, memory columns are all NA_integer_.

Co-authored-by: krlmlr <1741643+krlmlr@users.noreply.github.com>
---
 R/api.R                            | 55 ++++++++++++------------------
 R/pprof-from-ds.R                  | 11 ++++--
 R/pprof-to-ds.R                    | 35 ++++++-------------
 R/rprof-from-ds.R                  |  2 +-
 R/rprof-to-ds.R                    | 20 +++++------
 tests/testthat/test-rprof-memory.R | 24 ++++++++-----
 6 files changed, 65 insertions(+), 82 deletions(-)

diff --git a/R/api.R b/R/api.R
index d636fdb..f280796 100644
--- a/R/api.R
+++ b/R/api.R
@@ -64,31 +64,19 @@ validate_profile <- function(x) {
   stopifnot(undotted(names(x$sample_types)) == c("type", "unit"))
   stopifnot(is.character(x$sample_types$type))
   stopifnot(is.character(x$sample_types$unit))
-  #' The first row must have values `"samples"` and `"count"`,
-  #' respectively.
-  stopifnot(nrow(x$sample_types) >= 1)
-  stopifnot(x$sample_types$type[[1]] == "samples")
-  stopifnot(x$sample_types$unit[[1]] == "count")
-  #' Additional rows may describe memory profiling data.
-  has_memory <- nrow(x$sample_types) > 1
-  if (has_memory) {
-    stopifnot(nrow(x$sample_types) == 5)
-    stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count"))
-    stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count"))
-  }
+  #' It always has five rows describing the sample count and memory profiling
+  #' data types.
+  stopifnot(nrow(x$sample_types) == 5)
+  stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count"))
+  stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count"))
 
   #'
-  #' The `samples` table has two columns, `value` (integer) and `locations`
-  #' (list).
-  #' When memory profiling data is present, the table also has integer columns
-  #' `small_v`, `big_v`, `nodes`, and `dup_count`.
+  #' The `samples` table has six columns: `value` (integer), `locations`
+  #' (list), and integer columns `small_v`, `big_v`, `nodes`, and `dup_count`
+  #' for memory profiling data.
   #' Additional columns with a leading dot in the name are allowed
   #' after the required columns.
-  expected_sample_cols <- c("value", "locations")
-  if (has_memory) {
-    expected_sample_cols <- c("value", "locations", "small_v", "big_v", "nodes", "dup_count")
-  }
-  stopifnot(undotted(names(x$samples)) == expected_sample_cols)
+  stopifnot(undotted(names(x$samples)) == c("value", "locations", "small_v", "big_v", "nodes", "dup_count"))
   stopifnot(is.integer(x$samples$value))
   stopifnot(is.list(x$samples$locations))
   #' The `value` column describes the number of consecutive samples for the
@@ -104,19 +92,18 @@ validate_profile <- function(x) {
   stopifnot(unlist(map(x$samples$locations, "[[", "location_id")) %in% x$locations$location_id)
   #' The locations are listed in inner-first order, i.e., the first location
   #' corresponds to the innermost entry of the stack trace.
-  #' When memory profiling data is present, the `small_v`, `big_v`, `nodes`,
-  #' and `dup_count` columns contain integer memory statistics per sample.
-  #' All memory values must be nonnegative.
-  if (has_memory) {
-    stopifnot(is.integer(x$samples$small_v))
-    stopifnot(is.integer(x$samples$big_v))
-    stopifnot(is.integer(x$samples$nodes))
-    stopifnot(is.integer(x$samples$dup_count))
-    stopifnot(x$samples$small_v >= 0L)
-    stopifnot(x$samples$big_v >= 0L)
-    stopifnot(x$samples$nodes >= 0L)
-    stopifnot(x$samples$dup_count >= 0L)
-  }
+  #' The `small_v`, `big_v`, `nodes`, and `dup_count` columns contain integer
+  #' memory statistics per sample. When memory profiling data is not available,
+  #' these columns are all `NA`. When present, all memory values must be
+  #' nonnegative.
+  stopifnot(is.integer(x$samples$small_v))
+  stopifnot(is.integer(x$samples$big_v))
+  stopifnot(is.integer(x$samples$nodes))
+  stopifnot(is.integer(x$samples$dup_count))
+  stopifnot(is.na(x$samples$small_v) | x$samples$small_v >= 0L)
+  stopifnot(is.na(x$samples$big_v) | x$samples$big_v >= 0L)
+  stopifnot(is.na(x$samples$nodes) | x$samples$nodes >= 0L)
+  stopifnot(is.na(x$samples$dup_count) | x$samples$dup_count >= 0L)
 
   #'
   #' The `locations` table has three integer columns, `location_id`,
diff --git a/R/pprof-from-ds.R b/R/pprof-from-ds.R
index 37f7bfe..71c71b2 100644
--- a/R/pprof-from-ds.R
+++ b/R/pprof-from-ds.R
@@ -2,6 +2,8 @@ ds_to_msg <- function(ds) {
   validate_profile(ds)
   provide_proto()
 
+  has_memory <- !all(is.na(ds$samples$small_v))
+
   msg <- RProtoBuf::new(perftools.profiles.Profile)
 
   msg$string_table <- unique(c(
@@ -13,14 +15,17 @@ ds_to_msg <- function(ds) {
     ds$functions$filename
   ))
 
-  add_sample_types_to_msg(ds$sample_types, msg)
+  add_sample_types_to_msg(ds$sample_types, msg, has_memory)
   add_samples_to_msg(ds$samples, msg)
   add_locations_to_msg(ds$locations, msg)
   add_functions_to_msg(ds$functions, msg)
   msg
 }
 
-add_sample_types_to_msg <- function(sample_types, msg) {
+add_sample_types_to_msg <- function(sample_types, msg, has_memory) {
+  if (!has_memory) {
+    sample_types <- sample_types[1, , drop = FALSE]
+  }
   sample_types$type <- match(sample_types$type, msg$string_table) - 1L
   sample_types$unit <- match(sample_types$unit, msg$string_table) - 1L
 
@@ -33,7 +38,7 @@ add_sample_types_to_msg <- function(sample_types, msg) {
 }
 
 add_samples_to_msg <- function(samples, msg) {
-  has_memory <- "small_v" %in% names(samples)
+  has_memory <- !all(is.na(samples$small_v))
   msg$sample <- lapply(split_rows(samples), function(s) {
     s_msg <- RProtoBuf::new(perftools.profiles.Sample)
     if (has_memory) {
diff --git a/R/pprof-to-ds.R b/R/pprof-to-ds.R
index 071029e..c52400d 100644
--- a/R/pprof-to-ds.R
+++ b/R/pprof-to-ds.R
@@ -16,33 +16,14 @@ msg_to_ds <- function(msg) {
 }
 
 get_sample_types_from_msg <- function(msg) {
-  sample_types <- map(msg$sample_type, function(st) {
-    tibble::tibble(
-      type = as.integer(st$type),
-      unit = as.integer(st$unit)
-    )
-  })
-  sample_types <- merge_rows(sample_types)
-
-  sample_types$type <- msg$string_table[sample_types$type + 1]
-  sample_types$unit <- msg$string_table[sample_types$unit + 1]
-
-  # Check if memory profiling types are present
-  mem_types <- c("small_v", "big_v", "nodes", "dup_count")
-  has_memory <- all(mem_types %in% sample_types$type)
-
-  if (has_memory) {
-    # Keep first row (samples/count) and the memory rows
-    keep <- sample_types$type %in% c("samples", mem_types)
-    sample_types[keep, ]
-  } else {
-    sample_types[1, ]
-  }
+  tibble::tibble(
+    type = c("samples", "small_v", "big_v", "nodes", "dup_count"),
+    unit = c("count", "cells", "cells", "bytes", "count")
+  )
 }
 
 get_samples_from_msg <- function(msg) {
-  # Determine which value indices to keep
-  n_types <- length(msg$sample_type)
+  # Determine which value indices correspond to memory types
   all_types <- map(msg$sample_type, function(st) {
     msg$string_table[as.integer(st$type) + 1]
   })
@@ -51,6 +32,7 @@ get_samples_from_msg <- function(msg) {
   mem_types <- c("small_v", "big_v", "nodes", "dup_count")
   has_memory <- all(mem_types %in% all_types)
 
+  mem_indices <- NULL
   if (has_memory) {
     mem_indices <- match(mem_types, all_types)
   }
@@ -66,6 +48,11 @@ get_samples_from_msg <- function(msg) {
       row$big_v <- values[[mem_indices[[2]]]]
       row$nodes <- values[[mem_indices[[3]]]]
       row$dup_count <- values[[mem_indices[[4]]]]
+    } else {
+      row$small_v <- NA_integer_
+      row$big_v <- NA_integer_
+      row$nodes <- NA_integer_
+      row$dup_count <- NA_integer_
     }
     row
   })
diff --git a/R/rprof-from-ds.R b/R/rprof-from-ds.R
index 1c565cf..d799b75 100644
--- a/R/rprof-from-ds.R
+++ b/R/rprof-from-ds.R
@@ -1,7 +1,7 @@
 ds_to_rprof <- function(ds) {
   validate_profile(ds)
 
-  has_memory <- nrow(ds$sample_types) > 1
+  has_memory <- !all(is.na(ds$samples$small_v))
 
   . <- ds$locations
   . <- merge(., ds$functions[c("function_id", "system_name", "filename")], by = "function_id", sort = FALSE, all.x = TRUE)
diff --git a/R/rprof-to-ds.R b/R/rprof-to-ds.R
index cdc7500..2064589 100644
--- a/R/rprof-to-ds.R
+++ b/R/rprof-to-ds.R
@@ -19,17 +19,10 @@ rprof_to_ds <- function(rprof) {
 }
 
 get_sample_types_from_rprof <- function(rprof) {
-  types <- tibble::tibble(
-    type = "samples",
-    unit = "count"
+  tibble::tibble(
+    type = c("samples", "small_v", "big_v", "nodes", "dup_count"),
+    unit = c("count", "cells", "cells", "bytes", "count")
   )
-  if (!is.null(rprof$memory)) {
-    types <- rbind(types, tibble::tibble(
-      type = c("small_v", "big_v", "nodes", "dup_count"),
-      unit = c("cells", "cells", "bytes", "count")
-    ))
-  }
-  types
 }
 
 get_flat_rprof_from_rprof <- function(rprof) {
@@ -166,10 +159,13 @@ add_samples_to_flat_rprof <- function(flat_rprof) {
     .$big_v <- mem$big_v
     .$nodes <- mem$nodes
     .$dup_count <- mem$dup_count
-    . <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")]
   } else {
-    . <- .[c("value", "locations")]
+    .$small_v <- NA_integer_
+    .$big_v <- NA_integer_
+    .$nodes <- NA_integer_
+    .$dup_count <- NA_integer_
   }
+  . <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")]
 
   flat_rprof$samples <- .
 
diff --git a/tests/testthat/test-rprof-memory.R b/tests/testthat/test-rprof-memory.R
index 0f329dd..54d266b 100644
--- a/tests/testthat/test-rprof-memory.R
+++ b/tests/testthat/test-rprof-memory.R
@@ -2,19 +2,18 @@ test_that("read memory profiling data", {
   ds <- read_inst_rprof("rprof/memory.out")
   expect_error(validate_profile(ds), NA)
 
-  # sample_types should have 5 rows
-
+  # sample_types always has 5 rows
   expect_equal(nrow(ds$sample_types), 5)
   expect_equal(ds$sample_types$type, c("samples", "small_v", "big_v", "nodes", "dup_count"))
   expect_equal(ds$sample_types$unit, c("count", "cells", "cells", "bytes", "count"))
 
-  # samples should have memory columns
+  # samples always have memory columns
   expect_true("small_v" %in% names(ds$samples))
   expect_true("big_v" %in% names(ds$samples))
   expect_true("nodes" %in% names(ds$samples))
   expect_true("dup_count" %in% names(ds$samples))
 
-  # memory values should be nonnegative integers
+  # memory values should be nonnegative integers (not NA for memory profiles)
   expect_true(is.integer(ds$samples$small_v))
   expect_true(is.integer(ds$samples$big_v))
   expect_true(is.integer(ds$samples$nodes))
@@ -25,13 +24,22 @@ test_that("read memory profiling data", {
   expect_true(all(ds$samples$dup_count >= 0))
 })
 
-test_that("non-memory profiles have no memory columns", {
+test_that("non-memory profiles have NA memory columns", {
   ds <- read_inst_rprof("rprof/1.out")
   expect_error(validate_profile(ds), NA)
 
-  expect_equal(nrow(ds$sample_types), 1)
-  expect_false("small_v" %in% names(ds$samples))
-  expect_false("big_v" %in% names(ds$samples))
+  # sample_types always has 5 rows
+  expect_equal(nrow(ds$sample_types), 5)
+
+  # memory columns are always present but all NA
+  expect_true("small_v" %in% names(ds$samples))
+  expect_true("big_v" %in% names(ds$samples))
+  expect_true("nodes" %in% names(ds$samples))
+  expect_true("dup_count" %in% names(ds$samples))
+  expect_true(all(is.na(ds$samples$small_v)))
+  expect_true(all(is.na(ds$samples$big_v)))
+  expect_true(all(is.na(ds$samples$nodes)))
+  expect_true(all(is.na(ds$samples$dup_count)))
 })
 
 test_that("memory profiling roundtrip via rprof", {