profile/R/api.R at f4636daf615b78044a2b22beb2971ae50aec27a1 · r-prof/profile · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#' Definition of the profile data format
#'
#' @description
#' The data format is stable between major releases.
#' In case of major updates, compatibility functions will be provided.
#'
#' The `validate_profile()` function checks a profile data object
#' for compatibility with the specification.
#' Versioning information embedded in the data is considered.
#'
#' @section Data model:
#' \figure{dm.png}
#' @name validate_profile
#' @param x Profile data, e.g., as returned by [read_pprof()] or [read_rprof()].
#' @export
#' @examples
#' rprof_file <- system.file("samples/rprof/1.out", package = "profile")
#' ds <- read_rprof(rprof_file)
#' validate_profile(ds)
#'
#' bad_ds <- ds
#' bad_ds$samples <- NULL
#' try(validate_profile(bad_ds))
validate_profile <- function(x) {
  #' @details
  #' The profile data is stored in an object of class `"profile_data"`,
  #' which is a named list of [tibble]s.
  stopifnot(is.list(x))
  stopifnot(inherits(x, "profile_data"))
  components <- undotted(names(x))
  stopifnot(map_lgl(x[components], tibble::is_tibble))

  #' This named list has the following components, subsequently referred to as
  #' *tables*:
  #' - `meta`
  #' - `sample_types`
  #' - `samples`
  #' - `locations`
  #' - `functions`
  stopifnot(identical(
    undotted(names(x)),
    c("meta", "sample_types", "samples", "locations", "functions")
  ))
  #' (Components with names starting with a dot are permitted
  #' after the required components, but will be ignored.)

  #'
  #' The `meta` table has two character columns, `key` and `value`.
  #' Additional columns with a leading dot in the name are allowed
  #' after the required columns.
  stopifnot(undotted(names(x$meta)) == c("key", "value"))
  stopifnot(is.character(x$meta$key))
  stopifnot(is.character(x$meta$value))
  #' It is currently restricted to one row with key `"version"` and a value
  #' that is accepted by [package_version()].
  stopifnot(nrow(x$meta) == 1)
  stopifnot(x$meta$key == "version")
  package_version(x$meta$value)

  #'
  #' The `sample_types` table has two character columns, `type` and `unit`.
  #' Additional columns with a leading dot in the name are allowed
  #' after the required columns.
  stopifnot(undotted(names(x$sample_types)) == c("type", "unit"))
  stopifnot(is.character(x$sample_types$type))
  stopifnot(is.character(x$sample_types$unit))
  #' It always has five rows describing the sample count and memory profiling
  #' data types.
  stopifnot(nrow(x$sample_types) == 5)
  stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count"))
  stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count"))

  #'
  #' The `samples` table has six columns: `value` (integer), `locations`
  #' (list), and integer columns `small_v`, `big_v`, `nodes`, and `dup_count`
  #' for memory profiling data.
  #' Additional columns with a leading dot in the name are allowed
  #' after the required columns.
  stopifnot(undotted(names(x$samples)) == c("value", "locations", "small_v", "big_v", "nodes", "dup_count"))
  stopifnot(is.integer(x$samples$value))
  stopifnot(is.list(x$samples$locations))
  #' The `value` column describes the number of consecutive samples for the
  #' given location, and must be greater than zero.
  stopifnot(x$samples$value > 0)
  #' Each element of the `locations` column is a tibble with one integer column,
  #' `location_id`.
  stopifnot(map_lgl(x$samples$locations, tibble::is_tibble))
  stopifnot(map_chr(x$samples$locations, map_chr, class) == "integer")
  stopifnot(map_chr(x$samples$locations, names) == "location_id")
  #' For each `location_id` value a corresponding observation in the `locations`
  #' table must exist.
  stopifnot(unlist(map(x$samples$locations, "[[", "location_id")) %in% x$locations$location_id)
  #' The locations are listed in inner-first order, i.e., the first location
  #' corresponds to the innermost entry of the stack trace.
  #' The `small_v`, `big_v`, `nodes`, and `dup_count` columns contain integer
  #' memory statistics per sample. When memory profiling data is not available,
  #' these columns are all `NA`. When present, all memory values must be
  #' nonnegative.
  stopifnot(is.integer(x$samples$small_v))
  stopifnot(is.integer(x$samples$big_v))
  stopifnot(is.integer(x$samples$nodes))
  stopifnot(is.integer(x$samples$dup_count))
  stopifnot(is.na(x$samples$small_v) | x$samples$small_v >= 0L)
  stopifnot(is.na(x$samples$big_v) | x$samples$big_v >= 0L)
  stopifnot(is.na(x$samples$nodes) | x$samples$nodes >= 0L)
  stopifnot(is.na(x$samples$dup_count) | x$samples$dup_count >= 0L)

  #'
  #' The `locations` table has three integer columns, `location_id`,
  #' `function_id`, and `line`.
  #' Additional columns with a leading dot in the name are allowed
  #' after the required columns.
  stopifnot(undotted(names(x$locations)) == c("location_id", "function_id", "line"))
  stopifnot(is.integer(x$locations$location_id))
  stopifnot(is.integer(x$locations$function_id))
  stopifnot(is.integer(x$locations$line))
  #' All `location_id` values are unique.
  stopifnot(!anyDuplicated(x$locations$location_id))
  #' For each `function_id` value a corresponding observation in the `functions`
  #' table must exist. `NA` values are permitted.
  stopifnot(is.na(x$locations$function_id) | x$locations$function_id %in% x$functions$function_id)
  #' The `line` column describes the line in the source code this location
  #' corresponds to, zero if unknown. All values must be nonnegative.
  #' `NA` values are permitted.
  stopifnot(is.na(x$locations$line) | x$locations$line >= 0)

  #'
  #' The `functions` table has five columns, `function_id` (integer),
  #' `name`, `system_name` and `file_name` (character), and `start_line` (integer).
  #' Additional columns with a leading dot in the name are allowed
  #' after the required columns.
  stopifnot(undotted(names(x$functions)) == c("function_id", "name", "system_name", "filename", "start_line"))
  stopifnot(is.integer(x$functions$function_id))
  stopifnot(is.character(x$functions$name))
  stopifnot(is.character(x$functions$system_name))
  stopifnot(is.character(x$functions$filename))
  stopifnot(is.integer(x$functions$start_line))
  #' All `function_id` values are unique.
  stopifnot(!anyDuplicated(x$functions$function_id))
  #' The `name`, `system_name` and `filename` columns describe function names
  #' (demangled and mangled), and source file names for a function.
  #' Both `name` and `system_name` must not contain empty strings.
  stopifnot(x$functions$name != "")
  stopifnot(x$functions$system_name != "")
  #' The `start_line` column describes the start line of a function in its
  #' source file, zero if unknown. All values must be nonnegative.
  stopifnot(x$functions$start_line >= 0)

}

undotted <- function(x) {
  x[seq_len(max(grep("^[^.]", x)))]
}

get_default_meta <- function() {
  tibble::tibble(
    key = "version",
    value = "1.0"
  )
}

new_profile_data <- function(x) {
  structure(x, class = "profile_data")
}

#' @export
print.profile_data <- function(x, ...) {
  cat(format(x, ...), sep = "\n")
  invisible(x)
}

#' @export
format.profile_data <- function(x, ...) {
  paste0("Profile data: ", nrow(x$samples), " samples")
}

#' dm_from_profile
#'
#' @description
#' The `dm_from_profile()` function converts a profile to a dm object.
#' The \pkg{dm} package must be installed.
#' See [dm::dm()] for more information.
#'
#' @rdname validate_profile
#' @export
#' @examplesIf rlang::is_installed("dm")
#'
#' dm <- dm_from_profile(ds)
#' print(dm)
#' @examplesIf rlang::is_installed(c("dm", "DiagrammeR"))
#'
#' dm::dm_draw(dm)
dm_from_profile <- function(x) {
  stopifnot(inherits(x, "profile_data"))
  check_installed("dm")

  samples <- x$samples
  locations <- x$locations
  functions <- x$functions
  samples$sample_id <- seq_len(nrow(samples))
  samples_locations <- tibble::tibble(
    sample_id = rep(samples$sample_id, vapply(samples$locations, nrow, integer(1))),
    tibble::as_tibble(do.call(rbind, samples$locations))
  )
  samples$locations <- NULL

  #' @importFrom tibble %>%
  dm::dm(samples, locations, functions, samples_locations) %>%
    dm::dm_add_pk(functions, function_id) %>%
    dm::dm_add_pk(locations, location_id) %>%
    dm::dm_add_pk(samples, sample_id) %>%
    dm::dm_add_fk(samples_locations, sample_id, samples) %>%
    dm::dm_add_fk(samples_locations, location_id, locations) %>%
    dm::dm_add_fk(locations, function_id, functions)
}

utils::globalVariables("function_id")
utils::globalVariables("location_id")
utils::globalVariables("sample_id")