tmap/scales.qmd at main · geocompx/tmap · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
```{r}
#| echo: false
source("code/before_script.R")
```

# Scales of visual variables {#sec-scales}

\index{visual variables}
\index{scales}
\index{color scales}
\index{size scales}
\index{shape scales}
\index{tm\_scale}
Sections [-@sec-colors], [-@sec-sizes], and [-@sec-shapes] showed how to set colors, sizes, and shapes for different types of spatial objects.
In them, we often used the `tm_scale()` function to modify the appearance of the map, such as changing the color palettes (`col.scale` and `fill.scale`), sizes (`size.scale`), or shapes (`shape.scale`).
The `tm_scale()` function automatically sets the scale for the given visual variable and the data type (factor, numeric, and integer).
Thus, for example, when we provide a character variable's name to the `fill` argument, then the `tm_scale()` function automatically sets the color scale for a categorical variable, and when we provide a numeric variable's name to the `size` argument, then the `tm_scale()` function automatically sets the size scale for a continuous variable.

However, we often want to have more control over how our spatial objects are presented on the map.
For that purpose, the `tm_scale()` function comes with several derived functions that can be used to modify and customize the used scale.
@tbl-scale-table presents all of them.

<!-- tmap_options()$scales.var -->
<!-- add short description of each scale function in the table below? -->
```{r}
#| echo: false
scales_df = tibble::tribble(
  ~Function, ~Description,
  "tm_scale_categorical()", "Categorical scale",
  "tm_scale_ordinal()", "Ordinal scale",
  "tm_scale_discrete()", "Discrete scale",
  "tm_scale_rank()", "Rank scale",
  "tm_scale_intervals()", "Intervals scale",
  "tm_scale_continuous()", "Continuous scale",
  "tm_scale_continuous_log(), tm_scale_continuous_log2(), tm_scale_continuous_log10(), tm_scale_continuous_log1p(), tm_scale_continuous_sqrt(), tm_scale_continuous_pseudo_log()", "Logarithmic scales",
  "tm_scale_asis()", "As-is scale",
  "tm_scale_rgb()", "RGB scale",
  "tm_scale_bivariate()", "Bivariate scale"
)
```

```{r}
#| label: tbl-scale-table
#| tbl-cap: "Map scales."
#| echo: false
#| warning: false
#| message: false
kableExtra::kbl(scales_df,
                booktabs = TRUE) |>
  kableExtra::kable_styling("striped",
                            latex_options = "striped",
                            full_width = FALSE) |>
  kableExtra::column_spec(1, width = "4cm", monospace = TRUE) |>
  kableExtra::column_spec(2, width = "8cm")
```

\index{scales!examples}
@fig-scales shows examples of what some of them look like when applied to the same variable.
In the rest of the chapter, we go through each of the scales in more detail.
We mostly focus on using scales for the `fill.scale` argument, but the same principles apply to the `col.scale`, `size.scale`, and `shape.scale` arguments.

```{r}
#| label: fig-scales
#| echo: false
#| warning: false
#| message: false
#| fig-cap: "Examples of different scales applied to the same variable."
#| fig-asp: 0.5
library(tmap)
library(sf)

# Read the regions and add an integer copy of `pop65perc`; every panel of the
# comparison figure maps this same column.
slo_regions = read_sf("data/slovenia/slo_regions.gpkg")
slo_regions$pop65perc2 = as.integer(slo_regions$pop65perc)

# One scale per panel; `compared_labels` lists the matching panel titles in
# the same order.
compared_scales = list(tm_scale_categorical(),
                       tm_scale_ordinal(),
                       tm_scale_discrete(),
                       tm_scale_intervals(),
                       tm_scale_continuous(),
                       tm_scale_continuous_log10())
compared_labels = c("tm_scale_categorical()", "tm_scale_ordinal()",
                    "tm_scale_discrete()", "tm_scale_intervals()",
                    "tm_scale_continuous()", "tm_scale_continuous_log10()")

# Untitled legend drawn inside the map frame, in the top-left corner.
panel_legend = tm_legend(title = "", position = tm_pos_in("LEFT", "TOP"))

# The variable name is repeated once per scale so that each scale gets its
# own panel.
tm_shape(slo_regions) +
  tm_polygons(rep("pop65perc2", length(compared_scales)),
              fill.scale = compared_scales,
              fill.legend = panel_legend) +
  tm_layout(panel.labels = compared_labels,
            # wide second (left) margin -- presumably leaves room for the
            # in-frame legend; confirm against the rendered figure
            inner.margins = c(0.05, 0.4, 0.1, 0.05),
            legend.text.size = 0.5)
```

The examples in this chapter are based on the `slo_regions` dataset, which contains information about the regions of Slovenia.
Let's read it first.

```{r}
#| warning: false
#| message: false
library(tmap)
library(sf)
slo_regions = read_sf("data/slovenia/slo_regions.gpkg")
```

## Categorical scales {#sec-categorical-scales}

\index{categorical maps}
\index{categorical scales}
\index{tm\_scale\_categorical}
An example of a categorical map can be seen in @fig-colorscales2.
We created it by providing a character variable's name, `"region_group"`, in the `fill` argument.

```{r}
#| label: fig-colorscales2
#| warning: false
#| message: false
#| fig-height: 2
#| fig-cap: Example of a map in which polygons are colored based on the values of a categorical
#|   variable.
tm_shape(slo_regions) +
  tm_polygons(fill = "region_group")
# same as:
# tm_shape(slo_regions) +
#   tm_polygons(fill = "region_group", fill.scale = tm_scale_categorical())
```

The `tm_polygons(fill = "region_group", fill.scale = tm_scale_categorical())` code is run automatically in the background in this case.
It is possible to change the names of legend labels with the `labels` argument of the `tm_scale()` function.
As mentioned in @sec-colors, we can also change the color palette with the `values` argument.

```{r}
#| eval: false
#| echo: false
tm_shape(slo_regions) +
  tm_polygons(fill = "region_group",
              fill.scale = tm_scale_categorical(labels = as.character(1:5)))
```

## Ordinal scales {#sec-ordinal-scales}

\index{ordinal maps}
\index{tm\_scale\_ordinal}
The ordinal scale is similar to the categorical scale, but it is used for ordered categorical variables.
By default, this scale uses a sequential color palette, which is suitable for ordered categorical variables (@fig-ordinal-scales).
We may use `tm_scale_ordinal()` when the order of the values matters to highlight a hierarchy or ranking.

```{r}
#| label: fig-ordinal-scales
#| warning: false
#| message: false
#| fig-cap: "Example of a map in which polygons are colored based on the values of an ordered categorical variable."
tm_shape(slo_regions) +
  tm_polygons(fill = "region_group",
              fill.scale = tm_scale_ordinal())
```

## Discrete scales {#sec-discrete-scales}

\index{discrete maps}
<!-- Martijn: a better example is probably needed here -- we could add some new variable to the data. -->

```{r}
#| label: fig-discrete-scales
#| warning: false
#| message: false
#| fig-cap: "Example of a map in which polygons are colored based on the values of a discrete variable."
tm_shape(slo_regions) +
  tm_polygons(fill = "urbn_type",
              fill.scale = tm_scale_discrete())
```

## Rank scales {#sec-rank-scales}

\index{rank maps}
<!-- Martijn: any ideas for a good example? -->

## Intervals scales {#sec-intervals-scales}

<!-- optimal number of classes? 3-7 -->
\index{intervals maps}
\index{tm\_scale\_intervals}
Interval scales are used to represent continuous numerical variables using a set of class intervals.
In other words, values are divided into several groups based on their properties.
Several approaches can be used to convert continuous variables to intervals, and each of them could result in different groupings of values (@tbl-style-table).^[Most of these approaches in **tmap** use the **classInt** package [@R-classInt] internally, therefore, some additional information on their use can be found in the `?classIntervals` function's documentation.]

```{r}
#| echo: false
#| label: tbl-style-table
#| tbl-cap: "Styles available to create intervals maps."
#| warning: false
#| message: false
styles_df = tibble::tribble(
  ~Style, ~Description,
  '"pretty"', "Pretty breaks",
  '"fixed"', "Manual breaks",
  '"equal"', "Equal intervals",
  '"sd"', "Standard deviation intervals",
  '"quantile"', "Quantile intervals",
  '"jenks"', "Jenks optimization breaks",
  '"fisher"', "Fisher's method",
  '"dpih"', "Kernel density estimation breaks",
  '"kmeans"', "K-means clustering breaks",
  '"bclust"', "Bagged clustering breaks",
  '"hclust"', "Hierarchical clustering breaks",
  '"headtails"', "Head/tail breaks",
  '"log10_pretty"', "Logarithmic base-10 breaks"
)
kableExtra::kbl(styles_df,
                booktabs = TRUE) |>
  kableExtra::kable_styling("striped",
                            latex_options = "striped",
                            full_width = FALSE) |>
  kableExtra::column_spec(1, width = "4cm", monospace = TRUE) |>
  kableExtra::column_spec(2, width = "8cm")
```

### Pretty breaks

\index{pretty breaks}
By default, the `tm_scale_intervals()` function is used in the background (@fig-intervals-methods-1).
It uses a style called "pretty", which creates breaks that are whole numbers and spaces them evenly.^[For more information, visit the `?pretty` function documentation.]

```{r}
#| label: intervals-methods1
#| eval: false
tm_shape(slo_regions) +
  tm_polygons(fill = "pop_dens")
# same as:
# tm_shape(slo_regions) +
#   tm_polygons(fill = "pop_dens", fill.scale = tm_scale_intervals())
```

It is also possible to indicate the desired number of classes using the `n` argument of the `tm_scale()` function provided to the `fill.scale` argument.
While not every `n` is possible depending on the input values, **tmap** tries to create a number of classes as close as possible to the preferred one.

### Manual breaks

\index{manual breaks}
The next approach is to manually select the limits of each break with the `breaks` argument of `tm_scale()` (@fig-intervals-methods-2).
This can be useful when we have some pre-defined breaks or when we want to compare values between several maps.
It expects threshold values for each break, therefore, if we want to have three breaks, we need to provide four thresholds.^[You may also use `-Inf` and `Inf` to represent the lowest and highest possible values, respectively.]
Additionally, we can add a label to each break with the `labels` argument.

```{r}
#| label: intervals-methods2
#| eval: false
tm_shape(slo_regions) +
  tm_polygons(fill = "pop_dens",
              fill.scale = tm_scale_intervals(
                breaks = c(0, 100, 150, 250),
                labels = c("low", "medium", "high"))
              )
```

### Automatic breaks

\index{automatic breaks}
Another approach is to create breaks automatically using one of many existing classification methods with the `style` argument of the `tm_scale()` function.
Three basic methods are `"equal"`, `"sd"`, and `"quantile"` styles.
Let's consider a variable with 100 observations with values ranging from 0 to 10.
The `"equal"` style divides them into *n* equal-sized intervals.
This style works well when the values change fairly continuously and do not contain any outliers.
In **tmap**, the number of classes is computed automatically<!--?nclass.Sturges-->, or we can specify it with the `n` argument.
For example, when we set `n` to 4, then our breaks are represented by four classes ranging from 0 to 2.5, 2.5 to 5, 5 to 7.5, and 7.5 to 10.
The `"sd"` style represents how much the values of a given variable vary from its mean, with each interval having a constant width of the standard deviation.
This style is used when it is vital to show how values relate to the mean.
The `"quantile"` style creates several classes with exactly the same number of objects (e.g., spatial features), but having intervals of various lengths.
This method has an advantage in that it does not have any empty classes or classes with too few or too many values.
However, the resulting intervals from the `"quantile"` style can often be misleading, with very different values located in the same class.

To create classes that, on the one hand, contain similar values, and on the other hand, are different from the other classes, we can use an optimization method.
The most common one used in cartography is the Jenks optimization method implemented as the `"jenks"` style (@fig-intervals-methods-3).

```{r}
#| label: intervals-methods3
#| eval: false
tm_shape(slo_regions) +
  tm_polygons(fill = "pop_dens",
              fill.scale = tm_scale_intervals(style = "jenks"))
```

The Fisher method (`style = "fisher"`) has a similar role: it creates groups with maximized homogeneity [@fisher_grouping_1958].
A different approach is used by the `"dpih"` style, which uses kernel density estimations to select the width of the intervals [@wand_databased_1997].
You can visit `?KernSmooth::dpih` for more details.

Another group of classification methods uses existing clustering methods.
It includes k-means clustering (`"kmeans"`), bagged clustering (`"bclust"`), and hierarchical clustering (`"hclust"`).
They create classes based on the similarity of values, where the number of classes is specified with the `n` argument.

Finally, there are a few methods created to work well for a variable with a heavy-tailed distribution, including `"headtails"` and `"log10_pretty"`.
The `"headtails"` style is an implementation of the head/tail breaks method aimed at heavily right-skewed data.
In it, the values of the given variable are divided around the mean into two parts, and the process continues iteratively for the values above the mean (the head) until the values in the head part no longer have a heavy-tailed distribution [@jiang_head_2013].
The `"log10_pretty"` style uses a logarithmic base-10 transformation (@fig-intervals-methods-4).
In this style, each class starts with a value ten times larger than the beginning of the previous class.
In other words, each following class shows us the next order of magnitude.
This style allows for a better distinction between low, medium, and high values.
However, maps with logarithmically transformed variables are usually less intuitive for the readers and require more attention from them.

```{r}
#| label: intervals-methods4
#| eval: false
tm_shape(slo_regions) +
  tm_polygons(fill = "pop_dens",
              fill.scale = tm_scale_intervals(style = "log10_pretty"))
```

```{r}
#| label: fig-intervals-methods
#| warning: false
#| message: false
#| echo: false
#| layout-ncol: 2
#| fig-cap: "Examples of four methods of creating intervals maps."
#| fig-subcap:
#|   - '"pretty" method'
#|   - '"fixed" method'
#|   - '"jenks" method'
#|   - '"log10_pretty" method'
<<intervals-methods1>>
<<intervals-methods2>>
<<intervals-methods3>>
<<intervals-methods4>>
```

<!-- The numeric variable can be either regarded as a continuous variable or a count (integer) variable. See as.count. Only applicable if style is "pretty", "fixed", or "log10_pretty". -->

## Continuous scales {#sec-continuous-scales}

\index{continuous maps}
\index{tm\_scale\_continuous}
\index{tm\_scale\_rank}
\index{tm\_scale\_continuous\_log10}
Continuous maps also represent continuous numerical variables, but without any discrete class intervals (@fig-cont-methods).
A few continuous methods exist in **tmap**, including `tm_scale_continuous()`, `tm_scale_rank()`, and `tm_scale_continuous_log10()`.

The `tm_scale_continuous()` function creates a smooth, linear gradient.
In other words, the change in values is proportionally related to the change in colors.
We can see that in @fig-cont-methods-1, where the value change from 50 to 100 has a similar impact on the color scale as the value change from 100 to 150.
The continuous scale is similar to the pretty style, where the values also change linearly.
The main difference between them is that we can distinguish between, for example, values of 110 and 140 in the former, while both values have exactly the same color in the latter.
The continuous scale works well in situations where there is a large number of spatial vector objects or a large number of raster cells, and where the values change continuously (do not have many outliers).

```{r}
#| label: cont-methods1
#| eval: false
tm_shape(slo_regions) +
  tm_polygons(fill = "pop_dens",
              fill.scale = tm_scale_continuous())
```

However, when the presented variable is skewed or has some outliers, we can use either `tm_scale_rank()` or `tm_scale_continuous_log10()`.
The `tm_scale_rank()` scale also uses a smooth gradient with a large number of colors, but the values on the legend do not change linearly (@fig-cont-methods-2).
<!--JN: Martijn, please check the next sentence for correctness-->
It is fairly analogous to the `"quantile"` style, with the values on a color scale that divides a dataset into several equal-sized groups.

```{r}
#| label: cont-methods2
#| eval: false
tm_shape(slo_regions) +
  tm_polygons(fill = "pop_dens",
              fill.scale = tm_scale_rank())
```

Finally, the `tm_scale_continuous_log10()` scale is the continuous equivalent of the `"log10_pretty"` style of `tm_scale_intervals()` (@fig-cont-methods-3).

```{r}
#| label: cont-methods3
#| eval: false
tm_shape(slo_regions) +
  tm_polygons(fill = "pop_dens",
              fill.scale = tm_scale_continuous_log10())
```

```{r}
#| label: fig-cont-methods
#| warning: false
#| message: false
#| echo: false
#| fig-height: 3.46
#| layout-nrow: 3
#| fig-cap: "Examples of three methods of creating continuous maps."
#| fig-subcap:
#|   - '"continuous" method'
#|   - '"rank" method'
#|   - '"log10" method'
<<cont-methods1>>
<<cont-methods2>>
<<cont-methods3>>
```

## As-is scales {#sec-asis-scales}

\index{as-is scales}
In most cases, we want to use a color scale based on the variable's values.
Then, **tmap** takes these values, groups them into classes, and assigns a color to each class.
However, it is also possible to use the values of the variables directly to specify the fill color or other visual properties of the spatial objects.
For example, our spatial vector data may contain a column with colors, such as `"blue"`, `"red"`, and `"green"`, and we may want to use these colors directly to color the polygons.
This is known as an as-is scale, and it can be used with the `tm_scale_asis()` function.

Let's take a look at the `urbn_type_col` variable in the `slo_regions` dataset.
It contains the color codes (in a hexadecimal format) that we want to use to color the polygons.

```{r}
slo_regions$urbn_type_col
```

To fill each polygon with the color specified in the `urbn_type_col` variable, we should specify that column with the `fill` argument of the `tm_polygons()` function and then set the `fill.scale` argument to `tm_scale_asis()` (@fig-asis-scales).

```{r}
#| label: fig-asis-scales
#| warning: false
#| message: false
#| fig-cap: "Example of a map in which polygons are directly colored based on the values of a variable (an as-is scale)."
tm_shape(slo_regions) +
  tm_polygons(fill = "urbn_type_col",
              fill.scale = tm_scale_asis())
```

This scale does not return any legend -- it is not needed, as the colors are already specified in the data.

The `tm_scale_asis()` function can be used not only for the `fill` argument, but also for the `col`, `size`, `shape`, and other related arguments.
In such cases, the values of the variable should be appropriate for the visual property, e.g., a set of colors for `col`, a set of numeric values representing sizes for `size`, or a set of values representing shapes for `shape`.

```{r}
#| echo: false
#| eval: false
tm_shape(slo_regions) +
  tm_polygons() +
  tm_symbols(size = "urbn_type",
             size.scale = tm_scale_asis())
```

## RGB scales {#sec-rgb-scales}

```{r}
#| message: false
library(terra)
sat = rast("data/slovenia/slo_mosaic.tif")
```

\index{RGB scales}
\index{tm\_scale\_rgb}
The `sat` **terra** object contains four bands of the Sentinel-2 image for Slovenia.
The bands (blue, green, red, and near-infrared) are stored as layers named `B02`, `B03`, `B04`, and `B08`.
We can plot all of the bands independently or as a combination of three bands.
This combination is known as a color composite image, and we can create such images with the `tm_rgb()` function (@fig-tmrgbs).

A standard composite image (*true color composite*) uses the visible red, green, and blue bands to represent the data in natural colors.
We can specify which layer in `sat` relates to red (third band), green (second band), and blue (first band) color in `tm_rgb()` with the `tm_vars()` function.
The result is a true color composite, with green colors representing forests and other types of vegetation, and yellow-ish colors showing bare areas (@fig-tmrgbs-1).

```{r}
#| label: tmrgbs1
#| eval: false
tm_shape(sat) +
  tm_rgb(tm_vars(x = c("B04", "B03", "B02"),  multivariate = TRUE),
         col.scale = tm_scale_rgb(stretch = TRUE, probs = c(0.02, 0.98)))
```

::: {.callout-note}
The `tm_vars()` function allows us to specify more than one variable to be visualized at the same time.
This is useful when we want to present a few variables as facets or combine multiple variables or layers to create a color composite image (`tm_rgb()`), a bivariate map, or an animation.
Its main arguments:

- `x`: variable names, variable indices, or a dimension name (for `sf`, `stars`, and `SpatRaster` objects)
- `dimvalues`: dimension values (for `stars` objects)
- `n`: (if specified) the first *n* variables that are used
- `multivariate`: in case multiple variables are specified, should they serve as facets (FALSE; default) or as a multivariate visual variable?
<!-- - `animate`: should the variable(s) be animated? -->
:::

True color images are straightforward to interpret and understand, but they make subtle differences in features challenging to recognize.
However, nothing stops us from using the above tools to integrate different bands to create so-called *false color composites*.
Various band combinations emphasize some spatial characteristics, such as water, agriculture, etc., and allow us to visualize wavelengths that our eyes cannot see.
<!-- add some reference?? -->
@fig-tmrgbs-2 shows a composite of near-infrared, red, and green bands, highlighting vegetation with a bright red color.

```{r}
#| label: tmrgbs2
#| eval: false
tm_shape(sat) +
  tm_rgb(tm_vars(x = c("B08", "B04", "B03"), multivariate = TRUE),
         col.scale = tm_scale_rgb(stretch = TRUE, probs = c(0.02, 0.98)))
```

```{r}
#| label: fig-tmrgbs
#| message: false
#| echo: false
#| layout-nrow: 1
#| fig-cap: 'Two color composite images'
#| fig-subcap:
#|   - 'True color composite image'
#|   - 'False color composite image'
<<tmrgbs1>>
<<tmrgbs2>>
```

The above examples use **terra**'s `SpatRaster` objects as input, but the `tm_rgb()` function can also work with `stars` objects.
For example, we can read the same GeoTIFF file with the `read_stars()` function and then use the `tm_rgb()` function to create a color composite image.
However, in this case we need to specify the `dimvalues` argument of the `tm_vars()` function as the `sat_stars` object consists of one attribute (`x = "slo_mosaic.tif"`) with four dimensions (bands; `dimvalues = c("B04", "B03", "B02")`).

```{r}
#| label: tmrgbs1s
#| eval: false
#| message: false
library(stars)
sat_stars = read_stars("data/slovenia/slo_mosaic.tif")
tm_shape(sat_stars) +
  tm_rgb(tm_vars(x = "slo_mosaic.tif", dimvalues = c("B04", "B03", "B02"),
                 multivariate = TRUE),
         col.scale = tm_scale_rgb(stretch = TRUE, probs = c(0.02, 0.98)))
```

## Bivariate scales {#sec-bivariate-scales}

\index{bivariate maps}
In cases when we have two variables that we want to visualize, the most common approach is to create two separate maps, one for each variable (@fig-bivariate-scales1).
We can also do that using the `tm_vars()` function, which allows us to specify multiple variables (see the code below) or with the facet functions (see @sec-facets).

```{r}
#| label: fig-bivariate-scales1
#| message: false
#| fig-cap: "Example of two separate maps, one for each variable."
tm_shape(slo_regions) +
  tm_polygons(fill = tm_vars(c("pop_dens", "pop65perc")),
              fill.scale = tm_scale(values = "purples"))
```

Another approach is to create a bivariate map, where the two variables are combined into a single map.
This is done by representing not only the values of each variable independently but also their interaction using a bivariate color scale.
@fig-bivariate-scales2 shows an example of a bivariate map, where the population density and the percentage of the population aged 65 and older are combined into a single map.
Areas with a small population density and a small percentage of the population aged 65 and older are colored in gray, while areas with a high population density and a high percentage of the population aged 65 and older are colored in dark brown.
Additionally, the areas with a high population density and a low percentage of the population aged 65 and older are colored in purple, while areas with a low population density and a high percentage of the population aged 65 and older are colored in gold.

\index{tm\_scale\_bivariate}
\index{tm\_legend\_bivariate}
The creation of bivariate maps is done with the `tm_vars()` function, where we specify the two variables we want to combine and set the `multivariate` argument to `TRUE`.
Bivariate maps also require a special color palette, which can be specified with `tm_scale_bivariate()` provided to the `*.scale` argument.
<!-- advice regarding color palettes? -->
Additionally, we can modify the `*.legend` argument with `tm_legend_bivariate()`.
In the case of bivariate maps, the `xlab` and `ylab` arguments are used to add labels to the x and y axes of the legend, respectively.

```{r}
#| label: fig-bivariate-scales2
#| message: false
#| fig-cap: "Example of a bivariate map, where two variables are combined into a single map."
tm_shape(slo_regions) +
  tm_polygons(fill = tm_vars(c("pop_dens", "pop65perc"), multivariate = TRUE),
              fill.scale = tm_scale_bivariate(values = "purplegold"),
              fill.legend = tm_legend_bivariate(xlab = "Pop. 65+ %",
                                                ylab = "Pop. density\n(per sq. km)"))
```

A map of the relationship between two variables can also be created for raster data, such as `SpatRaster` and `stars` objects.
Below, we read the monthly average temperature for Slovenia from a GeoTIFF file and then select only the two months of interest -- January and July.
Our aim is to visualize how winter and summer temperatures differ in Slovenia.

```{r}
library(terra)
slo_tavg = rast("data/slovenia/slo_tavg.tif")
slo_tavg = slo_tavg[[c(1, 7)]] # select only January and July
```

As you can see below, the syntax to create a bivariate map using `tm_vars()` is the same as for vector data, except in this case, we use `tm_raster()` instead of `tm_polygons()` (@fig-tmrastbiv-1).

```{r}
#| label: tmrastbiv1
#| eval: false
tm_shape(slo_tavg) +
  tm_raster(col = tm_vars(x = c("tavg_1", "tavg_7"), multivariate = TRUE))
```

One important difference between a regular map and a bivariate map is that the latter is based on two scales -- and we are able to specify them separately.
Thus, all of the scales from sections [-@sec-categorical-scales] to [-@sec-continuous-scales] can be used for bivariate maps, and we may use one of them for each variable.
In the example below, we use two scales for the temperature -- one for the January temperatures and one for the July temperatures.
In both cases, our scales are intervals, but we customize their breaks and add text labels.

\index{bivariate color scales}
Another difference is that a regular color scale is one-dimensional -- it has only one axis, and thus we can either use its original form (e.g., from light to dark blue) or flip it (e.g., from dark to light blue).
A bivariate color scale, on the other hand, is two-dimensional.
It has two axes, and this opens up many more possibilities for modifying the scale.
In the example below, we are using the `"pinkblue"` color scale, which, by default, features a gray color at the bottom left, with the vertical axis transitioning to purple and the horizontal axis transitioning to green.
The colors mix in the top right corner to form a dark blue.
However, in our case, we may consider flipping this color scale along the main diagonal, so that the vertical axis transitions to green, while the horizontal one transitions to purple.
This seems more intuitive, as the vertical axis represents January temperatures, which are typically colder than July temperatures in Slovenia.

```{r}
#| label: tmrastbiv2
#| eval: false
tm_shape(slo_tavg) +
  tm_raster(col = tm_vars(c("tavg_1", "tavg_7"), multivariate = TRUE),
            col.scale = tm_scale_bivariate(
              values = "//pinkblue",
              scale1 = tm_scale_intervals(
                breaks = c(-8, -4, 0, 4, 8),
                labels = c("Very cold", "Cold", "Mild", "Warm")
              ),
              scale2 = tm_scale_intervals(
                breaks = c(5, 10, 15, 20, 25),
                labels = c("Very cold", "Cold", "Mild", "Warm")
              )
            ),
            col.legend = tm_legend_bivariate(
              xlab = "July", ylab = "January", reverse = FALSE
            )
          )
```

@fig-tmrastbiv-2 shows a customized bivariate map for the January and July temperatures in Slovenia.
We may see that most of the country is characterized by mild temperatures in both months, while the cold temperatures are observed in the mountainous regions in the northwest, and the warm temperatures are observed in the southwest part of the country.
There are also a few areas where the temperature category is very different between the two months, such as the northeastern part of the country, with warm July and cold January temperatures.

```{r}
#| label: fig-tmrastbiv
#| message: false
#| echo: false
#| layout-ncol: 1
#| fig-cap: "Bivariate maps for raster data."
#| fig-subcap:
#|   - simple map
#|   - customized map
<<tmrastbiv1>>
<<tmrastbiv2>>
```

<!-- https://github.com/cols4all/cols4all-R/issues/45 -->

All possible flipping operators for bivariate color scales are shown in @tbl-flipops-table.

```{r}
#| echo: false
flipops = tibble::tribble(
  ~Operator,  ~Description,
  "",         "Normal order",
  "-",        "Flip columns (left-right)",
  "|",        "Flip rows (top-bottom)",
  "+",        "Flip rows and columns",
  "//",        "Flip main diagonal (top-left to bottom-right)",
  "\\\\",     "Flip other diagonal (top-right to bottom-left)",
  "-//",       "Flip main diagonal and columns",
  "-\\\\",    "Flip other diagonal and columns"
)
```

```{r}
#| label: tbl-flipops-table
#| tbl-cap: "Operators for flipping bivariate color scales."
#| echo: false
#| warning: false
#| message: false
kableExtra::kbl(flipops, booktabs = TRUE) |>
  kableExtra::kable_styling("striped",
                            latex_options = "striped",
                            full_width = FALSE) |>
  kableExtra::column_spec(1, monospace = TRUE)
```

Additionally, @fig-tmo-bivariate shows how these flipping operators work in action on an example bivariate color scale.

```{r}
#| label: fig-tmo-bivariate
#| fig-cap: "Bivariate color scales with different flipping operators."
#| message: false
#| warning: false
#| echo: false
#| fig-asp: 0.5
# Build a legend-only tmap object previewing the "pinkblue" bivariate palette
# with the flipping operator `op` applied.
#
# `op` is a character string; it is prepended to the palette name and is also
# used as the legend title. The function reads `slo_tavg` from the enclosing
# environment and returns the composed tmap object.
tmo_maker = function(op) {
  # The two layers are combined into one multivariate visual variable.
  layer_pair = tm_vars(c("tavg_1", "tavg_7"), multivariate = TRUE)

  # Legend titled with the operator itself; axis labels are left empty.
  preview_legend = tm_legend(title = op,
                             title.size = 2,
                             title.align = "center",
                             xlab = "", ylab = "",
                             item.width = 3, item.height = 3,
                             frame = FALSE)

  # Three intervals per axis, with empty class labels.
  first_scale = tm_scale_intervals(breaks = c(-8, -4, 4, 8),
                                   labels = "")
  second_scale = tm_scale_intervals(breaks = c(5, 10, 20, 25),
                                    labels = "")
  preview_scale = tm_scale_bivariate(values = paste0(op, "pinkblue"),
                                     scale1 = first_scale,
                                     scale2 = second_scale)

  tm_shape(slo_tavg) +
    tm_raster(col = layer_pair,
              col.legend = preview_legend,
              col.scale = preview_scale) +
    # only the legend is drawn, without margins around it
    tm_layout(legend.only = TRUE, inner.margins = 0, outer.margins = 0) +
    tm_options(component.autoscale = FALSE)
}
# One legend-only preview per flipping operator, arranged in a 4 x 2 grid.
# The first entry is a single space -- presumably the "normal order" case.
flip_ops = c(" ", "-", "|", "+", "//", "\\\\", "-//", "-\\\\")
flip_previews = lapply(flip_ops, tmo_maker)
tmap_arrange(flip_previews, ncol = 4, nrow = 2, outer.margins = 0)
```


<!-- Other ideas: -->

<!-- A section with some common usecases and their comparison: -->
<!-- 1. Simple variable (e.g., population density) using two or so scales -->
<!-- 2. Relative change/ratio/logarith of ratio -->