Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/OperationsAndTroubleshootingGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,16 @@ riak eval "riak_client:remove_node_from_coverage()."

#### Completing a Repair

The data can then be recovered from the other nodes in the cluster issuing the `riak_client:repair_node()` command from the `remote_console` of the replacement node. This will prompt all vnodes which partially overlap the data held in the vnodes on the replacement node to race to play a role in repairing the node. Each vnode will only repair the data which overlaps, filtering out any data that another vnode has already repaired (or is in the process of repairing).
The data can then be recovered from the other nodes in the cluster issuing the `riak admin node repair start [-n NODE]` command. This will prompt all vnodes which partially overlap the data held in the vnodes on the replacement node to race to play a role in repairing the node. Each vnode will only repair the data which overlaps, filtering out any data that another vnode has already repaired (or is in the process of repairing).

<span>Available from Riak 3.4.0</span>{: .label .label-purple }To improve the performance of repair, the `repair_span` configuration in the [riak_core schema section of riak.conf](https://github.com/OpenRiak/riak_core/blob/openriak-3.4/priv/riak_core.schema) can be changed to `double_pair`, and this has been proven to be more effective when used with the leveled backend together with the enablement of the `repair_deferred` option in the [riak_kv schema section of riak.conf](https://github.com/OpenRiak/riak_kv/blob/openriak-3.4/priv/riak_kv.schema).

The combination of `repair_span = double_pair, repair_deferred = enabled` is significantly more effective when repairing under load. With these configuration options, it should be noted that repairs will happen in key order, not in reverse order of receipt (the default). With these changes, using the leveled backend, non-functional testing demonstrates that repairs can complete efficiently even when nodes are persistently at 100% CPU utilisation due to the handling of application requests.

Repair uses handoffs, and so can be tracked as with other cluster change operations. Once handoffs are complete, Tictac AAE should be re-enabled, e.g. by using `riak_client:tictacaae_resume_node().`. Once Tictac AAE confirms all vnodes are in-sync - then [`participate_in_coverage` can be re-enabled](#riak_client-remote_console-commands).

The progress of repairs can be inspected with `riak admin node repair status`, and stopped with `riak admin node repair stop`.

### Rolling Replacement

A rolling replacement is an extension of the [proactive replacement](#proactive-replacement) process. In a rolling replacement, a group of new nodes are installed. There is then a rolling process where some nodes are proactively replaced by the new nodes; and once those replaced nodes are free - they are use to proactively replace other nodes in the cluster.
Expand Down
3 changes: 2 additions & 1 deletion src/riak_kv_cli_registry.erl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
-module(riak_kv_cli_registry).

-define(CLI_MODULES, [riak_kv_tictacaae_cli,
riak_kv_vnode_status_cli
riak_kv_vnode_status_cli,
riak_kv_node_cli
]).

-export([register_cli/0
Expand Down
239 changes: 239 additions & 0 deletions src/riak_kv_node_cli.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2026 TI Tokyo. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

-module(riak_kv_node_cli).

-behaviour(clique_handler).

-include_lib("kernel/include/logger.hrl").

-export([register_cli/0]).

register_cli() ->
register_all_usage(),
register_all_commands().

register_all_usage() ->
clique:register_usage(["riak-admin", "node"], node_usage()),
clique:register_usage(["riak-admin", "node", "repair"], node_repair_usage()),
clique:register_usage(["riak-admin", "node", "repair", "start"], node_repair_start_usage()),
clique:register_usage(["riak-admin", "node", "repair", "status"], node_repair_status_usage()),
clique:register_usage(["riak-admin", "node", "repair", "stop", '*'], node_repair_stop_usage()).

register_all_commands() ->
lists:foreach(
fun(Args) -> apply(clique, register_command, Args) end,
[node_repair_status_specs(),
node_repair_start_specs(),
node_repair_stop_specs()
]).

node_usage() ->
["riak admin node repair { start | status | stop REASON } [OPTIONS]\n",
"See individual subcommand usage for options and arguments.\n"
].
node_repair_usage() ->
node_usage().

node_repair_status_usage() ->
["riak admin node repair status [-n|--node NODE|all] [-f|--format table|json]\n",
"Print status of any ongoing partition repairs on NODE.\n"
].

node_repair_start_usage() ->
["riak admin node repair start [-n|--node NODE]\n",
"Start partition repair on NODE.\n"
].

node_repair_stop_usage() ->
["riak admin node repair stop [-n|--node NODE|all] REASON\n",
"Kill all an ongoing partition repair on NODE, with REASON.\n"
].

-define(NODEOPT, {node, [{shortname, "n"},
{longname, "node"},
{typecast, fun to_node/1}]}).
-define(FMTOPTION, {format, [{shortname, "f"},
{longname, "format"},
{typecast, fun to_fmt/1}]}).

node_repair_status_specs() ->
[["riak-admin", "node", "repair", "status"],
[], [?NODEOPT, ?FMTOPTION],
fun(A, B, C) -> main(fun node_repair_status_cmd/3, A, B, C) end
].
node_repair_start_specs() ->
[["riak-admin", "node", "repair", "start"],
[], [?NODEOPT],
fun(A, B, C) -> main(fun node_repair_start_cmd/3, A, B, C) end
].
node_repair_stop_specs() ->
[["riak-admin", "node", "repair", "stop", '*'],
[], [?NODEOPT],
fun(A, B, C) -> main(fun node_repair_stop_cmd/3, A, B, C) end
].


main(Fun, A, B, C) ->
try
Fun(A, B, C)
catch
Class:Reason:Stack ->
logger:error("node repair: handler failed: ~p:~p stack=~p",
[Class, Reason, Stack]),
[alert("Error: ~p:~p", [Class, Reason])]
end.

node_repair_status_cmd(_Cmd, _Args, Opts) ->
Nodes =
case [A || {node, A} <- Opts] of
[all] ->
[node() | nodes()];
[] ->
[node()];
NN ->
NN
end,
Fmt = extract_fmt_option(Opts),

Res = get_node_repair_status(Nodes),

case Fmt of
table ->
Table =
[begin
Rows =
[[{mod, Mod}, {idx, Idx}, {pid, list_to_binary(pid_to_list(Pid))}]
|| {Mod, Idx, Pid} <- NRes],
case Rows of
[] ->
text("No active node repairs on ~s\n", [Node]);
_ ->
[text("Node repairs on ~s", [Node]), table(Rows)]
end
end || {Node, NRes} <- Res],
lists:flatten(Table);
json ->
[text("~s", [riak_kv_wm_json:encode(
[#{node => Node,
status => [jsonify_status(S) || S <- Statuses]}
|| {Node, Statuses} <- Res])])]
end.

get_node_repair_status(Nodes) ->
[begin
Vnodes = erpc:call(Node, riak_core_vnode_manager, all_vnodes, []),
Statuses = [{Mod, Idx, Pid,
erpc:call(Node, riak_core_vnode_manager, repair_status, [{Mod, Idx}])}
|| {Mod, Idx, Pid} <- Vnodes],
{Node, [{Mod, Idx, Pid} || {Mod, Idx, Pid, Status} <- Statuses, Status /= not_found]}
end || Node <- Nodes].

nodes_running_repair() ->
AllSS = get_node_repair_status([node() | nodes()]),
[N || {N, SS} <- AllSS, SS /= []].

jsonify_status({Mod, Idx, Pid}) ->
#{mod => Mod,
idx => Idx,
pid => list_to_binary(pid_to_list(Pid))}.

node_repair_start_cmd(_Cmd, _Args, Opts) ->
Node =
case [A || {node, A} <- Opts] of
[all] -> invalid;
[] -> node();
[N] -> N;
_ -> invalid
end,
case Node of
invalid ->
[alert("Error: node repair can be started on one node at a time")];
Node ->
case nodes_running_repair() of
[] ->
case erpc:call(Node, riak_client, repair_node, []) of
ok ->
[text("Node repair started on ~s.", [Node])];
{error, BadRpcReason} ->
[alert("Error: failed to start node repair on ~s: ~p", [Node, BadRpcReason])]
end;
NwAA ->
[alert("There are repairs currently ongoing on node~s ~s.\n"
"Wait until these are completed before starting a new node repair.",
[ending(NwAA), string:join([atom_to_list(N) || N <- NwAA], ",")])]
end
end.

node_repair_stop_cmd([_, _, _, _, Reason], _, Opts) ->
Nodes =
case [A || {node, A} <- Opts] of
[all] ->
[node() | nodes()];
[] ->
[node()];
NN ->
NN
end,
AllNodesWithRepairs = nodes_running_repair(),
Items =
[begin
case lists:member(Node, AllNodesWithRepairs) of
false ->
io_lib:format("\n* ~s: No active repairs", [Node]);
true ->
case erpc:call(Node, riak_core_vnode_manager, kill_repairs, [Reason]) of
ok ->
io_lib:format("\n* ~s: Node repair stopped", [Node]);
{error, BadRpcReason} ->
io_lib:format("\n* ~s: Failed to stop node repair on: ~p", [Node, BadRpcReason])
end
end
end || Node <- Nodes],
[clique_status:list("Stopping repairs", Items)].


extract_fmt_option(Opts) ->
case [A || {format, A} <- Opts] of
[] -> table;
["table"] -> table;
["json"] -> json;
_ -> invalid
end.

to_node("all") ->
all;
to_node(A) ->
clique_typecast:to_node(A).

to_fmt(A) ->
A.

text(F, A) ->
clique_status:text(lists:flatten(io_lib:format(F, A))).
alert(S) ->
alert(S, []).
alert(F, A) ->
clique_status:alert([text(F, A)]).
table(A) ->
clique_status:table(A).

ending([_]) -> "";
ending(_) -> "s".
8 changes: 4 additions & 4 deletions src/riak_kv_tictacaae_cli.erl
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ fold_cmd([_, _, _ | Items], Keys, Options) ->
},
{ok, BB} = riak_client:aae_fold(Query),
Printable = [printable_bin(B) || B <- BB],
io:format(FD, "~s\n", [mochijson2:encode(Printable)])
io:format(FD, "~s\n", [riak_kv_wm_json:encode(Printable)])
end);

["find-keys"] ->
Expand All @@ -697,7 +697,7 @@ fold_cmd([_, _, _ | Items], Keys, Options) ->
Printable = [#{<<"key">> => printable_bin(K),
<<"sibling_count">> => SibCnt
} || {_B, K, SibCnt} <- KK],
io:format(FD, "~s\n", [mochijson2:encode(Printable)])
io:format(FD, "~s\n", [riak_kv_wm_json:encode(Printable)])
end);

["count-keys"] ->
Expand Down Expand Up @@ -730,7 +730,7 @@ fold_cmd([_, _, _ | Items], Keys, Options) ->
Printable = [#{bucket => printable_bin(B),
key => printable_bin(K),
vclock => printable_vclock(VC)} || {B, K, VC} <- TT],
io:format(FD, "~s\n", [mochijson2:encode(Printable)])
io:format(FD, "~s\n", [riak_kv_wm_json:encode(Printable)])
end);

["count-tombstones"] ->
Expand Down Expand Up @@ -779,7 +779,7 @@ fold_cmd([_, _, _ | Items], Keys, Options) ->
TS = proplists:get_value(total_size, SS),
Sizes = proplists:get_value(sizes, SS),
Siblings = proplists:get_value(siblings, SS),
io:format(FD, "~s\n", [mochijson2:encode(
io:format(FD, "~s\n", [riak_kv_wm_json:encode(
#{total_count => TC,
total_size => TS,
sizes => [#{min => Min,
Expand Down
Loading