diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MapreduceRestoreSnapshotHelper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MapreduceRestoreSnapshotHelper.java new file mode 100644 index 000000000000..628ac63ef2c1 --- /dev/null +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MapreduceRestoreSnapshotHelper.java @@ -0,0 +1,831 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.mapreduce; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.io.Reference; +import org.apache.hadoop.hbase.mob.MobUtils; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; +import org.apache.hadoop.hbase.regionserver.StoreContext; +import org.apache.hadoop.hbase.regionserver.StoreFileInfo; +import org.apache.hadoop.hbase.regionserver.StoreUtils; +import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; +import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; +import 
org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotManifest; +import org.apache.hadoop.hbase.snapshot.SnapshotTTLExpiredException; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.MapreduceHFileArchiver; +import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.io.IOUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Helper to Restore/Clone a Snapshot + *
+ * The helper assumes that a table is already created, and by calling restore() the content present + * in the snapshot will be restored as the new content of the table. + *
+ * Clone from Snapshot: If the target table is empty, the restore operation is just a "clone + * operation", where the only operations are: + *
+ * Restore from Snapshot: + *
+ * The store file in the snapshot can be a simple hfile, an HFileLink or a reference. + *
+ *
+ * + *+ * @param familyDir destination directory for the store file + * @param regionInfo destination region info for the table + * @param storeFile reference file name + */ + private StoreFileInfo restoreReferenceFile(final Path familyDir, final RegionInfo regionInfo, + final SnapshotProtos.SnapshotRegionManifest.StoreFile storeFile, final StoreFileTracker tracker) + throws IOException { + String hfileName = storeFile.getName(); + StoreFileInfo storeFileInfo = null; + + // Extract the referred information (hfile name and parent region) + Path refPath = + StoreFileInfo + .getReferredToFile( + new Path( + new Path( + new Path(new Path(snapshotTable.getNamespaceAsString(), + snapshotTable.getQualifierAsString()), regionInfo.getEncodedName()), + familyDir.getName()), + hfileName)); + String snapshotRegionName = refPath.getParent().getParent().getName(); + String fileName = refPath.getName(); + + // The new reference should have the cloned region name as parent, if it is a clone. + String clonedRegionName = Bytes.toString(regionsMap.get(Bytes.toBytes(snapshotRegionName))); + if (clonedRegionName == null) clonedRegionName = snapshotRegionName; + + // The output file should be a reference link table=snapshotRegion-fileName.clonedRegionName + Path linkPath = null; + String refLink = fileName; + if (!HFileLink.isHFileLink(fileName)) { + refLink = HFileLink.createHFileLinkName(snapshotTable, snapshotRegionName, fileName); + linkPath = new Path(familyDir, + HFileLink.createHFileLinkName(snapshotTable, regionInfo.getEncodedName(), hfileName)); + } + + Path outPath = new Path(familyDir, refLink + '.' 
+ clonedRegionName); + + // Create the new reference + if (storeFile.hasReference()) { + Reference reference = Reference.convert(storeFile.getReference()); + tracker.createAndCommitReference(reference, outPath); + storeFileInfo = new StoreFileInfo(conf, fs, outPath, reference); + } else { + InputStream in; + if (linkPath != null) { + HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, linkPath); + storeFileInfo = new StoreFileInfo(conf, fs, outPath, hfileLink); + tracker.add(Collections.singletonList(storeFileInfo)); + in = hfileLink.open(fs); + } else { + linkPath = new Path(new Path( + HRegion.getRegionDir(snapshotManifest.getSnapshotDir(), regionInfo.getEncodedName()), + familyDir.getName()), hfileName); + in = fs.open(linkPath); + } + OutputStream out = fs.create(outPath); + IOUtils.copyBytes(in, out, conf); + } + + // Add the daughter region to the map + String regionName = Bytes.toString(regionsMap.get(regionInfo.getEncodedNameAsBytes())); + if (regionName == null) { + regionName = regionInfo.getEncodedName(); + } + LOG.debug("Restore reference {} to {}", regionName, clonedRegionName); + synchronized (parentsMap) { + Pair+ * The source table looks like: + * 1234/abc (original file) + * 5678/abc.1234 (reference file) + * + * After the clone operation looks like: + * wxyz/table=1234-abc + * stuv/table=1234-abc.wxyz + * + * NOTE that the region name in the clone changes (md5 of regioninfo) + * and the reference should reflect that change. + *+ * + *
+ * If the same file already exists in the archive, it is moved to a timestamped directory under
+ * the archive directory and the new file is put in its place.
+ * @param archiveDir {@link Path} to the directory that stores the archives of the hfiles
+ * @param currentFile {@link MapreduceHFileArchiver.File} wrapping the original HFile that will be archived
+ * @param archiveStartTime time the archiving started, to resolve naming conflicts
+ * @return true if the file is successfully archived. false if there was a
+ * problem, but the operation still completed.
+ * @throws IOException on failure to complete {@link FileSystem} operations.
+ */
+ private static boolean resolveAndArchiveFile(Path archiveDir,
+     MapreduceHFileArchiver.File currentFile, String archiveStartTime) throws IOException {
+   // Contract, as implemented below:
+   // - returns true when currentFile was moved to archiveDir/<name>, or when currentFile no
+   // longer exists (nothing left to archive);
+   // - returns false when every one of the DEFAULT_RETRIES_NUMBER move attempts failed;
+   // - throws IOException when a same-named archive file of a different size already exists, or
+   // when a same-named archive file can be neither renamed to a backup nor deleted.
+
+   // build path as it should be in the archive
+   String filename = currentFile.getName();
+   Path archiveFile = new Path(archiveDir, filename);
+   FileSystem fs = currentFile.getFileSystem();
+
+   // An existing destination file in the archive is unexpected, but we handle it here.
+   if (fs.exists(archiveFile)) {
+     if (!fs.exists(currentFile.getPath())) {
+       // If the file already exists in the archive, and there is no current file to archive, then
+       // assume that the file in archive is correct. This is an unexpected situation, suggesting a
+       // race condition or split brain.
+       // In HBASE-26718 this was found when compaction incorrectly happened during warmupRegion.
+       LOG.warn("{} exists in archive. Attempted to archive nonexistent file {}.", archiveFile,
+         currentFile);
+       // We return success to match the handling of FileNotFoundException from moveAndClose
+       // further down, which is also treated as "nothing left to do".
+       return true;
+     }
+     // There is a conflict between the current file and the already existing archived file.
+     // Move the archived file to a timestamped backup. This is a really, really unlikely
+     // situation, where we get the same name for the existing file, but is included just for that
+     // 1 in trillion chance. We are potentially incurring data loss in the archive directory if
+     // the files are not identical. The timestamped backup will be cleaned by HFileCleaner as it
+     // has no references.
+     FileStatus curStatus = fs.getFileStatus(currentFile.getPath());
+     FileStatus archiveStatus = fs.getFileStatus(archiveFile);
+     long curLen = curStatus.getLen();
+     long archiveLen = archiveStatus.getLen();
+     long curMtime = curStatus.getModificationTime();
+     long archiveMtime = archiveStatus.getModificationTime();
+     if (curLen != archiveLen) {
+       // Different sizes mean the two files cannot be identical: refuse to touch either of them.
+       LOG.error(
+         "{} already exists in archive with different size than current {}."
+           + " archiveLen: {} currentLen: {} archiveMtime: {} currentMtime: {}",
+         archiveFile, currentFile, archiveLen, curLen, archiveMtime, curMtime);
+       throw new IOException(
+         archiveFile + " already exists in archive with different size" + " than " + currentFile);
+     }
+
+     LOG.error(
+       "{} already exists in archive, moving to timestamped backup and overwriting"
+         + " current {}. archiveLen: {} currentLen: {} archiveMtime: {} currentMtime: {}",
+       archiveFile, currentFile, archiveLen, curLen, archiveMtime, curMtime);
+
+     // move the archive file to the stamped backup
+     Path backedupArchiveFile = new Path(archiveDir, filename + SEPARATOR + archiveStartTime);
+     if (!fs.rename(archiveFile, backedupArchiveFile)) {
+       LOG.error(
+         "Could not rename archive file to backup: {}, deleting existing file in favor of newer.",
+         backedupArchiveFile);
+       // try to delete the existing file, if we can't rename it
+       if (!fs.delete(archiveFile, false)) {
+         throw new IOException("Couldn't delete existing archive file (" + archiveFile
+           + ") or rename it to the backup file (" + backedupArchiveFile
+           + ") to make room for similarly named file.");
+       }
+     } else {
+       LOG.info("Backed up archive file from {} to {}.", archiveFile, backedupArchiveFile);
+     }
+   } else {
+     // Only claim "no existing file" when that is actually what was observed; the conflict branch
+     // above has its own log lines.
+     LOG.trace("No existing file in archive for {}, free to archive original file.", archiveFile);
+   }
+
+   // at this point, we should have a free spot for the archive file
+   boolean success = false;
+   for (int i = 0; !success && i < DEFAULT_RETRIES_NUMBER; ++i) {
+     if (i > 0) {
+       // Ensure that the archive directory exists.
+       // The previous "move to archive" operation has failed probably because
+       // the cleaner has removed our archive directory (HBASE-7643).
+       // (we're in a retry loop, so don't worry too much about the exception)
+       try {
+         if (!fs.exists(archiveDir)) {
+           if (fs.mkdirs(archiveDir)) {
+             LOG.debug("Created archive directory {}", archiveDir);
+           }
+         }
+       } catch (IOException e) {
+         LOG.warn("Failed to create directory {}", archiveDir, e);
+       }
+     }
+
+     try {
+       success = currentFile.moveAndClose(archiveFile);
+     } catch (java.io.FileNotFoundException fnfe) {
+       // Fully qualified so this block does not depend on a java.io.FileNotFoundException import.
+       // The source vanished, so there is nothing to archive. Report success to the caller, but
+       // return here so that the "Archived from ... to ..." message is not logged for a move
+       // that never happened.
+       LOG.warn("Failed to archive {} because it does not exist! Skipping and continuing on.",
+         currentFile, fnfe);
+       return true;
+     } catch (IOException e) {
+       // When HFiles are placed on a filesystem other than HDFS a rename operation can be a
+       // non-atomic file copy operation. It can take a long time to copy a large hfile and if
+       // interrupted there may be a partially copied file present at the destination. We must
+       // remove the partially copied file, if any, or otherwise the archive operation will fail
+       // indefinitely from this point.
+       LOG.warn("Failed to archive {} on try #{}", currentFile, i, e);
+       try {
+         // Best effort: the result is deliberately not checked, the next attempt will tell.
+         fs.delete(archiveFile, false);
+       } catch (java.io.FileNotFoundException ignored) {
+         // This case is fine: there was no partial copy to remove.
+       } catch (IOException ee) {
+         // Complain about other IO exceptions
+         LOG.warn("Failed to clean up from failure to archive {} on try #{}", currentFile, i,
+           ee);
+       }
+     }
+   }
+
+   if (!success) {
+     LOG.error("Failed to archive {}", currentFile);
+     return false;
+   }
+
+   LOG.debug("Archived from {} to {}", currentFile, archiveFile);
+   return true;
+ }
+
+ /**
+ * Without regard for backup, delete a region. Should be used with caution.
+ * @param regionDir {@link Path} to the region to be deleted.
+ * @param fs FileSystem from which to delete the region
+ * @return true on successful deletion, false otherwise
+ * @throws IOException on filesystem operation failure
+ */
+ private static boolean deleteRegionWithoutArchiving(FileSystem fs, Path regionDir)
+ throws IOException {
+ if (fs.delete(regionDir, true)) {
+ LOG.debug("Deleted {}", regionDir);
+ return true;
+ }
+ LOG.debug("Failed to delete directory {}", regionDir);
+ return false;
+ }
+
+
+ /**
+ * Adapt a type to match the {@link MapreduceHFileArchiver.File} interface, which is used internally for handling
+ * archival/removal of files
+ * @param