This commit is contained in:
Fabian Jahr 2024-05-16 03:17:51 +02:00 committed by GitHub
commit 534b1dc688
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 266 additions and 76 deletions

View File

@ -14,6 +14,7 @@
#include <logging.h>
#include <primitives/block.h>
#include <primitives/transaction.h>
#include <protocol.h>
#include <script/interpreter.h>
#include <script/script.h>
#include <uint256.h>
@ -542,3 +543,33 @@ std::unique_ptr<const CChainParams> CChainParams::TestNet()
{
return std::make_unique<const CTestNetParams>();
}
/**
 * Collect the block height of every entry in m_assumeutxo_data.
 *
 * @return One height per assumeutxo entry, in the same order in which the
 *         entries are stored in m_assumeutxo_data.
 */
std::vector<int> CChainParams::GetAvailableSnapshotHeights() const
{
    std::vector<int> snapshot_heights;
    // Exactly one height is produced per entry, so size the buffer up front.
    snapshot_heights.reserve(m_assumeutxo_data.size());
    for (const auto& au_entry : m_assumeutxo_data) {
        snapshot_heights.emplace_back(au_entry.height);
    }
    return snapshot_heights;
}
/**
 * Map network magic bytes (message start characters) to a known chain type.
 *
 * The candidate magics are taken from freshly created chain parameters for
 * mainnet, testnet, regtest and signet. Regtest and signet are created with
 * default-constructed options, so only their default magic is compared.
 *
 * @param message  The magic bytes to look up.
 * @return The first matching chain type (checked in the order main, testnet,
 *         regtest, signet), or std::nullopt if none of them matches.
 */
std::optional<ChainType> GetNetworkForMagic(MessageStartChars& message)
{
    // Copy each magic out of its temporary chain params object; the params
    // themselves are destroyed at the end of each statement.
    const auto magic_main = CChainParams::Main()->MessageStart();
    const auto magic_testnet = CChainParams::TestNet()->MessageStart();
    auto default_regtest_opts = CChainParams::RegTestOptions{};
    const auto magic_regtest = CChainParams::RegTest(default_regtest_opts)->MessageStart();
    auto default_signet_opts = CChainParams::SigNetOptions{};
    const auto magic_signet = CChainParams::SigNet(default_signet_opts)->MessageStart();

    // Element-wise comparison of the queried magic against one candidate.
    const auto is_magic_of = [&message](const auto& candidate) {
        return std::equal(message.begin(), message.end(), candidate.data());
    };

    if (is_magic_of(magic_main)) return ChainType::MAIN;
    if (is_magic_of(magic_testnet)) return ChainType::TESTNET;
    if (is_magic_of(magic_regtest)) return ChainType::REGTEST;
    if (is_magic_of(magic_signet)) return ChainType::SIGNET;
    return std::nullopt;
}

View File

@ -9,6 +9,7 @@
#include <consensus/params.h>
#include <kernel/messagestartchars.h>
#include <primitives/block.h>
#include <protocol.h>
#include <uint256.h>
#include <util/chaintype.h>
#include <util/hash_type.h>
@ -93,6 +94,7 @@ public:
const Consensus::Params& GetConsensus() const { return consensus; }
const MessageStartChars& MessageStart() const { return pchMessageStart; }
uint16_t GetDefaultPort() const { return nDefaultPort; }
/** Return the block heights of all assumeutxo snapshot entries known to these chain parameters. */
std::vector<int> GetAvailableSnapshotHeights() const;
const CBlock& GenesisBlock() const { return genesis; }
/** Default value for -checkmempool and -checkblockindex argument */
@ -183,4 +185,6 @@ protected:
ChainTxData chainTxData;
};
/**
 * Look up which known network uses the given message start (network magic)
 * bytes.
 *
 * @param pchMessageStart  The magic bytes to identify.
 * @return The matching ChainType, or std::nullopt when the bytes do not
 *         belong to any network this function knows about.
 */
std::optional<ChainType> GetNetworkForMagic(MessageStartChars& pchMessageStart);
#endif // BITCOIN_KERNEL_CHAINPARAMS_H

View File

@ -6,16 +6,22 @@
#ifndef BITCOIN_NODE_UTXO_SNAPSHOT_H
#define BITCOIN_NODE_UTXO_SNAPSHOT_H
#include <chainparams.h>
#include <kernel/chainparams.h>
#include <kernel/cs_main.h>
#include <serialize.h>
#include <sync.h>
#include <uint256.h>
#include <util/chaintype.h>
#include <util/fs.h>
#include <cstdint>
#include <optional>
#include <string_view>
//! UTXO set snapshot magic bytes: the ASCII characters "utxo" followed by 0xff.
//! Declared `inline` rather than `static` so that every translation unit that
//! includes this header refers to one and the same object. The constant is
//! passed by reference from inline function templates in this header, and a
//! per-translation-unit `static` copy would give those functions a different
//! definition in each translation unit.
inline constexpr std::array<uint8_t, 5> SNAPSHOT_MAGIC_BYTES = {'u', 't', 'x', 'o', 0xff};
class Chainstate;
namespace node {
@ -23,10 +29,14 @@ namespace node {
//! assumeutxo Chainstate can be constructed.
class SnapshotMetadata
{
const uint16_t m_version{1};
const std::set<uint16_t> m_supported_versions{1};
public:
//! The hash of the block that reflects the tip of the chain for the
//! UTXO set contained in this snapshot.
uint256 m_base_blockhash;
uint32_t m_base_blockheight;
//! The number of coins in the UTXO set contained in this snapshot. Used
//! during snapshot load to estimate progress of UTXO set reconstruction.
@ -35,11 +45,55 @@ public:
//! Construct empty metadata, intended to be filled in by Unserialize().
//! NOTE(review): m_base_blockheight is declared without a default member
//! initializer and this constructor does not set it, so it holds an
//! indeterminate value until it is deserialized or assigned.
SnapshotMetadata() { }
//! Construct metadata describing a snapshot.
//! @param base_blockhash    Hash of the block at the tip of the chain the
//!                          snapshot's UTXO set reflects.
//! @param base_blockheight  Height of that block. Taken as int and stored in
//!                          the uint32_t member m_base_blockheight.
//! @param coins_count       Number of coins contained in the snapshot.
SnapshotMetadata(
const uint256& base_blockhash,
const int base_blockheight,
uint64_t coins_count) :
m_base_blockhash(base_blockhash),
m_base_blockheight(base_blockheight),
m_coins_count(coins_count) { }
SERIALIZE_METHODS(SnapshotMetadata, obj) { READWRITE(obj.m_base_blockhash, obj.m_coins_count); }
//! Write the snapshot metadata header to the stream `s`.
//! The fields are written in a fixed order which Unserialize() reads back in
//! the same sequence: snapshot magic bytes, format version, network magic,
//! base block height, base block hash, coins count.
template <typename Stream>
inline void Serialize(Stream& s) const {
// File-type marker identifying the stream as a UTXO set snapshot.
s << SNAPSHOT_MAGIC_BYTES;
// Format version of the metadata written by this code.
s << m_version;
// Network magic of the currently selected global chain parameters; this is
// not a member of the metadata object.
s << Params().MessageStart();
s << m_base_blockheight;
s << m_base_blockhash;
s << m_coins_count;
}
template <typename Stream>
inline void Unserialize(Stream& s) {
// Read the snapshot magic bytes
std::array<uint8_t, SNAPSHOT_MAGIC_BYTES.size()> snapshot_magic;
s >> snapshot_magic;
if (snapshot_magic != SNAPSHOT_MAGIC_BYTES) {
throw std::ios_base::failure("Invalid UTXO set snapshot magic bytes. Please check if this is indeed a snapshot file or if you are using an outdated snapshot format.", std::error_code{});
}
// Read the version
uint16_t version;
s >> version;
if (m_supported_versions.find(version) == m_supported_versions.end()) {
throw std::ios_base::failure(strprintf("Version of snapshot %s does not match any of the supported versions.", version), std::error_code{});
}
// Read the network magic (pchMessageStart)
MessageStartChars message;
s >> message;
if (!std::equal(message.begin(), message.end(), Params().MessageStart().data())) {
auto metadata_network = GetNetworkForMagic(message);
if (metadata_network) {
std::string network_string{ChainTypeToString(metadata_network.value())};
throw std::ios_base::failure(strprintf("The network of the snapshot (%s) does not match the network of this node (%s).", network_string, Params().GetChainTypeString()), std::error_code{});
} else {
throw std::ios_base::failure("This snapshot has been created for an unrecognized network. This could be a custom signet, a new testnet or possibly caused by data corruption.", std::error_code{});
}
}
s >> m_base_blockheight;
s >> m_base_blockhash;
s >> m_coins_count;
}
};
//! The file in the snapshot chainstate dir which stores the base blockhash. This is

View File

@ -34,6 +34,7 @@
#include <rpc/server_util.h>
#include <rpc/util.h>
#include <script/descriptor.h>
#include <serialize.h>
#include <streams.h>
#include <sync.h>
#include <txdb.h>
@ -2696,29 +2697,60 @@ UniValue CreateUTXOSnapshot(
tip->nHeight, tip->GetBlockHash().ToString(),
fs::PathToString(path), fs::PathToString(temppath)));
SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count};
SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count};
afile << metadata;
COutPoint key;
Txid last_hash;
Coin coin;
unsigned int iter{0};
size_t written_coins_count{0};
std::vector<std::pair<uint32_t, Coin>> coins;
// To reduce space the serialization format of the snapshot avoids
// duplication of tx hashes. The code takes advantage of the guarantee by
// leveldb that keys are lexicographically sorted.
// In the coins vector we collect all coins that belong to a certain tx hash
// (key.hash) and when we have them all (key.hash != last_hash) we write
// them to file using the below lambda function.
// See also https://github.com/bitcoin/bitcoin/issues/25675
auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) {
afile << last_hash;
WriteCompactSize(afile, coins.size());
for (const auto& [n, coin] : coins) {
WriteCompactSize(afile, n);
afile << coin;
++written_coins_count;
}
};
pcursor->GetKey(key);
last_hash = key.hash;
while (pcursor->Valid()) {
if (iter % 5000 == 0) node.rpc_interruption_point();
++iter;
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
afile << key;
afile << coin;
if (key.hash != last_hash) {
write_coins_to_file(afile, last_hash, coins, written_coins_count);
last_hash = key.hash;
coins.clear();
}
coins.emplace_back(key.n, coin);
}
pcursor->Next();
}
if (!coins.empty()) {
write_coins_to_file(afile, last_hash, coins, written_coins_count);
}
CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count);
afile.fclose();
UniValue result(UniValue::VOBJ);
result.pushKV("coins_written", maybe_stats->coins_count);
result.pushKV("coins_written", written_coins_count);
result.pushKV("base_hash", tip->GetBlockHash().ToString());
result.pushKV("base_height", tip->nHeight);
result.pushKV("path", path.utf8string());
@ -2778,12 +2810,26 @@ static RPCHelpMan loadtxoutset()
}
SnapshotMetadata metadata;
afile >> metadata;
try {
afile >> metadata;
} catch (const std::ios_base::failure& e) {
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to parse metadata: %s", e.what()));
}
uint256 base_blockhash = metadata.m_base_blockhash;
int base_blockheight = metadata.m_base_blockheight;
if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) {
auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights();
std::ostringstream oss;
for (auto it = available_heights.begin(); it != available_heights.end(); ++it) {
oss << (it != available_heights.begin() ? ", " : "") << *it;
}
std::string heights_formatted = oss.str();
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, "
"assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString()));
"assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.",
base_blockhash.ToString(),
base_blockheight,
heights_formatted));
}
CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main,
return chainman.m_blockman.LookupBlockIndex(base_blockhash));

View File

@ -226,10 +226,13 @@ struct SnapshotTestSetup : TestChain100Setup {
// A UTXO is missing but count is correct
metadata.m_coins_count -= 1;
COutPoint outpoint;
Txid txid;
auto_infile >> txid;
// coins size
(void)ReadCompactSize(auto_infile);
// vout index
(void)ReadCompactSize(auto_infile);
Coin coin;
auto_infile >> outpoint;
auto_infile >> coin;
}));

View File

@ -5660,69 +5660,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
return false;
}
COutPoint outpoint;
Coin coin;
const uint64_t coins_count = metadata.m_coins_count;
uint64_t coins_left = metadata.m_coins_count;
LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString());
LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString());
int64_t coins_processed{0};
while (coins_left > 0) {
try {
coins_file >> outpoint;
coins_file >> coin;
} catch (const std::ios_base::failure&) {
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
if (coin.nHeight > base_height ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
if (!MoneyRange(coin.out.nValue)) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
coins_count - coins_left);
return false;
}
Txid txid;
coins_file >> txid;
size_t coins_per_txid{0};
coins_per_txid = ReadCompactSize(coins_file);
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
--coins_left;
++coins_processed;
if (coins_processed % 1000000 == 0) {
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
coins_processed,
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
}
// Batch write and flush (if we need to) every so often.
//
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
// means <5MB of memory imprecision.
if (coins_processed % 120000 == 0) {
if (m_interrupt) {
if (coins_per_txid > coins_left) {
LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n");
return false;
}
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
return snapshot_chainstate.GetCoinsCacheSizeState());
for (size_t i = 0; i < coins_per_txid; i++) {
COutPoint outpoint;
Coin coin;
outpoint.n = static_cast<uint32_t>(ReadCompactSize(coins_file));
outpoint.hash = txid;
coins_file >> coin;
if (coin.nHeight > base_height ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
if (!MoneyRange(coin.out.nValue)) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
coins_count - coins_left);
return false;
}
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
// This is a hack - we don't know what the actual best block is, but that
// doesn't matter for the purposes of flushing the cache here. We'll set this
// to its correct value (`base_blockhash`) below after the coins are loaded.
coins_cache.SetBestBlock(GetRandHash());
--coins_left;
++coins_processed;
// No need to acquire cs_main since this chainstate isn't being used yet.
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
if (coins_processed % 1000000 == 0) {
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
coins_processed,
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
}
// Batch write and flush (if we need to) every so often.
//
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
// means <5MB of memory imprecision.
if (coins_processed % 120000 == 0) {
if (m_interrupt) {
return false;
}
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
return snapshot_chainstate.GetCoinsCacheSizeState());
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
// This is a hack - we don't know what the actual best block is, but that
// doesn't matter for the purposes of flushing the cache here. We'll set this
// to its correct value (`base_blockhash`) below after the coins are loaded.
coins_cache.SetBestBlock(GetRandHash());
// No need to acquire cs_main since this chainstate isn't being used yet.
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
}
}
}
} catch (const std::ios_base::failure&) {
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
coins_processed);
return false;
}
}
@ -5735,7 +5747,8 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
bool out_of_coins{false};
try {
coins_file >> outpoint;
Txid txid;
coins_file >> txid;
} catch (const std::ios_base::failure&) {
// We expect an exception since we should be out of coins.
out_of_coins = true;

View File

@ -885,6 +885,12 @@ private:
CBlockIndex* m_best_invalid GUARDED_BY(::cs_main){nullptr};
//! Internal helper for ActivateSnapshot().
//!
//! De-serialization of a snapshot that is created with
//! CreateUTXOSnapshot() in rpc/blockchain.cpp.
//! To reduce space the serialization format of the snapshot avoids
//! duplication of tx hashes. The code takes advantage of the guarantee by
//! leveldb that keys are lexicographically sorted.
[[nodiscard]] bool PopulateAndValidateSnapshot(
Chainstate& snapshot_chainstate,
AutoFile& coins_file,

View File

@ -75,41 +75,74 @@ class AssumeutxoTest(BitcoinTestFramework):
with self.nodes[1].assert_debug_log([log_msg]):
assert_raises_rpc_error(-32603, f"Unable to load UTXO snapshot{rpc_details}", self.nodes[1].loadtxoutset, bad_snapshot_path)
self.log.info(" - snapshot file with invalid file magic")
bad_magic = 0xf00f00f000
with open(bad_snapshot_path, 'wb') as f:
f.write(bad_magic.to_bytes(5, "big") + valid_snapshot_contents[5:])
assert_raises_rpc_error(-32603, "Unable to parse metadata: Invalid UTXO set snapshot magic bytes. Please check if this is indeed a snapshot file or if you are using an outdated snapshot format.", self.nodes[1].loadtxoutset, bad_snapshot_path)
self.log.info(" - snapshot file with unsupported version")
for version in [0, 2]:
with open(bad_snapshot_path, 'wb') as f:
f.write(valid_snapshot_contents[:5] + version.to_bytes(2, "little") + valid_snapshot_contents[7:])
assert_raises_rpc_error(-32603, f"Unable to parse metadata: Version of snapshot {version} does not match any of the supported versions.", self.nodes[1].loadtxoutset, bad_snapshot_path)
self.log.info(" - snapshot file with mismatching network magic")
invalid_magics = [
# magic, name, real
[0xf9beb4d9, "main", True],
[0x0b110907, "test", True],
[0x0a03cf40, "signet", True],
[0x00000000, "", False],
[0xffffffff, "", False],
]
for [magic, name, real] in invalid_magics:
with open(bad_snapshot_path, 'wb') as f:
f.write(valid_snapshot_contents[:7] + magic.to_bytes(4, 'big') + valid_snapshot_contents[11:])
if real:
assert_raises_rpc_error(-32603, f"Unable to parse metadata: The network of the snapshot ({name}) does not match the network of this node (regtest).", self.nodes[1].loadtxoutset, bad_snapshot_path)
else:
assert_raises_rpc_error(-32603, "Unable to parse metadata: This snapshot has been created for an unrecognized network. This could be a custom signet, a new testnet or possibly caused by data corruption.", self.nodes[1].loadtxoutset, bad_snapshot_path)
self.log.info(" - snapshot file referring to a block that is not in the assumeutxo parameters")
prev_block_hash = self.nodes[0].getblockhash(SNAPSHOT_BASE_HEIGHT - 1)
bogus_block_hash = "0" * 64 # Represents any unknown block hash
# The height is not used for anything critical currently, so we just
# confirm the manipulation in the error message
bogus_height = 1337
for bad_block_hash in [bogus_block_hash, prev_block_hash]:
with open(bad_snapshot_path, 'wb') as f:
# block hash of the snapshot base is stored right at the start (first 32 bytes)
f.write(bytes.fromhex(bad_block_hash)[::-1] + valid_snapshot_contents[32:])
error_details = f", assumeutxo block hash in snapshot metadata not recognized ({bad_block_hash})"
f.write(valid_snapshot_contents[:11] + bogus_height.to_bytes(4, "little") + bytes.fromhex(bad_block_hash)[::-1] + valid_snapshot_contents[47:])
error_details = f", assumeutxo block hash in snapshot metadata not recognized (hash: {bad_block_hash}, height: {bogus_height}). The following snapshot heights are available: 110, 299."
expected_error(rpc_details=error_details)
self.log.info(" - snapshot file with wrong number of coins")
valid_num_coins = int.from_bytes(valid_snapshot_contents[32:32 + 8], "little")
valid_num_coins = int.from_bytes(valid_snapshot_contents[47:47 + 8], "little")
for off in [-1, +1]:
with open(bad_snapshot_path, 'wb') as f:
f.write(valid_snapshot_contents[:32])
f.write(valid_snapshot_contents[:47])
f.write((valid_num_coins + off).to_bytes(8, "little"))
f.write(valid_snapshot_contents[32 + 8:])
f.write(valid_snapshot_contents[47 + 8:])
expected_error(log_msg=f"bad snapshot - coins left over after deserializing 298 coins" if off == -1 else f"bad snapshot format or truncated snapshot after deserializing 299 coins")
self.log.info(" - snapshot file with alternated UTXO data")
self.log.info(" - snapshot file with alternated but parsable UTXO data results in different hash")
cases = [
# (content, offset, wrong_hash, custom_message)
[b"\xff" * 32, 0, "7d52155c9a9fdc4525b637ef6170568e5dad6fabd0b1fdbb9432010b8453095b", None], # wrong outpoint hash
[(1).to_bytes(4, "little"), 32, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad", None], # wrong outpoint index
[b"\x81", 36, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8", None], # wrong coin code VARINT
[b"\x80", 36, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5", None], # another wrong coin code
[b"\x84\x58", 36, None, "[snapshot] bad snapshot data after deserializing 0 coins"], # wrong coin case with height 364 and coinbase 0
[b"\xCA\xD2\x8F\x5A", 41, None, "[snapshot] bad snapshot data after deserializing 0 coins - bad tx out value"], # Amount exceeds MAX_MONEY
[(2).to_bytes(1, "little"), 32, None, "[snapshot] bad snapshot data after deserializing 1 coins"], # wrong outpoint hash
[b"\x01", 33, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad", None], # wrong outpoint index
[b"\x81", 34, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8", None], # wrong coin code VARINT
[b"\x80", 34, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5", None], # another wrong coin code
[b"\x84\x58", 34, None, "[snapshot] bad snapshot data after deserializing 0 coins"], # wrong coin case with height 364 and coinbase 0
[b"\xCA\xD2\x8F\x5A", 39, None, "[snapshot] bad snapshot data after deserializing 0 coins - bad tx out value"], # Amount exceeds MAX_MONEY
]
for content, offset, wrong_hash, custom_message in cases:
with open(bad_snapshot_path, "wb") as f:
f.write(valid_snapshot_contents[:(32 + 8 + offset)])
# Prior to offset: Snapshot magic, snapshot version, network magic, height, hash, coins count
f.write(valid_snapshot_contents[:(5 + 2 + 4 + 4 + 32 + 8 + offset)])
f.write(content)
f.write(valid_snapshot_contents[(32 + 8 + offset + len(content)):])
f.write(valid_snapshot_contents[(5 + 2 + 4 + 4 + 32 + 8 + offset + len(content)):])
log_msg = custom_message if custom_message is not None else f"[snapshot] bad snapshot content hash: expected a4bf3407ccb2cc0145c49ebba8fa91199f8a3903daf0883875941497d2493c27, got {wrong_hash}"
expected_error(log_msg=log_msg)

View File

@ -43,7 +43,7 @@ class DumptxoutsetTest(BitcoinTestFramework):
# UTXO snapshot hash should be deterministic based on mocked time.
assert_equal(
sha256sum_file(str(expected_path)).hex(),
'b1bacb602eacf5fbc9a7c2ef6eeb0d229c04e98bdf0c2ea5929012cd0eae3830')
'2f775f82811150d310527b5ff773f81fb0fb517e941c543c1f7c4d38fd2717b3')
assert_equal(
out['txoutset_hash'], 'a0b7baa3bf5ccbd3279728f230d7ca0c44a76e9923fca8f32dbfd08d65ea496a')