Use actual batch size for resize estimates

And optimize import startup:

Remember the file position of start_height during the initial count_blocks pass,
to avoid having to reread the entire file just to arrive at start_height.
This commit is contained in:
Howard Chu 2017-09-17 00:12:42 +01:00
parent 7abdba0a5c
commit d2d8a41c47
No known key found for this signature in database
GPG key ID: FD2A70B44AB11BA7
8 changed files with 135 additions and 67 deletions

View file

@ -711,7 +711,7 @@ public:
*
* @return true if we started the batch, false if already started
*/
virtual bool batch_start(uint64_t batch_num_blocks=0) = 0;
virtual bool batch_start(uint64_t batch_num_blocks=0, uint64_t batch_bytes=0) = 0;
/**
* @brief ends a batch transaction

View file

@ -548,7 +548,7 @@ bool BlockchainLMDB::need_resize(uint64_t threshold_size) const
#endif
}
void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks)
void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks, uint64_t batch_bytes)
{
LOG_PRINT_L3("BlockchainLMDB::" << __func__);
LOG_PRINT_L1("[" << __func__ << "] " << "checking DB size");
@ -557,7 +557,7 @@ void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks)
uint64_t increase_size = 0;
if (batch_num_blocks > 0)
{
threshold_size = get_estimated_batch_size(batch_num_blocks);
threshold_size = get_estimated_batch_size(batch_num_blocks, batch_bytes);
LOG_PRINT_L1("calculated batch size: " << threshold_size);
// The increased DB size could be a multiple of threshold_size, a fixed
@ -580,7 +580,7 @@ void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks)
}
}
uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks) const
uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks, uint64_t batch_bytes) const
{
LOG_PRINT_L3("BlockchainLMDB::" << __func__);
uint64_t threshold_size = 0;
@ -607,6 +607,11 @@ uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks) con
uint64_t total_block_size = 0;
LOG_PRINT_L1("[" << __func__ << "] " << "m_height: " << m_height << " block_start: " << block_start << " block_stop: " << block_stop);
size_t avg_block_size = 0;
if (batch_bytes)
{
avg_block_size = batch_bytes / batch_num_blocks;
goto estim;
}
if (m_height == 0)
{
LOG_PRINT_L1("No existing blocks to check for average block size");
@ -635,6 +640,7 @@ uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks) con
avg_block_size = total_block_size / num_blocks_used;
LOG_PRINT_L1("average block size across recent " << num_blocks_used << " blocks: " << avg_block_size);
}
estim:
if (avg_block_size < min_block_size)
avg_block_size = min_block_size;
LOG_PRINT_L1("estimated average block size for batch: " << avg_block_size);
@ -2540,7 +2546,7 @@ bool BlockchainLMDB::for_all_outputs(std::function<bool(uint64_t amount, const c
}
// batch_num_blocks: (optional) Used to check if resize needed before batch transaction starts.
bool BlockchainLMDB::batch_start(uint64_t batch_num_blocks)
bool BlockchainLMDB::batch_start(uint64_t batch_num_blocks, uint64_t batch_bytes)
{
LOG_PRINT_L3("BlockchainLMDB::" << __func__);
if (! m_batch_transactions)
@ -2554,7 +2560,7 @@ bool BlockchainLMDB::batch_start(uint64_t batch_num_blocks)
check_open();
m_writer = boost::this_thread::get_id();
check_and_resize_for_batch(batch_num_blocks);
check_and_resize_for_batch(batch_num_blocks, batch_bytes);
m_write_batch_txn = new mdb_txn_safe();

View file

@ -264,7 +264,7 @@ public:
);
virtual void set_batch_transactions(bool batch_transactions);
virtual bool batch_start(uint64_t batch_num_blocks=0);
virtual bool batch_start(uint64_t batch_num_blocks=0, uint64_t batch_bytes=0);
virtual void batch_commit();
virtual void batch_stop();
virtual void batch_abort();
@ -294,8 +294,8 @@ private:
void do_resize(uint64_t size_increase=0);
bool need_resize(uint64_t threshold_size=0) const;
void check_and_resize_for_batch(uint64_t batch_num_blocks);
uint64_t get_estimated_batch_size(uint64_t batch_num_blocks) const;
void check_and_resize_for_batch(uint64_t batch_num_blocks, uint64_t batch_bytes);
uint64_t get_estimated_batch_size(uint64_t batch_num_blocks, uint64_t batch_bytes) const;
virtual void add_block( const block& blk
, const size_t& block_size

View file

@ -230,11 +230,22 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path
return false;
}
uint64_t start_height = 1, seek_height;
if (opt_resume)
start_height = core.get_blockchain_storage().get_current_blockchain_height();
seek_height = start_height;
BootstrapFile bootstrap;
streampos pos;
// BootstrapFile bootstrap(import_file_path);
uint64_t total_source_blocks = bootstrap.count_blocks(import_file_path);
uint64_t total_source_blocks = bootstrap.count_blocks(import_file_path, pos, seek_height);
MINFO("bootstrap file last block number: " << total_source_blocks-1 << " (zero-based height) total blocks: " << total_source_blocks);
if (total_source_blocks-1 <= start_height)
{
return false;
}
std::cout << ENDL;
std::cout << "Preparing to read blocks..." << ENDL;
std::cout << ENDL;
@ -259,11 +270,7 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path
block b;
transaction tx;
int quit = 0;
uint64_t bytes_read = 0;
uint64_t start_height = 1;
if (opt_resume)
start_height = core.get_blockchain_storage().get_current_blockchain_height();
uint64_t bytes_read;
// Note that a new blockchain will start with block number 0 (total blocks: 1)
// due to genesis block being added at initialization.
@ -280,18 +287,35 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path
bool use_batch = opt_batch && !opt_verify;
if (use_batch)
core.get_blockchain_storage().get_db().batch_start(db_batch_size);
MINFO("Reading blockchain from bootstrap file...");
std::cout << ENDL;
std::list<block_complete_entry> blocks;
// Within the loop, we skip to start_height before we start adding.
// TODO: Not a bottleneck, but we can use what's done in count_blocks() and
// only do the chunk size reads, skipping the chunk content reads until we're
// at start_height.
// Skip to start_height before we start adding.
{
bool q2 = false;
import_file.seekg(pos);
bytes_read = bootstrap.count_bytes(import_file, start_height-seek_height, h, q2);
if (q2)
{
quit = 2;
goto quitting;
}
h = start_height;
}
if (use_batch)
{
uint64_t bytes, h2;
bool q2;
pos = import_file.tellg();
bytes = bootstrap.count_bytes(import_file, db_batch_size, h2, q2);
if (import_file.eof())
import_file.clear();
import_file.seekg(pos);
core.get_blockchain_storage().get_db().batch_start(db_batch_size, bytes);
}
while (! quit)
{
uint32_t chunk_size;
@ -344,11 +368,6 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path
bytes_read += chunk_size;
MDEBUG("Total bytes read: " << bytes_read);
if (h + NUM_BLOCKS_PER_CHUNK < start_height + 1)
{
h += NUM_BLOCKS_PER_CHUNK;
continue;
}
if (h > block_stop)
{
std::cout << refresh_string << "block " << h-1
@ -456,11 +475,16 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path
{
if ((h-1) % db_batch_size == 0)
{
uint64_t bytes, h2;
bool q2;
std::cout << refresh_string;
// zero-based height
std::cout << ENDL << "[- batch commit at height " << h-1 << " -]" << ENDL;
core.get_blockchain_storage().get_db().batch_stop();
core.get_blockchain_storage().get_db().batch_start(db_batch_size);
pos = import_file.tellg();
bytes = bootstrap.count_bytes(import_file, db_batch_size, h2, q2);
import_file.seekg(pos);
core.get_blockchain_storage().get_db().batch_start(db_batch_size, bytes);
std::cout << ENDL;
core.get_blockchain_storage().get_db().show_stats();
}
@ -477,6 +501,7 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path
}
} // while
quitting:
import_file.close();
if (opt_verify)

View file

@ -375,39 +375,15 @@ uint64_t BootstrapFile::seek_to_first_chunk(std::ifstream& import_file)
return full_header_size;
}
uint64_t BootstrapFile::count_blocks(const std::string& import_file_path)
uint64_t BootstrapFile::count_bytes(std::ifstream& import_file, uint64_t blocks, uint64_t& h, bool& quit)
{
boost::filesystem::path raw_file_path(import_file_path);
boost::system::error_code ec;
if (!boost::filesystem::exists(raw_file_path, ec))
{
MFATAL("bootstrap file not found: " << raw_file_path);
throw std::runtime_error("Aborting");
}
std::ifstream import_file;
import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in);
uint64_t h = 0;
if (import_file.fail())
{
MFATAL("import_file.open() fail");
throw std::runtime_error("Aborting");
}
uint64_t full_header_size; // 4 byte magic + length of header structures
full_header_size = seek_to_first_chunk(import_file);
MINFO("Scanning blockchain from bootstrap file...");
block b;
bool quit = false;
uint64_t bytes_read = 0;
int progress_interval = 10;
uint32_t chunk_size;
char buf1[sizeof(chunk_size)];
std::string str1;
char buf1[2048];
while (! quit)
h = 0;
while (1)
{
uint32_t chunk_size;
import_file.read(buf1, sizeof(chunk_size));
if (!import_file) {
std::cout << refresh_string;
@ -415,15 +391,7 @@ uint64_t BootstrapFile::count_blocks(const std::string& import_file_path)
quit = true;
break;
}
h += NUM_BLOCKS_PER_CHUNK;
if ((h-1) % progress_interval == 0)
{
std::cout << "\r" << "block height: " << h-1 <<
" " <<
std::flush;
}
bytes_read += sizeof(chunk_size);
str1.assign(buf1, sizeof(chunk_size));
if (! ::serialization::parse_binary(str1, chunk_size))
throw std::runtime_error("Error in deserialization of chunk_size");
@ -456,6 +424,64 @@ uint64_t BootstrapFile::count_blocks(const std::string& import_file_path)
throw std::runtime_error("Aborting");
}
bytes_read += chunk_size;
h += NUM_BLOCKS_PER_CHUNK;
if (h >= blocks)
break;
}
return bytes_read;
}
uint64_t BootstrapFile::count_blocks(const std::string& import_file_path)
{
streampos dummy_pos;
uint64_t dummy_height = 0;
return count_blocks(import_file_path, dummy_pos, dummy_height);
}
// If seek_height is non-zero on entry, start_pos is set to a stream position at or before that height,
// and seek_height is updated to the actual height corresponding to that position. This lets the caller
// locate its starting position without having to reread the entire file.
uint64_t BootstrapFile::count_blocks(const std::string& import_file_path, streampos &start_pos, uint64_t& seek_height)
{
boost::filesystem::path raw_file_path(import_file_path);
boost::system::error_code ec;
if (!boost::filesystem::exists(raw_file_path, ec))
{
MFATAL("bootstrap file not found: " << raw_file_path);
throw std::runtime_error("Aborting");
}
std::ifstream import_file;
import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in);
uint64_t start_height = seek_height;
uint64_t h = 0;
if (import_file.fail())
{
MFATAL("import_file.open() fail");
throw std::runtime_error("Aborting");
}
uint64_t full_header_size; // 4 byte magic + length of header structures
full_header_size = seek_to_first_chunk(import_file);
MINFO("Scanning blockchain from bootstrap file...");
bool quit = false;
uint64_t bytes_read = 0, blocks;
int progress_interval = 10;
while (! quit)
{
if (start_height && h + progress_interval >= start_height - 1)
{
start_height = 0;
start_pos = import_file.tellg();
seek_height = h;
}
bytes_read += count_bytes(import_file, progress_interval, blocks, quit);
h += blocks;
std::cout << "\r" << "block height: " << h-1 <<
" " <<
std::flush;
// std::cout << refresh_string;
MDEBUG("Number bytes scanned: " << bytes_read);

View file

@ -56,6 +56,8 @@ class BootstrapFile
{
public:
uint64_t count_bytes(std::ifstream& import_file, uint64_t blocks, uint64_t& h, bool& quit);
uint64_t count_blocks(const std::string& dir_path, streampos& start_pos, uint64_t& seek_height);
uint64_t count_blocks(const std::string& dir_path);
uint64_t seek_to_first_chunk(std::ifstream& import_file);

View file

@ -3666,6 +3666,7 @@ bool Blockchain::prepare_handle_incoming_blocks(const std::list<block_complete_e
MTRACE("Blockchain::" << __func__);
TIME_MEASURE_START(prepare);
bool stop_batch;
uint64_t bytes = 0;
// Order of locking must be:
// m_incoming_tx_lock (optional)
@ -3687,7 +3688,15 @@ bool Blockchain::prepare_handle_incoming_blocks(const std::list<block_complete_e
if(blocks_entry.size() == 0)
return false;
while (!(stop_batch = m_db->batch_start(blocks_entry.size()))) {
for (const auto &entry : blocks_entry)
{
bytes += entry.block.size();
for (const auto &tx_blob : entry.txs)
{
bytes += tx_blob.size();
}
}
while (!(stop_batch = m_db->batch_start(blocks_entry.size(), bytes))) {
m_blockchain_lock.unlock();
m_tx_pool.unlock();
epee::misc_utils::sleep_no_w(1000);

View file

@ -53,7 +53,7 @@ public:
virtual std::string get_db_name() const { return std::string(); }
virtual bool lock() { return true; }
virtual void unlock() { }
virtual bool batch_start(uint64_t batch_num_blocks=0) { return true; }
virtual bool batch_start(uint64_t batch_num_blocks=0, uint64_t batch_bytes=0) { return true; }
virtual void batch_stop() {}
virtual void set_batch_transactions(bool) {}
virtual void block_txn_start(bool readonly=false) {}