Skip to content
Snippets Groups Projects
Commit 4ce077ee authored by kerz@chromium.org's avatar kerz@chromium.org
Browse files

Merge 75787 - Improved memory usage while applying patch.

Reduced total size of allocations from 520MB to 318MB.
The general technique is to allocate the correct size rather than grow into
the correct size and overshoot.

1. Find file sizes and allocate buffers of that size for the input files.

2. Pre-allocate a buffer for the collected inputs for the final diff.

3. Calculate the size for (2) during compression and include it in the patch
   header.

The courgette.exe command line tool now calls the same ApplyEnsemblePatch
entry point that is called by the installer.  This ensures measurements of
courgette.exe are a better reflection of the installer.

BUG=72459

Review URL: http://codereview.chromium.org/6546008

TBR=sra@chromium.org
Review URL: http://codereview.chromium.org/6602017

git-svn-id: svn://svn.chromium.org/chrome/branches/648/src@76289 0039d316-1c4b-4281-b951-d872f2087c98
parent b74a1b93
No related merge requests found
......@@ -52,7 +52,11 @@ std::string ReadOrFail(const std::wstring& file_name, const char* kind) {
#else
FilePath file_path(WideToASCII(file_name));
#endif
int64 file_size = 0;
if (!file_util::GetFileSize(file_path, &file_size))
Problem("Can't read %s file.", kind);
std::string buffer;
buffer.reserve(static_cast<size_t>(file_size));
if (!file_util::ReadFileToString(file_path, &buffer))
Problem("Can't read %s file.", kind);
return buffer;
......@@ -285,20 +289,48 @@ void GenerateEnsemblePatch(const std::wstring& old_file,
void ApplyEnsemblePatch(const std::wstring& old_file,
const std::wstring& patch_file,
const std::wstring& new_file) {
std::string old_buffer = ReadOrFail(old_file, "'old' input");
std::string patch_buffer = ReadOrFail(patch_file, "'patch' input");
// We do things a little differently here in order to call the same Courgette
// entry point as the installer. That entry point takes file names and
// returns a status code but does not output any diagnostics.
#if defined(OS_WIN)
FilePath old_path(old_file);
FilePath patch_path(patch_file);
FilePath new_path(new_file);
#else
FilePath old_path(WideToASCII(old_file));
FilePath patch_path(WideToASCII(patch_file));
FilePath new_path(WideToASCII(new_file));
#endif
courgette::SourceStream old_stream;
courgette::SourceStream patch_stream;
old_stream.Init(old_buffer);
patch_stream.Init(patch_buffer);
courgette::SinkStream new_stream;
courgette::Status status =
courgette::ApplyEnsemblePatch(&old_stream, &patch_stream, &new_stream);
courgette::ApplyEnsemblePatch(old_path.value().c_str(),
patch_path.value().c_str(),
new_path.value().c_str());
if (status == courgette::C_OK)
return;
// Diagnose the error.
if (status == courgette::C_BAD_ENSEMBLE_MAGIC)
Problem("Not a courgette patch");
if (status == courgette::C_BAD_ENSEMBLE_VERSION)
Problem("Wrong version patch");
if (status == courgette::C_BAD_ENSEMBLE_HEADER)
Problem("Corrupt patch");
// If we failed due to a missing input file, this will
// print the message.
std::string old_buffer = ReadOrFail(old_file, "'old' input");
old_buffer.clear();
std::string patch_buffer = ReadOrFail(patch_file, "'patch' input");
patch_buffer.clear();
if (status != courgette::C_OK) Problem("-apply failed.");
// Non-input related errors:
if (status == courgette::C_WRITE_OPEN_ERROR)
Problem("Can't open output");
if (status == courgette::C_WRITE_ERROR)
Problem("Can't write output");
WriteSinkToFile(&new_stream, new_file);
Problem("-apply failed.");
}
void GenerateBSDiffPatch(const std::wstring& old_file,
......
......@@ -108,6 +108,7 @@ struct CourgettePatchFile {
// version
// source-checksum
// target-checksum
// final-patch-input-size (an allocation hint)
// multiple-streams:
// stream 0:
// number-of-transformed-elements (N) - varint32
......@@ -136,7 +137,7 @@ struct CourgettePatchFile {
static const uint32 kMagic = 'C' | ('o' << 8) | ('u' << 16);
static const uint32 kVersion = 20090320;
static const uint32 kVersion = 20110216;
// Transformation method IDs.
enum TransformationMethodId {
......
......@@ -8,6 +8,7 @@
#include "base/basictypes.h"
#include "base/file_util.h"
#include "base/logging.h"
#include "courgette/crc.h"
#include "courgette/image_info.h"
......@@ -63,6 +64,7 @@ class EnsemblePatchApplication {
uint32 source_checksum_;
uint32 target_checksum_;
uint32 final_patch_input_size_prediction_;
std::vector<TransformationPatcher*> patchers_;
......@@ -73,7 +75,8 @@ class EnsemblePatchApplication {
};
EnsemblePatchApplication::EnsemblePatchApplication()
: source_checksum_(0), target_checksum_(0) {
: source_checksum_(0), target_checksum_(0),
final_patch_input_size_prediction_(0) {
}
EnsemblePatchApplication::~EnsemblePatchApplication() {
......@@ -103,6 +106,9 @@ Status EnsemblePatchApplication::ReadHeader(SourceStream* header_stream) {
if (!header_stream->ReadVarint32(&target_checksum_))
return C_BAD_ENSEMBLE_HEADER;
if (!header_stream->ReadVarint32(&final_patch_input_size_prediction_))
return C_BAD_ENSEMBLE_HEADER;
return C_OK;
}
......@@ -214,6 +220,8 @@ Status EnsemblePatchApplication::TransformDown(
SinkStream* basic_elements) {
// Construct blob of original input followed by reformed elements.
basic_elements->Reserve(final_patch_input_size_prediction_);
// The original input:
basic_elements->Write(base_region_.start(), base_region_.length());
......@@ -231,6 +239,9 @@ Status EnsemblePatchApplication::TransformDown(
if (!transformed_elements->Empty())
return C_STREAM_NOT_CONSUMED;
// We have totally consumed transformed_elements, so can free the
// storage to which it referred.
corrected_elements_storage_.Retire();
return C_OK;
}
......@@ -374,13 +385,21 @@ Status ApplyEnsemblePatch(const FilePath::CharType* old_file_name,
return status;
// Header smells good so read the whole patch file for real.
int64 patch_file_size = 0;
if (!file_util::GetFileSize(patch_file_path, &patch_file_size))
return C_READ_ERROR;
std::string patch_file_buffer;
patch_file_buffer.reserve(static_cast<size_t>(patch_file_size));
if (!file_util::ReadFileToString(patch_file_path, &patch_file_buffer))
return C_READ_ERROR;
// Read the old_file.
FilePath old_file_path(old_file_name);
int64 old_file_size = 0;
if (!file_util::GetFileSize(old_file_path, &old_file_size))
return C_READ_ERROR;
std::string old_file_buffer;
old_file_buffer.reserve(static_cast<size_t>(old_file_size));
if (!file_util::ReadFileToString(old_file_path, &old_file_buffer))
return C_READ_ERROR;
......
......@@ -351,6 +351,9 @@ Status GenerateEnsemblePatch(SourceStream* base,
if (delta2_status != C_OK)
return delta2_status;
// Last use, free storage.
linearized_predicted_transformed_elements.Retire();
//
// Generate sub-patch for whole enchilada.
//
......@@ -381,8 +384,12 @@ Status GenerateEnsemblePatch(SourceStream* base,
if (!corrected_transformed_elements_source_set.Empty())
return C_STREAM_NOT_CONSUMED;
// No more references to this stream's buffer.
linearized_corrected_transformed_elements.Retire();
FreeGenerators(&generators);
size_t final_patch_input_size = predicted_ensemble.Length();
SourceStream predicted_ensemble_source;
predicted_ensemble_source.Init(predicted_ensemble);
Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
......@@ -401,6 +408,7 @@ Status GenerateEnsemblePatch(SourceStream* base,
CalculateCrc(old_region.start(), old_region.length()));
final_patch->WriteVarint32(
CalculateCrc(new_region.start(), new_region.length()));
final_patch->WriteSizeVarint32(final_patch_input_size);
if (!patch_streams.CopyTo(final_patch))
return C_STREAM_ERROR;
......
......@@ -211,8 +211,12 @@ void SinkStream::WriteSizeVarint32(size_t value) {
// Appends the entire contents of |other| to this stream, then retires
// |other| so that it is left empty. The transfer is destructive: after the
// call |other| no longer holds the data that was appended.
void SinkStream::Append(SinkStream* other) {
  Write(other->buffer_.c_str(), other->buffer_.size());
  // Retire() clears |other|'s buffer and asks for its storage to be reduced,
  // so no separate clear()/reserve(0) on other->buffer_ is needed here.
  other->Retire();
}
// Finished with this stream: discards its contents and gives up the storage
// held by its buffer.
void SinkStream::Retire() {
  // clear() followed by reserve(0) is only a non-binding request to shrink,
  // and from C++20 on reserve() never reduces capacity. Swapping with an
  // empty temporary hands the old allocation to the temporary, which frees
  // it when it is destroyed at the end of the statement.
  std::string().swap(buffer_);
}
////////////////////////////////////////////////////////////////////////////////
......@@ -335,6 +339,14 @@ void SinkStreamSet::CopyHeaderTo(SinkStream* header) {
bool SinkStreamSet::CopyTo(SinkStream *combined_stream) {
SinkStream header;
CopyHeaderTo(&header);
// Reserve the correct amount of storage.
size_t length = header.Length();
for (size_t i = 0; i < count_; ++i) {
length += stream(i)->Length();
}
combined_stream->Reserve(length);
combined_stream->Append(&header);
for (size_t i = 0; i < count_; ++i) {
combined_stream->Append(stream(i));
......
......@@ -147,6 +147,9 @@ class SinkStream {
// Advisory hint that |length| more bytes are expected to be written on top
// of what the stream already holds; requests buffer capacity for the
// combined size.
void Reserve(size_t length) {
  const size_t anticipated_size = length + buffer_.length();
  buffer_.reserve(anticipated_size);
}
// Finished with this stream and any storage it has.
void Retire();
private:
std::string buffer_; // Use a string to manage the stream's memory.
......@@ -187,6 +190,11 @@ class SourceStreamSet {
DISALLOW_COPY_AND_ASSIGN(SourceStreamSet);
};
// A SinkStreamSet is a set of SinkStreams. Data is collected by writing to the
// component streams. When data collection is complete, it is destructively
// transferred, either by flattening into one stream (CopyTo), or transferring
// data pairwise into another SinkStreamSet by calling that SinkStreamSet's
// WriteSet method.
class SinkStreamSet {
public:
SinkStreamSet();
......@@ -199,8 +207,8 @@ class SinkStreamSet {
// Returns a pointer to the substream at index |id|, or NULL when |id| is not
// below count_.
SinkStream* stream(size_t id) {
  if (id >= count_)
    return NULL;
  return &streams_[id];
}
// CopyTo serializes the streams in the SinkStreamSet into a single target
// stream or file. The serialized format may be re-read by initializing a
// CopyTo serializes the streams in this SinkStreamSet into a single target
// stream. The serialized format may be re-read by initializing a
// SourceStreamSet with a buffer containing the data.
bool CopyTo(SinkStream* combined_stream);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment