aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsanjay@google.com <sanjay@google.com@62dab493-f737-651d-591e-8d6aee1b9529>2012-03-15 16:24:26 +0000
committersanjay@google.com <sanjay@google.com@62dab493-f737-651d-591e-8d6aee1b9529>2012-03-15 16:24:26 +0000
commite83668fa64e932a64712c99398be0acfe75367af (patch)
treee5c2bed9104f74183981e1ff254074b362f6a856
parentf168d0177b095ac7a608f6aafb9efc96976b6b3c (diff)
downloadsrc-e83668fa64e932a64712c99398be0acfe75367af.tar.gz
use mmap on 64-bit machines to speed-up reads; small build fixes
git-svn-id: http://leveldb.googlecode.com/svn/trunk@62 62dab493-f737-651d-591e-8d6aee1b9529
-rw-r--r--Makefile36
-rw-r--r--table/block.cc9
-rw-r--r--table/block.h6
-rw-r--r--table/format.cc19
-rw-r--r--table/format.h5
-rw-r--r--table/table.cc11
-rw-r--r--table/table_test.cc2
-rw-r--r--util/env_posix.cc50
8 files changed, 99 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index d337905..c284d4c 100644
--- a/Makefile
+++ b/Makefile
@@ -124,65 +124,65 @@ db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@
db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
- $(CXX) $(LDFLAGS) -lsqlite3 doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@
+ $(CXX) -lsqlite3 doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LDFLAGS)
db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) -lkyotocabinet doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
+ $(CXX) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
$(MEMENVLIBRARY) : helpers/memenv/memenv.o
rm -f $@
$(AR) -rs $@ helpers/memenv/memenv.o
memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@
+ $(CXX) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LDFLAGS)
ifeq ($(PLATFORM), IOS)
# For iOS, create universal object files to be used on both the simulator and
diff --git a/table/block.cc b/table/block.cc
index 40aa318..06eb6f8 100644
--- a/table/block.cc
+++ b/table/block.cc
@@ -19,9 +19,10 @@ inline uint32_t Block::NumRestarts() const {
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
}
-Block::Block(const char* data, size_t size)
+Block::Block(const char* data, size_t size, bool take_ownership)
: data_(data),
- size_(size) {
+ size_(size),
+ owned_(take_ownership) {
if (size_ < sizeof(uint32_t)) {
size_ = 0; // Error marker
} else {
@@ -35,7 +36,9 @@ Block::Block(const char* data, size_t size)
}
Block::~Block() {
- delete[] data_;
+ if (owned_) {
+ delete[] data_;
+ }
}
// Helper routine: decode the next block entry starting at "p",
diff --git a/table/block.h b/table/block.h
index 9eb6f02..76088a4 100644
--- a/table/block.h
+++ b/table/block.h
@@ -16,8 +16,9 @@ class Comparator;
class Block {
public:
// Initialize the block with the specified contents.
- // Takes ownership of data[] and will delete[] it when done.
- Block(const char* data, size_t size);
+ // Takes ownership of data[] and will delete[] it when done iff
+ // "take_ownership" is true.
+ Block(const char* data, size_t size, bool take_ownership);
~Block();
@@ -30,6 +31,7 @@ class Block {
const char* data_;
size_t size_;
uint32_t restart_offset_; // Offset in data_ of restart array
+ bool owned_; // Block owns data_[]
// No copying allowed
Block(const Block&);
diff --git a/table/format.cc b/table/format.cc
index 23728d8..25b85a2 100644
--- a/table/format.cc
+++ b/table/format.cc
@@ -66,8 +66,10 @@ Status Footer::DecodeFrom(Slice* input) {
Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options,
const BlockHandle& handle,
- Block** block) {
+ Block** block,
+ bool* may_cache) {
*block = NULL;
+ *may_cache = false;
// Read the block contents as well as the type/crc footer.
// See table_builder.cc for the code that built this structure.
@@ -100,8 +102,14 @@ Status ReadBlock(RandomAccessFile* file,
case kNoCompression:
if (data != buf) {
// File implementation gave us pointer to some other data.
- // Copy into buf[].
- memcpy(buf, data, n + kBlockTrailerSize);
+ // Use it directly under the assumption that it will be live
+ // while the file is open.
+ delete[] buf;
+ *block = new Block(data, n, false /* do not take ownership */);
+ *may_cache = false; // Do not double-cache
+ } else {
+ *block = new Block(buf, n, true /* take ownership */);
+ *may_cache = true;
}
// Ok
@@ -119,8 +127,8 @@ Status ReadBlock(RandomAccessFile* file,
return Status::Corruption("corrupted compressed block contents");
}
delete[] buf;
- buf = ubuf;
- n = ulength;
+ *block = new Block(ubuf, ulength, true /* take ownership */);
+ *may_cache = true;
break;
}
default:
@@ -128,7 +136,6 @@ Status ReadBlock(RandomAccessFile* file,
return Status::Corruption("bad block type");
}
- *block = new Block(buf, n); // Block takes ownership of buf[]
return Status::OK();
}
diff --git a/table/format.h b/table/format.h
index 2a3e1ac..66a15da 100644
--- a/table/format.h
+++ b/table/format.h
@@ -86,10 +86,13 @@ static const size_t kBlockTrailerSize = 5;
// Read the block identified by "handle" from "file". On success,
// store a pointer to the heap-allocated result in *block and return
// OK. On failure store NULL in *block and return non-OK.
+// On success, stores true in *may_cache if the result may be
+// cached, false if it must not be cached.
extern Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options,
const BlockHandle& handle,
- Block** block);
+ Block** block,
+ bool* may_cache);
// Implementation details follow. Clients should ignore,
diff --git a/table/table.cc b/table/table.cc
index 5f9238e..07dcffd 100644
--- a/table/table.cc
+++ b/table/table.cc
@@ -49,7 +49,9 @@ Status Table::Open(const Options& options,
// Read the index block
Block* index_block = NULL;
if (s.ok()) {
- s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block);
+ bool may_cache; // Ignored result
+ s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block,
+ &may_cache);
}
if (s.ok()) {
@@ -105,6 +107,7 @@ Iterator* Table::BlockReader(void* arg,
// can add more features in the future.
if (s.ok()) {
+ bool may_cache;
if (block_cache != NULL) {
char cache_key_buffer[16];
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
@@ -114,14 +117,14 @@ Iterator* Table::BlockReader(void* arg,
if (cache_handle != NULL) {
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
} else {
- s = ReadBlock(table->rep_->file, options, handle, &block);
- if (s.ok() && options.fill_cache) {
+ s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
+ if (s.ok() && may_cache && options.fill_cache) {
cache_handle = block_cache->Insert(
key, block, block->size(), &DeleteCachedBlock);
}
}
} else {
- s = ReadBlock(table->rep_->file, options, handle, &block);
+ s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
}
}
diff --git a/table/table_test.cc b/table/table_test.cc
index cd85b4b..0c8e676 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -205,7 +205,7 @@ class BlockConstructor: public Constructor {
block_size_ = block_data.size();
char* block_data_copy = new char[block_size_];
memcpy(block_data_copy, block_data.data(), block_size_);
- block_ = new Block(block_data_copy, block_size_);
+ block_ = new Block(block_data_copy, block_size_, true /* take ownership */);
return Status::OK();
}
virtual size_t NumBytes() const { return block_size_; }
diff --git a/util/env_posix.cc b/util/env_posix.cc
index cc73348..cb1f6fc 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -66,6 +66,7 @@ class PosixSequentialFile: public SequentialFile {
}
};
+// pread() based random-access
class PosixRandomAccessFile: public RandomAccessFile {
private:
std::string filename_;
@@ -89,6 +90,32 @@ class PosixRandomAccessFile: public RandomAccessFile {
}
};
+// mmap() based random-access
+class PosixMmapReadableFile: public RandomAccessFile {
+ private:
+ std::string filename_;
+ void* mmapped_region_;
+ size_t length_;
+
+ public:
+ // base[0,length-1] contains the mmapped contents of the file.
+ PosixMmapReadableFile(const std::string& fname, void* base, size_t length)
+ : filename_(fname), mmapped_region_(base), length_(length) { }
+ virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); }
+
+ virtual Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const {
+ Status s;
+ if (offset + n > length_) {
+ *result = Slice();
+ s = IOError(filename_, EINVAL);
+ } else {
+ *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
+ }
+ return s;
+ }
+};
+
// We preallocate up to an extra megabyte and use memcpy to append new
// data to the file. This is safe since we either properly close the
// file before reading from it, or for log files, the reading code
@@ -297,13 +324,28 @@ class PosixEnv : public Env {
virtual Status NewRandomAccessFile(const std::string& fname,
RandomAccessFile** result) {
+ *result = NULL;
+ Status s;
int fd = open(fname.c_str(), O_RDONLY);
if (fd < 0) {
- *result = NULL;
- return IOError(fname, errno);
+ s = IOError(fname, errno);
+ } else if (sizeof(void*) >= 8) {
+ // Use mmap when virtual address-space is plentiful.
+ uint64_t size;
+ s = GetFileSize(fname, &size);
+ if (s.ok()) {
+ void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+ if (base != MAP_FAILED) {
+ *result = new PosixMmapReadableFile(fname, base, size);
+ } else {
+ s = IOError(fname, errno);
+ }
+ }
+ close(fd);
+ } else {
+ *result = new PosixRandomAccessFile(fname, fd);
}
- *result = new PosixRandomAccessFile(fname, fd);
- return Status::OK();
+ return s;
}
virtual Status NewWritableFile(const std::string& fname,