diff options
Diffstat (limited to 'lib/blobchunk.c')
-rw-r--r-- | lib/blobchunk.c | 430 |
1 files changed, 309 insertions, 121 deletions
diff --git a/lib/blobchunk.c b/lib/blobchunk.c index 3ff0f48..e4d0bad 100644 --- a/lib/blobchunk.c +++ b/lib/blobchunk.c @@ -14,77 +14,98 @@ #include <unistd.h> struct erofs_blobchunk { - struct hashmap_entry ent; + union { + struct hashmap_entry ent; + struct list_head list; + }; char sha256[32]; - erofs_off_t chunksize; + unsigned int device_id; + union { + erofs_off_t chunksize; + erofs_off_t sourceoffset; + }; erofs_blk_t blkaddr; }; static struct hashmap blob_hashmap; static FILE *blobfile; static erofs_blk_t remapped_base; +static erofs_off_t datablob_size; static bool multidev; static struct erofs_buffer_head *bh_devt; struct erofs_blobchunk erofs_holechunk = { .blkaddr = EROFS_NULL_ADDR, }; +static LIST_HEAD(unhashed_blobchunks); -static struct erofs_blobchunk *erofs_blob_getchunk(int fd, - erofs_off_t chunksize) +struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id, + erofs_blk_t blkaddr, erofs_off_t sourceoffset) { - static u8 zeroed[EROFS_BLKSIZ]; - u8 *chunkdata, sha256[32]; - int ret; - unsigned int hash; - erofs_off_t blkpos; struct erofs_blobchunk *chunk; - chunkdata = malloc(chunksize); - if (!chunkdata) + chunk = calloc(1, sizeof(struct erofs_blobchunk)); + if (!chunk) return ERR_PTR(-ENOMEM); - ret = read(fd, chunkdata, chunksize); - if (ret < chunksize) { - chunk = ERR_PTR(-EIO); - goto out; - } - erofs_sha256(chunkdata, chunksize, sha256); + chunk->device_id = device_id; + chunk->blkaddr = blkaddr; + chunk->sourceoffset = sourceoffset; + list_add_tail(&chunk->list, &unhashed_blobchunks); + return chunk; +} + +static struct erofs_blobchunk *erofs_blob_getchunk(struct erofs_sb_info *sbi, + u8 *buf, erofs_off_t chunksize) +{ + static u8 zeroed[EROFS_MAX_BLOCK_SIZE]; + struct erofs_blobchunk *chunk; + unsigned int hash, padding; + u8 sha256[32]; + erofs_off_t blkpos; + int ret; + + erofs_sha256(buf, chunksize, sha256); hash = memhash(sha256, sizeof(sha256)); chunk = hashmap_get_from_hash(&blob_hashmap, hash, sha256); if (chunk) { DBG_BUGON(chunksize != chunk->chunksize); - goto out; + sbi->saved_by_deduplication += chunksize; + erofs_dbg("Found duplicated chunk at %u", chunk->blkaddr); + return chunk; } + chunk = malloc(sizeof(struct erofs_blobchunk)); - if (!chunk) { - chunk = ERR_PTR(-ENOMEM); - goto out; - } + if (!chunk) + return ERR_PTR(-ENOMEM); chunk->chunksize = chunksize; - blkpos = ftell(blobfile); - DBG_BUGON(erofs_blkoff(blkpos)); - chunk->blkaddr = erofs_blknr(blkpos); memcpy(chunk->sha256, sha256, sizeof(sha256)); - hashmap_entry_init(&chunk->ent, hash); - hashmap_add(&blob_hashmap, chunk); + blkpos = ftell(blobfile); + DBG_BUGON(erofs_blkoff(sbi, blkpos)); + + if (sbi->extra_devices) + chunk->device_id = 1; + else + chunk->device_id = 0; + chunk->blkaddr = erofs_blknr(sbi, blkpos); erofs_dbg("Writing chunk (%u bytes) to %u", chunksize, chunk->blkaddr); - ret = fwrite(chunkdata, chunksize, 1, blobfile); - if (ret == 1 && erofs_blkoff(chunksize)) - ret = fwrite(zeroed, EROFS_BLKSIZ - erofs_blkoff(chunksize), - 1, blobfile); - if (ret < 1) { - struct hashmap_entry key; + ret = fwrite(buf, chunksize, 1, blobfile); + if (ret == 1) { + padding = erofs_blkoff(sbi, chunksize); + if (padding) { + padding = erofs_blksiz(sbi) - padding; + ret = fwrite(zeroed, padding, 1, blobfile); + } + } - hashmap_entry_init(&key, hash); - hashmap_remove(&blob_hashmap, &key, sha256); + if (ret < 1) { free(chunk); - chunk = ERR_PTR(-ENOSPC); - goto out; + return ERR_PTR(-ENOSPC); } -out: - free(chunkdata); + + hashmap_entry_init(&chunk->ent, hash); + hashmap_add(&blob_hashmap, chunk); return chunk; } @@ -107,109 +128,151 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, { struct erofs_inode_chunk_index idx = {0}; erofs_blk_t extent_start = EROFS_NULL_ADDR; - erofs_blk_t extent_end, extents_blks; + erofs_blk_t extent_end, chunkblks; + erofs_off_t source_offset; unsigned int dst, src, unit; bool first_extent = true; - erofs_blk_t base_blkaddr = 0; - - if (multidev) { - idx.device_id = 1; - DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)); - } else { - base_blkaddr = remapped_base; - } if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) unit = sizeof(struct erofs_inode_chunk_index); else unit = EROFS_BLOCK_MAP_ENTRY_SIZE; + chunkblks = 1U << (inode->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); for (dst = src = 0; dst < inode->extent_isize; src += sizeof(void *), dst += unit) { struct erofs_blobchunk *chunk; chunk = *(void **)(inode->chunkindexes + src); - if (chunk->blkaddr != EROFS_NULL_ADDR) - idx.blkaddr = base_blkaddr + chunk->blkaddr; - else + if (chunk->blkaddr == EROFS_NULL_ADDR) { idx.blkaddr = EROFS_NULL_ADDR; - - if (extent_start != EROFS_NULL_ADDR && - idx.blkaddr == extent_end + 1) { - extent_end = idx.blkaddr; + } else if (chunk->device_id) { + DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)); + idx.blkaddr = chunk->blkaddr; + extent_start = EROFS_NULL_ADDR; } else { + idx.blkaddr = remapped_base + chunk->blkaddr; + } + + if (extent_start == EROFS_NULL_ADDR || + idx.blkaddr != extent_end) { if (extent_start != EROFS_NULL_ADDR) { + tarerofs_blocklist_write(extent_start, + extent_end - extent_start, + source_offset); erofs_droid_blocklist_write_extent(inode, extent_start, - (extent_end - extent_start) + 1, + extent_end - extent_start, first_extent, false); first_extent = false; } extent_start = idx.blkaddr; - extent_end = idx.blkaddr; + source_offset = chunk->sourceoffset; } + extent_end = idx.blkaddr + chunkblks; + idx.device_id = cpu_to_le16(chunk->device_id); + idx.blkaddr = cpu_to_le32(idx.blkaddr); + if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE) memcpy(inode->chunkindexes + dst, &idx.blkaddr, unit); else memcpy(inode->chunkindexes + dst, &idx, sizeof(idx)); } off = roundup(off, unit); + if (extent_start != EROFS_NULL_ADDR) + tarerofs_blocklist_write(extent_start, extent_end - extent_start, + source_offset); + erofs_droid_blocklist_write_extent(inode, extent_start, + extent_start == EROFS_NULL_ADDR ? + 0 : extent_end - extent_start, + first_extent, true); + + return dev_write(inode->sbi, inode->chunkindexes, off, inode->extent_isize); +} + +int erofs_blob_mergechunks(struct erofs_inode *inode, unsigned int chunkbits, + unsigned int new_chunkbits) +{ + struct erofs_sb_info *sbi = inode->sbi; + unsigned int dst, src, unit, count; - if (extent_start == EROFS_NULL_ADDR) - extents_blks = 0; + if (new_chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK) + new_chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits; + if (chunkbits >= new_chunkbits) /* no need to merge */ + goto out; + + if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) + unit = sizeof(struct erofs_inode_chunk_index); else - extents_blks = (extent_end - extent_start) + 1; - erofs_droid_blocklist_write_extent(inode, extent_start, extents_blks, - first_extent, true); + unit = EROFS_BLOCK_MAP_ENTRY_SIZE; + + count = round_up(inode->i_size, 1ULL << new_chunkbits) >> new_chunkbits; + for (dst = src = 0; dst < count; ++dst) { + *((void **)inode->chunkindexes + dst) = + *((void **)inode->chunkindexes + src); + src += 1U << (new_chunkbits - chunkbits); + } - return dev_write(inode->chunkindexes, off, inode->extent_isize); + DBG_BUGON(count * unit >= inode->extent_isize); + inode->extent_isize = count * unit; + chunkbits = new_chunkbits; +out: + inode->u.chunkformat = (chunkbits - sbi->blkszbits) | + (inode->u.chunkformat & ~EROFS_CHUNK_FORMAT_BLKBITS_MASK); + return 0; } -int erofs_blob_write_chunked_file(struct erofs_inode *inode) +int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd, + erofs_off_t startoff) { + struct erofs_sb_info *sbi = inode->sbi; unsigned int chunkbits = cfg.c_chunkbits; unsigned int count, unit; + struct erofs_blobchunk *chunk, *lastch; struct erofs_inode_chunk_index *idx; erofs_off_t pos, len, chunksize; - int fd, ret; + erofs_blk_t lb, minextblks; + u8 *chunkdata; + int ret; - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; #ifdef SEEK_DATA /* if the file is fully sparsed, use one big chunk instead */ - if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) { + if (lseek(fd, startoff, SEEK_DATA) < 0 && errno == ENXIO) { chunkbits = ilog2(inode->i_size - 1) + 1; - if (chunkbits < LOG_BLOCK_SIZE) - chunkbits = LOG_BLOCK_SIZE; + if (chunkbits < sbi->blkszbits) + chunkbits = sbi->blkszbits; } #endif - if (chunkbits - LOG_BLOCK_SIZE > EROFS_CHUNK_FORMAT_BLKBITS_MASK) - chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + LOG_BLOCK_SIZE; + if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK) + chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits; chunksize = 1ULL << chunkbits; count = DIV_ROUND_UP(inode->i_size, chunksize); - inode->u.chunkformat |= chunkbits - LOG_BLOCK_SIZE; - if (multidev) - inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES; + if (sbi->extra_devices) + inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES; if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) unit = sizeof(struct erofs_inode_chunk_index); else unit = EROFS_BLOCK_MAP_ENTRY_SIZE; - inode->extent_isize = count * unit; - idx = malloc(count * max(sizeof(*idx), sizeof(void *))); - if (!idx) { - close(fd); + chunkdata = malloc(chunksize); + if (!chunkdata) return -ENOMEM; + + inode->extent_isize = count * unit; + inode->chunkindexes = malloc(count * max(sizeof(*idx), sizeof(void *))); + if (!inode->chunkindexes) { + ret = -ENOMEM; + goto err; } - inode->chunkindexes = idx; + idx = inode->chunkindexes; + lastch = NULL; + minextblks = BLK_ROUND_UP(sbi, inode->i_size); for (pos = 0; pos < inode->i_size; pos += len) { - struct erofs_blobchunk *chunk; #ifdef SEEK_DATA - off_t offset = lseek(fd, pos, SEEK_DATA); + off_t offset = lseek(fd, pos + startoff, SEEK_DATA); if (offset < 0) { if (errno != ENXIO) @@ -217,7 +280,16 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode) else offset = ((pos >> chunkbits) + 1) << chunkbits; } else { - offset &= ~(chunksize - 1); + offset -= startoff; + + if (offset != (offset & ~(chunksize - 1))) { + offset &= ~(chunksize - 1); + if (lseek(fd, offset + startoff, SEEK_SET) != + startoff + offset) { + ret = -EIO; + goto err; + } + } } if (offset > pos) { @@ -227,76 +299,189 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode) pos += chunksize; } while (pos < offset); DBG_BUGON(pos != offset); + lastch = NULL; continue; } #endif len = min_t(u64, inode->i_size - pos, chunksize); - chunk = erofs_blob_getchunk(fd, len); + ret = read(fd, chunkdata, len); + if (ret < len) { + ret = -EIO; + goto err; + } + + chunk = erofs_blob_getchunk(sbi, chunkdata, len); if (IS_ERR(chunk)) { ret = PTR_ERR(chunk); goto err; } + + if (lastch && (lastch->device_id != chunk->device_id || + erofs_pos(sbi, lastch->blkaddr) + lastch->chunksize != + erofs_pos(sbi, chunk->blkaddr))) { + lb = lowbit(pos >> sbi->blkszbits); + if (lb && lb < minextblks) + minextblks = lb; + } *(void **)idx++ = chunk; + lastch = chunk; } inode->datalayout = EROFS_INODE_CHUNK_BASED; - close(fd); - return 0; + free(chunkdata); + return erofs_blob_mergechunks(inode, chunkbits, + ilog2(minextblks) + sbi->blkszbits); err: - close(fd); free(inode->chunkindexes); inode->chunkindexes = NULL; + free(chunkdata); return ret; } -int erofs_blob_remap(void) +int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset) +{ + struct erofs_sb_info *sbi = inode->sbi; + unsigned int chunkbits = ilog2(inode->i_size - 1) + 1; + unsigned int count, unit, device_id; + erofs_off_t chunksize, len, pos; + erofs_blk_t blkaddr; + struct erofs_inode_chunk_index *idx; + + if (chunkbits < sbi->blkszbits) + chunkbits = sbi->blkszbits; + if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK) + chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits; + + inode->u.chunkformat |= chunkbits - sbi->blkszbits; + if (sbi->extra_devices) { + device_id = 1; + inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES; + unit = sizeof(struct erofs_inode_chunk_index); + DBG_BUGON(erofs_blkoff(sbi, data_offset)); + blkaddr = erofs_blknr(sbi, data_offset); + } else { + device_id = 0; + unit = EROFS_BLOCK_MAP_ENTRY_SIZE; + DBG_BUGON(erofs_blkoff(sbi, datablob_size)); + blkaddr = erofs_blknr(sbi, datablob_size); + datablob_size += round_up(inode->i_size, erofs_blksiz(sbi)); + } + chunksize = 1ULL << chunkbits; + count = DIV_ROUND_UP(inode->i_size, chunksize); + + inode->extent_isize = count * unit; + idx = calloc(count, max(sizeof(*idx), sizeof(void *))); + if (!idx) + return -ENOMEM; + inode->chunkindexes = idx; + + for (pos = 0; pos < inode->i_size; pos += len) { + struct erofs_blobchunk *chunk; + + len = min_t(erofs_off_t, inode->i_size - pos, chunksize); + + chunk = erofs_get_unhashed_chunk(device_id, blkaddr, + data_offset); + if (IS_ERR(chunk)) { + free(inode->chunkindexes); + inode->chunkindexes = NULL; + return PTR_ERR(chunk); + } + + *(void **)idx++ = chunk; + blkaddr += erofs_blknr(sbi, len); + data_offset += len; + } + inode->datalayout = EROFS_INODE_CHUNK_BASED; + return 0; +} + +int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi) { struct erofs_buffer_head *bh; ssize_t length; erofs_off_t pos_in, pos_out; ssize_t ret; - fflush(blobfile); - length = ftell(blobfile); - if (length < 0) - return -errno; - if (multidev) { - struct erofs_deviceslot dis = { - .blocks = erofs_blknr(length), - }; + if (blobfile) { + fflush(blobfile); + length = ftell(blobfile); + if (length < 0) + return -errno; - pos_out = erofs_btell(bh_devt, false); - ret = dev_write(&dis, pos_out, sizeof(dis)); - if (ret) - return ret; + if (sbi->extra_devices) + sbi->devs[0].blocks = erofs_blknr(sbi, length); + else + datablob_size = length; + } + if (sbi->extra_devices) { + unsigned int i, ret; + erofs_blk_t nblocks; + + nblocks = erofs_mapbh(NULL); + pos_out = erofs_btell(bh_devt, false); + i = 0; + do { + struct erofs_deviceslot dis = { + .mapped_blkaddr = cpu_to_le32(nblocks), + .blocks = cpu_to_le32(sbi->devs[i].blocks), + }; + + memcpy(dis.tag, sbi->devs[i].tag, sizeof(dis.tag)); + ret = dev_write(sbi, &dis, pos_out, sizeof(dis)); + if (ret) + return ret; + pos_out += sizeof(dis); + nblocks += sbi->devs[i].blocks; + } while (++i < sbi->extra_devices); bh_devt->op = &erofs_drop_directly_bhops; erofs_bdrop(bh_devt, false); return 0; } - if (!length) /* bail out if there is no chunked data */ - return 0; - bh = erofs_balloc(DATA, length, 0, 0); + + bh = erofs_balloc(DATA, blobfile ? datablob_size : 0, 0, 0); if (IS_ERR(bh)) return PTR_ERR(bh); erofs_mapbh(bh->block); + pos_out = erofs_btell(bh, false); - pos_in = 0; - remapped_base = erofs_blknr(pos_out); - ret = erofs_copy_file_range(fileno(blobfile), &pos_in, - erofs_devfd, &pos_out, length); + remapped_base = erofs_blknr(sbi, pos_out); + if (blobfile) { + pos_in = 0; + ret = erofs_copy_file_range(fileno(blobfile), &pos_in, + sbi->devfd, &pos_out, datablob_size); + ret = ret < datablob_size ? -EIO : 0; + } else { + ret = 0; + } bh->op = &erofs_drop_directly_bhops; erofs_bdrop(bh, false); - return ret < length ? -EIO : 0; + return ret; } void erofs_blob_exit(void) { + struct hashmap_iter iter; + struct hashmap_entry *e; + struct erofs_blobchunk *bc, *n; + if (blobfile) fclose(blobfile); - hashmap_free(&blob_hashmap, 1); + while ((e = hashmap_iter_first(&blob_hashmap, &iter))) { + bc = container_of((struct hashmap_entry *)e, + struct erofs_blobchunk, ent); + DBG_BUGON(hashmap_remove(&blob_hashmap, e) != e); + free(bc); + } + DBG_BUGON(hashmap_free(&blob_hashmap)); + + list_for_each_entry_safe(bc, n, &unhashed_blobchunks, list) { + list_del(&bc->list); + free(bc); + } } int erofs_blob_init(const char *blobfile_path) @@ -319,22 +504,25 @@ int erofs_blob_init(const char *blobfile_path) return 0; } -int erofs_generate_devtable(void) +int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices) { - struct erofs_deviceslot dis; - - if (!multidev) + if (!devices) return 0; - bh_devt = erofs_balloc(DEVT, sizeof(dis), 0, 0); - if (IS_ERR(bh_devt)) - return PTR_ERR(bh_devt); + sbi->devs = calloc(devices, sizeof(sbi->devs[0])); + if (!sbi->devs) + return -ENOMEM; - dis = (struct erofs_deviceslot) {}; + bh_devt = erofs_balloc(DEVT, + sizeof(struct erofs_deviceslot) * devices, 0, 0); + if (IS_ERR(bh_devt)) { + free(sbi->devs); + return PTR_ERR(bh_devt); + } erofs_mapbh(bh_devt->block); bh_devt->op = &erofs_skip_write_bhops; - sbi.devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE; - sbi.extra_devices = 1; - erofs_sb_set_device_table(); + sbi->devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE; + sbi->extra_devices = devices; + erofs_sb_set_device_table(sbi); return 0; } |