2015-06-20 17:40:55 +00:00
|
|
|
/*
|
|
|
|
* unzip implementation for Bled/busybox
|
|
|
|
*
|
2023-06-28 16:39:39 +00:00
|
|
|
* Copyright © 2015-2023 Pete Batard <pete@akeo.ie>
|
2015-06-20 17:40:55 +00:00
|
|
|
* Based on mini unzip implementation for busybox © Ed Clark
|
|
|
|
* Loosely based on original busybox unzip applet © Laurence Anderson.
|
|
|
|
*
|
|
|
|
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "libbb.h"
|
|
|
|
#include "bb_archive.h"
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/*
 * Debug tracing. Change the condition below to 1 to have dbg() forward
 * its arguments to bb_printf(); otherwise dbg() expands to a no-op and
 * its arguments are never evaluated.
 */
#if 0
# define dbg(...) bb_printf(__VA_ARGS__)
#else
# define dbg(...) ((void)0)
#endif

/* Within this file xread() is simply another name for safe_read() */
#define xread safe_read
|
|
|
|
|
2015-06-20 17:40:55 +00:00
|
|
|
/*
 * Record signatures. Each value is what the four on-disk signature bytes
 * ("PK" plus two type bytes) look like once loaded as a native uint32_t,
 * which lets the code compare a freshly read word without swapping it.
 */
enum {
#if BB_BIG_ENDIAN
	ZIP_FILEHEADER_MAGIC = 0x504b0304, /* local file header */
	ZIP_CDF_MAGIC        = 0x504b0102, /* CDF item */
	ZIP_CDE_MAGIC        = 0x504b0506, /* End of CDF */
	ZIP64_CDE_MAGIC      = 0x504b0606, /* End of Zip64 CDF */
	ZIP_DD_MAGIC         = 0x504b0708, /* data descriptor */
#else
	ZIP_FILEHEADER_MAGIC = 0x04034b50, /* local file header */
	ZIP_CDF_MAGIC        = 0x02014b50, /* CDF item */
	ZIP_CDE_MAGIC        = 0x06054b50, /* End of CDF */
	ZIP64_CDE_MAGIC      = 0x06064b50, /* End of Zip64 CDF */
	ZIP_DD_MAGIC         = 0x08074b50, /* data descriptor */
#endif
};
|
|
|
|
|
|
|
|
#define ZIP_HEADER_LEN 26
|
|
|
|
|
|
|
|
PRAGMA_BEGIN_PACKED
|
|
|
|
typedef union {
|
|
|
|
uint8_t raw[ZIP_HEADER_LEN];
|
|
|
|
struct {
|
|
|
|
uint16_t version; /* 0-1 */
|
|
|
|
uint16_t zip_flags; /* 2-3 */
|
|
|
|
uint16_t method; /* 4-5 */
|
|
|
|
uint16_t modtime; /* 6-7 */
|
|
|
|
uint16_t moddate; /* 8-9 */
|
2023-06-28 16:39:39 +00:00
|
|
|
uint32_t crc32 PACKED; /* 10-13 */
|
|
|
|
uint32_t cmpsize PACKED; /* 14-17 */
|
|
|
|
uint32_t ucmpsize PACKED; /* 18-21 */
|
2015-06-20 17:40:55 +00:00
|
|
|
uint16_t filename_len; /* 22-23 */
|
|
|
|
uint16_t extra_len; /* 24-25 */
|
2023-06-28 16:39:39 +00:00
|
|
|
/* filename follows (not NUL terminated) */
|
|
|
|
/* extra field follows */
|
|
|
|
/* data follows */
|
|
|
|
} fmt PACKED;
|
|
|
|
} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */
|
2015-06-20 17:40:55 +00:00
|
|
|
PRAGMA_END_PACKED
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/*
 * Convert a freshly read zip_header_t to host byte order (only does work
 * on big-endian hosts). 'version' and 'zip_flags' are deliberately left
 * in on-disk order: flag tests in this file use SWAP_LE16() constants.
 */
#define FIX_ENDIANNESS_ZIP(zip) \
do { if (BB_BIG_ENDIAN) { \
	(zip).fmt.method       = SWAP_LE16((zip).fmt.method      ); \
	(zip).fmt.modtime      = SWAP_LE16((zip).fmt.modtime     ); \
	(zip).fmt.moddate      = SWAP_LE16((zip).fmt.moddate     ); \
	(zip).fmt.crc32        = SWAP_LE32((zip).fmt.crc32       ); \
	(zip).fmt.cmpsize      = SWAP_LE32((zip).fmt.cmpsize     ); \
	(zip).fmt.ucmpsize     = SWAP_LE32((zip).fmt.ucmpsize    ); \
	(zip).fmt.filename_len = SWAP_LE16((zip).fmt.filename_len); \
	(zip).fmt.extra_len    = SWAP_LE16((zip).fmt.extra_len   ); \
}} while (0)
|
2015-06-20 17:40:55 +00:00
|
|
|
|
|
|
|
#define CDF_HEADER_LEN 42
|
|
|
|
|
|
|
|
PRAGMA_BEGIN_PACKED
|
|
|
|
typedef union {
|
|
|
|
uint8_t raw[CDF_HEADER_LEN];
|
|
|
|
struct {
|
|
|
|
/* uint32_t signature; 50 4b 01 02 */
|
|
|
|
uint16_t version_made_by; /* 0-1 */
|
|
|
|
uint16_t version_needed; /* 2-3 */
|
|
|
|
uint16_t cdf_flags; /* 4-5 */
|
|
|
|
uint16_t method; /* 6-7 */
|
2023-06-28 16:39:39 +00:00
|
|
|
uint16_t modtime; /* 8-9 */
|
|
|
|
uint16_t moddate; /* 10-11 */
|
2015-06-20 17:40:55 +00:00
|
|
|
uint32_t crc32; /* 12-15 */
|
|
|
|
uint32_t cmpsize; /* 16-19 */
|
|
|
|
uint32_t ucmpsize; /* 20-23 */
|
2023-06-28 16:39:39 +00:00
|
|
|
uint16_t filename_len; /* 24-25 */
|
|
|
|
uint16_t extra_len; /* 26-27 */
|
2015-06-20 17:40:55 +00:00
|
|
|
uint16_t file_comment_length; /* 28-29 */
|
|
|
|
uint16_t disk_number_start; /* 30-31 */
|
2023-06-28 16:39:39 +00:00
|
|
|
uint16_t internal_attributes; /* 32-33 */
|
|
|
|
uint32_t external_attributes PACKED; /* 34-37 */
|
|
|
|
uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
|
|
|
|
/* filename follows (not NUL terminated) */
|
|
|
|
/* extra field follows */
|
|
|
|
/* file comment follows */
|
|
|
|
} fmt PACKED;
|
2015-06-20 17:40:55 +00:00
|
|
|
} cdf_header_t;
|
|
|
|
PRAGMA_END_PACKED
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/*
 * Convert a freshly read cdf_header_t to host byte order (only does work
 * on big-endian hosts). cdf_flags, disk_number_start, internal_attributes
 * and relative_offset_of_local_header are NOT converted here; users of
 * the latter apply SWAP_LE32() themselves.
 */
#define FIX_ENDIANNESS_CDF(cdf) \
do { if (BB_BIG_ENDIAN) { \
	(cdf).fmt.version_made_by     = SWAP_LE16((cdf).fmt.version_made_by); \
	(cdf).fmt.version_needed      = SWAP_LE16((cdf).fmt.version_needed ); \
	(cdf).fmt.method              = SWAP_LE16((cdf).fmt.method         ); \
	(cdf).fmt.modtime             = SWAP_LE16((cdf).fmt.modtime        ); \
	(cdf).fmt.moddate             = SWAP_LE16((cdf).fmt.moddate        ); \
	(cdf).fmt.crc32               = SWAP_LE32((cdf).fmt.crc32          ); \
	(cdf).fmt.cmpsize             = SWAP_LE32((cdf).fmt.cmpsize        ); \
	(cdf).fmt.ucmpsize            = SWAP_LE32((cdf).fmt.ucmpsize       ); \
	(cdf).fmt.filename_len        = SWAP_LE16((cdf).fmt.filename_len   ); \
	(cdf).fmt.extra_len           = SWAP_LE16((cdf).fmt.extra_len      ); \
	(cdf).fmt.file_comment_length = SWAP_LE16((cdf).fmt.file_comment_length); \
	(cdf).fmt.external_attributes = SWAP_LE32((cdf).fmt.external_attributes); \
}} while (0)
|
|
|
|
|
|
|
|
#define CDE_LEN 16
|
2015-06-20 17:40:55 +00:00
|
|
|
|
|
|
|
PRAGMA_BEGIN_PACKED
|
|
|
|
typedef union {
|
2023-06-28 16:39:39 +00:00
|
|
|
uint8_t raw[CDE_LEN];
|
2015-06-20 17:40:55 +00:00
|
|
|
struct {
|
|
|
|
/* uint32_t signature; 50 4b 05 06 */
|
|
|
|
uint16_t this_disk_no;
|
|
|
|
uint16_t disk_with_cdf_no;
|
|
|
|
uint16_t cdf_entries_on_this_disk;
|
|
|
|
uint16_t cdf_entries_total;
|
|
|
|
uint32_t cdf_size;
|
|
|
|
uint32_t cdf_offset;
|
2023-06-28 16:39:39 +00:00
|
|
|
/* uint16_t archive_comment_length; */
|
|
|
|
/* archive comment follows */
|
|
|
|
} fmt PACKED;
|
|
|
|
} cde_t;
|
|
|
|
PRAGMA_END_PACKED
|
|
|
|
|
|
|
|
/*
 * Convert a cde_t to host byte order (only does work on big-endian
 * hosts). Only cdf_offset is converted; every other field is left in
 * on-disk order.
 */
#define FIX_ENDIANNESS_CDE(cde) \
do { if (BB_BIG_ENDIAN) { \
	(cde).fmt.cdf_offset = SWAP_LE32((cde).fmt.cdf_offset); \
}} while (0)
|
|
|
|
|
2023-06-29 20:15:35 +00:00
|
|
|
/* Extra records */
|
|
|
|
#define EXTRA_HEADER_LEN 4
|
|
|
|
|
|
|
|
PRAGMA_BEGIN_PACKED
|
|
|
|
typedef union {
|
|
|
|
uint8_t raw[EXTRA_HEADER_LEN];
|
|
|
|
struct {
|
|
|
|
uint16_t tag;
|
|
|
|
uint16_t length;
|
|
|
|
/* extra record follows */
|
|
|
|
} fmt PACKED;
|
|
|
|
} extra_header_t;
|
|
|
|
PRAGMA_END_PACKED
|
|
|
|
|
|
|
|
/*
 * Convert an extra_header_t to host byte order (only does work on
 * big-endian hosts). Both fields are 16 bits wide, so both must go
 * through SWAP_LE16(): a 32-bit swap of 'length' would move its bytes
 * into the upper half of the result, which is then lost when the value
 * is stored back into the 16-bit field.
 */
#define FIX_ENDIANNESS_EXTRA(ext) \
do { if (BB_BIG_ENDIAN) { \
	(ext).fmt.tag    = SWAP_LE16((ext).fmt.tag); \
	(ext).fmt.length = SWAP_LE16((ext).fmt.length); \
}} while (0)
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/* ZIP64 records */
|
|
|
|
#define ZIP64_LEN 20
|
|
|
|
|
|
|
|
PRAGMA_BEGIN_PACKED
|
|
|
|
typedef union {
|
|
|
|
uint8_t raw[ZIP64_LEN];
|
|
|
|
struct {
|
2023-06-29 20:15:35 +00:00
|
|
|
/* uint16_t tag; 00 01 */
|
|
|
|
uint16_t tag;
|
2023-06-28 16:39:39 +00:00
|
|
|
uint16_t length;
|
|
|
|
uint64_t ucmpsize PACKED;
|
|
|
|
uint64_t cmpsize PACKED;
|
|
|
|
} fmt PACKED;
|
|
|
|
} zip64_t;
|
|
|
|
PRAGMA_END_PACKED
|
|
|
|
|
|
|
|
/*
 * Convert the two 64-bit sizes of a zip64_t to host byte order (only
 * does work on big-endian hosts). 'tag' and 'length' are not touched.
 */
#define FIX_ENDIANNESS_ZIP64(z64) \
do { if (BB_BIG_ENDIAN) { \
	(z64).fmt.cmpsize  = SWAP_LE64((z64).fmt.cmpsize); \
	(z64).fmt.ucmpsize = SWAP_LE64((z64).fmt.ucmpsize); \
}} while (0)
|
|
|
|
|
|
|
|
#define CDE64_LEN 52
|
|
|
|
|
|
|
|
PRAGMA_BEGIN_PACKED
|
|
|
|
typedef union {
|
|
|
|
uint8_t raw[CDE64_LEN];
|
|
|
|
struct {
|
|
|
|
/* uint32_t signature; 50 4b 06 06 */
|
|
|
|
uint64_t size PACKED;
|
|
|
|
uint16_t version_made_by;
|
|
|
|
uint16_t version_needed;
|
|
|
|
uint32_t this_disk_no;
|
|
|
|
uint32_t disk_with_cdf_no;
|
|
|
|
uint64_t cdf_entries_on_this_disk PACKED;
|
|
|
|
uint64_t cdf_entries_total PACKED;
|
|
|
|
uint64_t cdf_size PACKED;
|
|
|
|
uint64_t cdf_offset PACKED;
|
|
|
|
/* archive comment follows */
|
|
|
|
} fmt PACKED;
|
|
|
|
} cde64_t;
|
2015-06-20 17:40:55 +00:00
|
|
|
PRAGMA_END_PACKED
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/*
 * Convert a cde64_t to host byte order (only does work on big-endian
 * hosts). Only 'size' and 'cdf_offset' are converted; every other field
 * is left in on-disk order.
 */
#define FIX_ENDIANNESS_CDE64(c64) \
do { if (BB_BIG_ENDIAN) { \
	(c64).fmt.size       = SWAP_LE64((c64).fmt.size); \
	(c64).fmt.cdf_offset = SWAP_LE64((c64).fmt.cdf_offset); \
}} while (0)
|
|
|
|
|
2023-07-01 19:22:10 +00:00
|
|
|
inline void BUG(void) {
|
2023-06-28 16:39:39 +00:00
|
|
|
/* Check the offset of the last element, not the length. This leniency
|
|
|
|
* allows for poor packing, whereby the overall struct may be too long,
|
|
|
|
* even though the elements are all in the right place.
|
|
|
|
*/
|
2023-06-29 20:15:35 +00:00
|
|
|
BUILD_BUG_ON(
|
2023-06-28 16:39:39 +00:00
|
|
|
offsetof(zip_header_t, fmt.extra_len) + 2
|
2023-06-29 20:15:35 +00:00
|
|
|
!= ZIP_HEADER_LEN);
|
|
|
|
BUILD_BUG_ON(
|
2023-06-28 16:39:39 +00:00
|
|
|
offsetof(cdf_header_t, fmt.relative_offset_of_local_header) + 4
|
2023-06-29 20:15:35 +00:00
|
|
|
!= CDF_HEADER_LEN);
|
|
|
|
BUILD_BUG_ON(sizeof(cde_t) != CDE_LEN);
|
|
|
|
BUILD_BUG_ON(sizeof(extra_header_t) != EXTRA_HEADER_LEN);
|
|
|
|
BUILD_BUG_ON(sizeof(zip64_t) != ZIP64_LEN);
|
|
|
|
BUILD_BUG_ON(sizeof(cde64_t) != CDE64_LEN);
|
|
|
|
}
|
2015-06-20 17:40:55 +00:00
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/* Sentinel offset: the Central Directory could not be located */
#define BAD_CDF_OFFSET ((uint32_t)0xffffffff)
|
|
|
|
|
|
|
|
#if !ENABLE_FEATURE_UNZIP_CDF
|
2015-06-20 17:40:55 +00:00
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
# define find_cdf_offset(fd) BAD_CDF_OFFSET
|
2015-06-20 17:40:55 +00:00
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
#else
|
2015-06-20 17:40:55 +00:00
|
|
|
/*
 * How many bytes from the end of the file are searched for a CDE.
 *
 * Seen in the wild: self-extracting PRO2K3XP_32.exe contains a
 * 19078464 byte zip archive whose CDE sat nearly 48 kbytes before EOF.
 * (Surprisingly, it also apparently has *another* CDE structure closer
 * to the end, with a bogus cdf_offset.)
 * To make extraction work, this was bumped from 16k to 64k.
 */
#define PEEK_FROM_END (64*1024)
|
|
|
|
/* NB: does not preserve file position! */
|
2023-06-28 16:39:39 +00:00
|
|
|
static uint64_t find_cdf_offset(int fd)
|
2015-06-20 17:40:55 +00:00
|
|
|
{
|
2023-06-28 16:39:39 +00:00
|
|
|
cde_t cde;
|
|
|
|
cde64_t cde64;
|
|
|
|
unsigned char *buf;
|
|
|
|
unsigned char *p, c;
|
|
|
|
uint64_t end = 0, found, size;
|
|
|
|
|
|
|
|
size = lseek(fd, 0, SEEK_END);
|
|
|
|
if (size == (off_t) -1)
|
|
|
|
return BAD_CDF_OFFSET;
|
|
|
|
|
|
|
|
if (size > PEEK_FROM_END)
|
|
|
|
end = size - PEEK_FROM_END;
|
|
|
|
else
|
2015-06-20 17:40:55 +00:00
|
|
|
end = 0;
|
2023-06-28 16:39:39 +00:00
|
|
|
|
|
|
|
size = MIN(size, PEEK_FROM_END);
|
|
|
|
|
2020-06-10 19:00:47 +00:00
|
|
|
lseek(fd, end, SEEK_SET);
|
2023-06-28 16:39:39 +00:00
|
|
|
buf = xzalloc((size_t)size);
|
|
|
|
if (buf == NULL)
|
|
|
|
return 0;
|
|
|
|
full_read(fd, buf, (unsigned int)size);
|
2015-06-20 17:40:55 +00:00
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
found = BAD_CDF_OFFSET;
|
2015-06-20 17:40:55 +00:00
|
|
|
p = buf;
|
2023-06-28 16:39:39 +00:00
|
|
|
while (p <= buf + size - CDE_LEN - 4) {
|
2015-06-20 17:40:55 +00:00
|
|
|
if (*p != 'P') {
|
|
|
|
p++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (*++p != 'K')
|
|
|
|
continue;
|
2023-06-28 16:39:39 +00:00
|
|
|
c = *++p;
|
|
|
|
if (c != 5 && c != 6)
|
2015-06-20 17:40:55 +00:00
|
|
|
continue;
|
|
|
|
if (*++p != 6)
|
|
|
|
continue;
|
|
|
|
/* we found CDE! */
|
2023-06-28 16:39:39 +00:00
|
|
|
switch (c) {
|
|
|
|
case 5: /* 32-bit CDE */
|
|
|
|
memcpy(cde.raw, p + 1, CDE_LEN);
|
|
|
|
dbg("cde.this_disk_no:%d", cde.fmt.this_disk_no);
|
|
|
|
dbg("cde.disk_with_cdf_no:%d", cde.fmt.disk_with_cdf_no);
|
|
|
|
dbg("cde.cdf_entries_on_this_disk:%d", cde.fmt.cdf_entries_on_this_disk);
|
|
|
|
dbg("cde.cdf_entries_total:%d", cde.fmt.cdf_entries_total);
|
|
|
|
dbg("cde.cdf_size:%d", cde.fmt.cdf_size);
|
|
|
|
dbg("cde.cdf_offset:%x", cde.fmt.cdf_offset);
|
|
|
|
FIX_ENDIANNESS_CDE(cde);
|
|
|
|
/*
|
|
|
|
* I've seen .ZIP files with seemingly valid CDEs
|
|
|
|
* where cdf_offset points past EOF - ??
|
|
|
|
* This check ignores such CDEs:
|
|
|
|
*/
|
|
|
|
if (cde.fmt.cdf_offset != 0xffffffffL &&
|
|
|
|
cde.fmt.cdf_offset < end + (p - buf)) {
|
|
|
|
found = cde.fmt.cdf_offset;
|
|
|
|
dbg("Possible cdf_offset:0x%"OFF_FMT"x at 0x%"OFF_FMT"x",
|
|
|
|
found, end + (p - 3 - buf));
|
|
|
|
dbg(" cdf_offset+cdf_size:0x%"OFF_FMT"x",
|
|
|
|
(found + SWAP_LE32(cde.fmt.cdf_size)));
|
|
|
|
/*
|
|
|
|
* We do not "break" here because only the last CDE is valid.
|
|
|
|
* I've seen a .zip archive which contained a .zip file,
|
|
|
|
* uncompressed, and taking the first CDE was using
|
|
|
|
* the CDE inside that file!
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 6: /* 64-bit CDE */
|
|
|
|
memcpy(cde64.raw, p + 1, CDE64_LEN);
|
|
|
|
FIX_ENDIANNESS_CDE64(cde64);
|
|
|
|
dbg("cde64.this_disk_no:%d", cde64.fmt.this_disk_no);
|
|
|
|
dbg("cde64.disk_with_cdf_no:%d", cde64.fmt.disk_with_cdf_no);
|
|
|
|
dbg("cde64.cdf_entries_on_this_disk:%lld", cde64.fmt.cdf_entries_on_this_disk);
|
|
|
|
dbg("cde64.cdf_entries_total:%lld", cde64.fmt.cdf_entries_total);
|
|
|
|
dbg("cde64.cdf_size:%lld", cde64.fmt.cdf_size);
|
|
|
|
dbg("cde64.cdf_offset:%llx", cde64.fmt.cdf_offset);
|
|
|
|
if (cde64.fmt.cdf_offset < end + (p - buf)) {
|
|
|
|
found = cde64.fmt.cdf_offset;
|
|
|
|
dbg("Possible cdf_offset:0x%"OFF_FMT"x at 0x%"OFF_FMT"x",
|
|
|
|
found, end + (p - 3 - buf));
|
|
|
|
dbg(" cdf_offset+cdf_size:0x%"OFF_FMT"x",
|
|
|
|
(found + SWAP_LE64(cde64.fmt.cdf_size)));
|
|
|
|
}
|
2015-06-20 17:40:55 +00:00
|
|
|
break;
|
2023-06-28 16:39:39 +00:00
|
|
|
}
|
2015-06-20 17:40:55 +00:00
|
|
|
}
|
|
|
|
free(buf);
|
2023-06-28 16:39:39 +00:00
|
|
|
dbg("Found cdf_offset:0x%"OFF_FMT"x", found);
|
|
|
|
return found;
|
2015-06-20 17:40:55 +00:00
|
|
|
};
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/*
 * Read the Central Directory File header located at cdf_offset into *cdf.
 *
 * Returns:
 *  - BAD_CDF_OFFSET, untouched, if that is what was passed in;
 *  - 0 if a Central Directory End signature (32-bit or ZIP64) is found
 *    in place of an entry, or if the entry could not be read in full;
 *  - otherwise, the offset of whatever follows this entry (header,
 *    filename, extra field and file comment skipped).
 */
static uint64_t read_next_cdf(int fd, uint64_t cdf_offset, cdf_header_t *cdf)
{
	uint32_t magic = 0;

	if (cdf_offset == BAD_CDF_OFFSET)
		return cdf_offset;

	dbg("Reading CDF at 0x%"OFF_FMT"x", cdf_offset);
	lseek(fd, cdf_offset, SEEK_SET);
	/* A truncated directory must not be parsed from uninitialised
	 * memory: report it as the end of the directory instead. */
	if (_read(fd, &magic, 4) != 4) {
		dbg("can't read CDF signature");
		return 0; /* EOF */
	}
	/* Central Directory End? Assume CDF has ended.
	 * (more correct method is to use cde.cdf_entries_total counter)
	 */
	if (magic == ZIP_CDE_MAGIC) {
		dbg("got ZIP_CDE_MAGIC");
		return 0; /* EOF */
	}
	if (magic == ZIP64_CDE_MAGIC) { /* seen in .zip with >4GB files */
		dbg("got ZIP64_CDE_MAGIC");
		return 0; /* EOF */
	}
	if (_read(fd, cdf->raw, CDF_HEADER_LEN) != CDF_HEADER_LEN) {
		dbg("can't read CDF header");
		return 0; /* EOF */
	}

	FIX_ENDIANNESS_CDF(*cdf);
	dbg("  magic:%08x filename_len:%u extra_len:%u file_comment_length:%u",
		magic,
		(unsigned)cdf->fmt.filename_len,
		(unsigned)cdf->fmt.extra_len,
		(unsigned)cdf->fmt.file_comment_length
	);
//TODO: require that magic == ZIP_CDF_MAGIC?

	cdf_offset += 4 + CDF_HEADER_LEN
		+ cdf->fmt.filename_len
		+ cdf->fmt.extra_len
		+ cdf->fmt.file_comment_length;

	dbg("Next cdf_offset 0x%"OFF_FMT"x", cdf_offset);
	return cdf_offset;
}
|
|
|
|
#endif
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/* Reject archives whose member name length exceeds 0xfff bytes */
static void die_if_bad_fnamesize(unsigned sz)
{
	enum { FNAME_LEN_MAX = 0xfff };

	if (sz <= FNAME_LEN_MAX)
		return;

	/* more than 4k?! no funny business please */
	bb_simple_error_msg_and_die("bad archive");
}
|
|
|
|
|
2020-06-10 19:00:47 +00:00
|
|
|
/*
 * Move the read position of fd forward by 'skip' bytes.
 * A relative lseek() is attempted first; should it fail,
 * bb_copyfd_exact_size() is called for the same amount with -1 as the
 * destination descriptor (presumably consuming the bytes by reading
 * them - confirm against that helper).
 */
static void unzip_skip(int fd, off_t skip)
{
	if (skip == 0)
		return;

	if (lseek(fd, skip, SEEK_CUR) != (off_t)-1)
		return;

	bb_copyfd_exact_size(fd, -1, skip);
}
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/* Set the filename and process extra ZIP64 data */
|
|
|
|
static void unzip_set_xstate(transformer_state_t* xstate, zip_header_t* zip)
|
|
|
|
{
|
|
|
|
uint8_t* buf = NULL;
|
2023-06-29 20:15:35 +00:00
|
|
|
uint16_t i = 0;
|
|
|
|
extra_header_t* extra;
|
2023-06-28 16:39:39 +00:00
|
|
|
zip64_t* zip64;
|
|
|
|
|
|
|
|
/* Set the default sizes for non ZIP64 content */
|
|
|
|
xstate->dst_size = zip->fmt.ucmpsize;
|
|
|
|
xstate->bytes_in = zip->fmt.cmpsize;
|
|
|
|
|
|
|
|
/* Set the filename */
|
|
|
|
die_if_bad_fnamesize(zip->fmt.filename_len);
|
|
|
|
xstate->dst_name = xzalloc(zip->fmt.filename_len + 1);
|
|
|
|
if (xstate->dst_name == NULL)
|
|
|
|
goto err;
|
|
|
|
xread(xstate->src_fd, xstate->dst_name, zip->fmt.filename_len);
|
|
|
|
xstate->dst_name[zip->fmt.filename_len] = 0;
|
|
|
|
|
|
|
|
/* Read the extra data */
|
2023-06-29 20:15:35 +00:00
|
|
|
if (zip->fmt.extra_len) {
|
|
|
|
dbg("Reading extra data");
|
2023-06-28 16:39:39 +00:00
|
|
|
buf = malloc(zip->fmt.extra_len);
|
|
|
|
if (buf == NULL)
|
|
|
|
goto err;
|
|
|
|
xread(xstate->src_fd, buf, zip->fmt.extra_len);
|
|
|
|
}
|
|
|
|
|
2023-06-29 20:15:35 +00:00
|
|
|
/* Process the extra records */
|
|
|
|
if (zip->fmt.extra_len < EXTRA_HEADER_LEN + 1)
|
|
|
|
goto err;
|
|
|
|
for (i = 0; i < zip->fmt.extra_len - EXTRA_HEADER_LEN; ) {
|
|
|
|
extra = (extra_header_t*)&buf[i];
|
|
|
|
FIX_ENDIANNESS_EXTRA(*extra);
|
|
|
|
dbg(" tag:0x%04x len:%u",
|
|
|
|
(unsigned)extra->fmt.tag,
|
|
|
|
(unsigned)extra->fmt.length
|
|
|
|
);
|
|
|
|
i += EXTRA_HEADER_LEN + extra->fmt.length;
|
|
|
|
/* Process the ZIP64 data */
|
|
|
|
if (extra->fmt.tag == SWAP_LE16(0x0001)) {
|
|
|
|
zip64 = (zip64_t*)extra;
|
|
|
|
FIX_ENDIANNESS_ZIP64(*zip64);
|
|
|
|
if ((zip->fmt.cmpsize == 0xffffffffL || zip->fmt.ucmpsize == 0xffffffffL) &&
|
|
|
|
EXTRA_HEADER_LEN + zip64->fmt.length >= ZIP64_LEN) {
|
|
|
|
dbg("Actual cmpsize:0x%"OFF_FMT"x ucmpsize:0x%"OFF_FMT"x",
|
|
|
|
zip64->fmt.cmpsize,
|
|
|
|
zip64->fmt.ucmpsize
|
|
|
|
);
|
|
|
|
xstate->dst_size = zip64->fmt.ucmpsize;
|
|
|
|
xstate->bytes_in = zip64->fmt.cmpsize;
|
|
|
|
}
|
|
|
|
}
|
2023-06-28 16:39:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
err:
|
|
|
|
free(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
static IF_DESKTOP(long long) int
|
|
|
|
unzip_extract(zip_header_t* zip, transformer_state_t* xstate)
|
2015-06-20 17:40:55 +00:00
|
|
|
{
|
|
|
|
IF_DESKTOP(long long) int n = -EFAULT;
|
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
if (zip->fmt.method == 0) {
|
|
|
|
/* Method 0 - stored (not compressed) */
|
|
|
|
if (xstate->dst_size) {
|
|
|
|
bb_copyfd_exact_size(xstate->src_fd, xstate->dst_fd, xstate->dst_size);
|
2015-06-20 17:40:55 +00:00
|
|
|
}
|
2023-06-28 16:39:39 +00:00
|
|
|
return xstate->dst_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (zip->fmt.method == 8) {
|
|
|
|
/* Method 8 - inflate */
|
|
|
|
n = inflate_unzip(xstate);
|
|
|
|
|
|
|
|
/* Validate decompression */
|
|
|
|
if (n >= 0) {
|
|
|
|
if (zip->fmt.crc32 != (xstate->crc32 ^ 0xffffffffL)) {
|
|
|
|
bb_simple_error_msg_and_die("crc error");
|
|
|
|
}
|
|
|
|
} else if (n != -ENOSPC) {
|
|
|
|
bb_simple_error_msg_and_die("inflate error");
|
2015-06-20 17:40:55 +00:00
|
|
|
}
|
2023-06-28 16:39:39 +00:00
|
|
|
}
|
|
|
|
#if ENABLE_FEATURE_UNZIP_BZIP2
|
|
|
|
else if (zip->fmt.method == 12) {
|
|
|
|
/* Tested. Unpacker reads too much, but we use CDF
|
|
|
|
* and will seek to the correct beginning of next file.
|
|
|
|
*/
|
|
|
|
xstate->bytes_out = unpack_bz2_stream(xstate);
|
2023-06-29 20:15:35 +00:00
|
|
|
if ((int64_t)xstate->bytes_out < 0)
|
2023-06-28 16:39:39 +00:00
|
|
|
bb_simple_error_msg_and_die("inflate error");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if ENABLE_FEATURE_UNZIP_LZMA
|
|
|
|
else if (zip->fmt.method == 14) {
|
|
|
|
/* Not tested yet */
|
|
|
|
xstate->bytes_out = unpack_lzma_stream(xstate);
|
2023-06-29 20:15:35 +00:00
|
|
|
if ((int64_t)xstate->bytes_out < 0)
|
2023-06-28 16:39:39 +00:00
|
|
|
bb_simple_error_msg_and_die("inflate error");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if ENABLE_FEATURE_UNZIP_XZ
|
|
|
|
else if (zip->fmt.method == 95) {
|
|
|
|
/* Not tested yet */
|
|
|
|
xstate->bytes_out = unpack_xz_stream(xstate);
|
2023-06-29 20:15:35 +00:00
|
|
|
if ((int64_t)xstate->bytes_out < 0)
|
2023-06-28 16:39:39 +00:00
|
|
|
bb_simple_error_msg_and_die("inflate error");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
else {
|
|
|
|
bb_error_msg_and_die("unsupported method %u", zip->fmt.method);
|
|
|
|
}
|
2015-06-20 17:40:55 +00:00
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
/* Validate decompression - size */
|
|
|
|
if (n != -ENOSPC && xstate->dst_size != xstate->bytes_out) {
|
|
|
|
/* Don't die. Who knows, maybe len calculation
|
|
|
|
* was botched somewhere. After all, crc matched! */
|
|
|
|
bb_simple_error_msg("bad length");
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
IF_DESKTOP(long long) int FAST_FUNC
|
|
|
|
unpack_zip_stream(transformer_state_t *xstate)
|
|
|
|
{
|
|
|
|
IF_DESKTOP(long long) int n = -EFAULT;
|
|
|
|
bool is_dir = false;
|
|
|
|
uint64_t cdf_offset = find_cdf_offset(xstate->src_fd); /* try to seek to the end, find CDE and CDF start */
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
zip_header_t zip;
|
|
|
|
if (!ENABLE_FEATURE_UNZIP_CDF || cdf_offset == BAD_CDF_OFFSET) {
|
|
|
|
/* Normally happens when input is unseekable.
|
|
|
|
*
|
|
|
|
* Valid ZIP file has Central Directory at the end
|
|
|
|
* with central directory file headers (CDFs).
|
|
|
|
* After it, there is a Central Directory End structure.
|
|
|
|
* CDFs identify what files are in the ZIP and where
|
|
|
|
* they are located. This allows ZIP readers to load
|
|
|
|
* the list of files without reading the entire ZIP archive.
|
|
|
|
* ZIP files may be appended to, only files specified in
|
|
|
|
* the CD are valid. Scanning for local file headers is
|
|
|
|
* not a correct algorithm.
|
|
|
|
*
|
|
|
|
* We try to do the above, and resort to "linear" reading
|
|
|
|
* of ZIP file only if seek failed or CDE wasn't found.
|
2020-06-10 19:00:47 +00:00
|
|
|
*/
|
2023-06-28 16:39:39 +00:00
|
|
|
uint32_t magic;
|
|
|
|
|
|
|
|
/* Check magic number */
|
|
|
|
xread(xstate->src_fd, &magic, 4);
|
|
|
|
/* CDF item? Assume there are no more files, exit */
|
|
|
|
if (magic == ZIP_CDF_MAGIC) {
|
|
|
|
dbg("got ZIP_CDF_MAGIC");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Data descriptor? It was a streaming file, go on */
|
|
|
|
if (magic == ZIP_DD_MAGIC) {
|
|
|
|
dbg("got ZIP_DD_MAGIC");
|
|
|
|
/* skip over duplicate crc32, cmpsize and ucmpsize */
|
|
|
|
unzip_skip(xstate->src_fd, 3 * 4);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (magic != ZIP_FILEHEADER_MAGIC)
|
|
|
|
bb_error_msg_and_die("invalid zip magic %08X", (int)magic);
|
|
|
|
dbg("got ZIP_FILEHEADER_MAGIC");
|
|
|
|
|
|
|
|
xread(xstate->src_fd, zip.raw, ZIP_HEADER_LEN);
|
|
|
|
FIX_ENDIANNESS_ZIP(zip);
|
|
|
|
if (zip.fmt.zip_flags & SWAP_LE16(0x0008)) {
|
|
|
|
bb_error_msg_and_die("zip flag %s is not supported",
|
|
|
|
"8 (streaming)");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#if ENABLE_FEATURE_UNZIP_CDF
|
|
|
|
else {
|
|
|
|
/* cdf_offset is valid (and we know the file is seekable) */
|
|
|
|
cdf_header_t cdf;
|
|
|
|
cdf_offset = read_next_cdf(xstate->src_fd, cdf_offset, &cdf);
|
|
|
|
if (cdf_offset == 0) /* EOF? */
|
|
|
|
break;
|
|
|
|
# if 1
|
|
|
|
lseek(xstate->src_fd,
|
|
|
|
SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4,
|
|
|
|
SEEK_SET);
|
|
|
|
xread(xstate->src_fd, zip.raw, ZIP_HEADER_LEN);
|
|
|
|
FIX_ENDIANNESS_ZIP(zip);
|
|
|
|
if (zip.fmt.zip_flags & SWAP_LE16(0x0008)) {
|
2015-06-20 17:40:55 +00:00
|
|
|
/* 0x0008 - streaming. [u]cmpsize can be reliably gotten
|
2023-06-28 16:39:39 +00:00
|
|
|
* only from Central Directory.
|
2020-06-10 19:00:47 +00:00
|
|
|
*/
|
2023-06-28 16:39:39 +00:00
|
|
|
zip.fmt.crc32 = cdf.fmt.crc32;
|
|
|
|
zip.fmt.cmpsize = cdf.fmt.cmpsize;
|
|
|
|
zip.fmt.ucmpsize = cdf.fmt.ucmpsize;
|
2015-06-20 17:40:55 +00:00
|
|
|
}
|
2020-06-10 19:00:47 +00:00
|
|
|
/* Check for UNIX/DOS/WIN directory */
|
2023-06-28 16:39:39 +00:00
|
|
|
is_dir = cdf.fmt.external_attributes & 0x40000010;
|
|
|
|
// Seen in some zipfiles: central directory 9 byte extra field contains
|
|
|
|
// a subfield with ID 0x5455 and 5 data bytes, which is a Unix-style UTC mtime.
|
|
|
|
// Local header version:
|
|
|
|
// u16 0x5455 ("UT")
|
|
|
|
// u16 size (1 + 4 * n)
|
|
|
|
// u8 flags: bit 0:mtime is present, bit 1:atime is present, bit 2:ctime is present
|
|
|
|
// u32 mtime
|
|
|
|
// u32 atime
|
|
|
|
// u32 ctime
|
|
|
|
// Central header version:
|
|
|
|
// u16 0x5455 ("UT")
|
|
|
|
// u16 size (5 (or 1?))
|
|
|
|
// u8 flags: bit 0:mtime is present, bit 1:atime is present, bit 2:ctime is present
|
|
|
|
// u32 mtime (CDF does not store atime/ctime)
|
|
|
|
# else
|
|
|
|
/* CDF has the same data as local header, no need to read the latter...
|
|
|
|
* ...not really. An archive was seen with cdf.extra_len == 6 but
|
|
|
|
* zip.extra_len == 0.
|
|
|
|
*/
|
|
|
|
memcpy(&zip.fmt.version,
|
|
|
|
&cdf.fmt.version_needed, ZIP_HEADER_LEN);
|
|
|
|
xlseek(zip_fd,
|
|
|
|
SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN,
|
|
|
|
SEEK_SET);
|
|
|
|
# endif
|
2015-06-20 17:40:55 +00:00
|
|
|
}
|
2023-06-28 16:39:39 +00:00
|
|
|
#endif
|
2015-06-20 17:40:55 +00:00
|
|
|
if (cdf_offset == BAD_CDF_OFFSET
|
2023-06-28 16:39:39 +00:00
|
|
|
&& (zip.fmt.zip_flags & SWAP_LE16(0x0008))
|
|
|
|
) {
|
2015-06-20 17:40:55 +00:00
|
|
|
/* If it's a streaming zip, we _require_ CDF */
|
|
|
|
bb_error_msg_and_die("can't find file table");
|
|
|
|
}
|
2023-06-28 16:39:39 +00:00
|
|
|
if (zip.fmt.zip_flags & SWAP_LE16(0x0001)) {
|
|
|
|
/* 0x0001 - encrypted */
|
|
|
|
bb_error_msg_and_die("zip flag %s is not supported",
|
|
|
|
"1 (encryption)");
|
|
|
|
}
|
|
|
|
dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x",
|
|
|
|
(unsigned)zip.fmt.cmpsize,
|
|
|
|
(unsigned)zip.fmt.extra_len,
|
|
|
|
(unsigned)zip.fmt.ucmpsize
|
|
|
|
);
|
|
|
|
|
|
|
|
/* Sets the file name and set the file sizes using ZIP64 if present */
|
|
|
|
unzip_set_xstate(xstate, &zip);
|
2015-06-20 17:40:55 +00:00
|
|
|
|
2020-06-10 19:00:47 +00:00
|
|
|
/* Handle multiple file switching */
|
2023-06-28 16:39:39 +00:00
|
|
|
if ((!is_dir) && (xstate->dst_dir != NULL) &&
|
|
|
|
(transformer_switch_file(xstate) < 0)) {
|
2020-06-10 19:00:47 +00:00
|
|
|
goto err;
|
|
|
|
}
|
2015-06-20 17:40:55 +00:00
|
|
|
|
2023-06-28 16:39:39 +00:00
|
|
|
n = unzip_extract(&zip, xstate);
|
|
|
|
|
2020-06-10 19:00:47 +00:00
|
|
|
/* Only process the first file if not extracting to a dir */
|
|
|
|
if (xstate->dst_dir == NULL)
|
|
|
|
break;
|
2015-06-20 17:40:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (n > 0)
|
|
|
|
return xstate->bytes_out;
|
|
|
|
else if (n == -ENOSPC)
|
|
|
|
return xstate->mem_output_size_max;
|
|
|
|
else
|
|
|
|
return n;
|
|
|
|
}
|