diff --git a/src/unpaker2/unpaker2.cpp b/src/unpaker2/unpaker2.cpp index 8bfc138..51d0774 100644 --- a/src/unpaker2/unpaker2.cpp +++ b/src/unpaker2/unpaker2.cpp @@ -86,9 +86,174 @@ struct stream { } }; -struct decoded_block { - uint8_t out[supported_block_size]; + +/// Type of a function to do some kind of coding work (filters, Stream, +/// Block encoders/decoders etc.). Some special coders use don't use both +/// input and output buffers, but for simplicity they still use this same +/// function prototype. +typedef lzma_ret (*lzma_code_function)(void *coder, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, + size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size, + lzma_action action); + +/// Type of a function to free the memory allocated for the coder +typedef void (*lzma_end_function)(void *coder, const lzma_allocator *allocator); + +/// Hold data and function pointers of the next filter in the chain. +struct lzma_next_coder_s { + /// Pointer to coder-specific data + void *coder; + + /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't + /// point to a filter coder. + lzma_vli id; + + /// "Pointer" to init function. This is never called here. + /// We need only to detect if we are initializing a coder + /// that was allocated earlier. See lzma_next_coder_init and + /// lzma_next_strm_init macros in this file. + uintptr_t init; + + /// Pointer to function to do the actual coding + lzma_code_function code; + + /// Pointer to function to free lzma_next_coder.coder. This can + /// be NULL; in that case, lzma_free is called to free + /// lzma_next_coder.coder. + lzma_end_function end; + + /// Pointer to a function to get progress information. If this is NULL, + /// lzma_stream.total_in and .total_out are used instead. + void (*get_progress)(void *coder, uint64_t *progress_in, uint64_t *progress_out); + + /// Pointer to function to return the type of the integrity check. + /// Most coders won't support this. + lzma_check (*get_check)(const void *coder); + + /// Pointer to function to get and/or change the memory usage limit. + /// If new_memlimit == 0, the limit is not changed. + lzma_ret (*memconfig)(void *coder, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit); + + /// Update the filter-specific options or the whole filter chain + /// in the encoder. + lzma_ret (*update)(void *coder, const lzma_allocator *allocator, const lzma_filter *filters, + const lzma_filter *reversed_filters); + + /// Set how many bytes of output this coder may produce at maximum. + /// On success LZMA_OK must be returned. + /// If the filter chain as a whole cannot support this feature, + /// this must return LZMA_OPTIONS_ERROR. + /// If no input has been given to the coder and the requested limit + /// is too small, this must return LZMA_BUF_ERROR. If input has been + /// seen, LZMA_OK is allowed too. + lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size, uint64_t out_limit); }; +typedef struct lzma_next_coder_s lzma_next_coder; + +/// Largest valid lzma_action value as unsigned integer. +#define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER)) + +/// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to +/// this is stored in lzma_stream. +struct lzma_internal_s { + /// The actual coder that should do something useful + lzma_next_coder next; + + /// Track the state of the coder. This is used to validate arguments + /// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH + /// is used on every call to lzma_code until next.code has returned + /// LZMA_STREAM_END. + enum { + ISEQ_RUN, + ISEQ_SYNC_FLUSH, + ISEQ_FULL_FLUSH, + ISEQ_FINISH, + ISEQ_FULL_BARRIER, + ISEQ_END, + ISEQ_ERROR, + } sequence; + + /// A copy of lzma_stream avail_in. This is used to verify that the + /// amount of input doesn't change once e.g. LZMA_FINISH has been + /// used. + size_t avail_in; + + /// Indicates which lzma_action values are allowed by next.code. + bool supported_actions[LZMA_ACTION_MAX + 1]; + + /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was + /// made (no input consumed and no output produced by next.code). + bool allow_buf_error; +}; +typedef struct lzma_internal_s lzma_internal; + + +typedef struct { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_RUN, + SEQ_INDEX, + SEQ_STREAM_FOOTER, + SEQ_STREAM_PADDING, + } sequence; + + /// Block decoder + lzma_next_coder block_decoder; + + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. + lzma_block block_options; + + /// Stream Flags from Stream Header + lzma_stream_flags stream_flags; + + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// If true, LZMA_NO_CHECK is returned if the Stream has + /// no integrity check. + bool tell_no_check; + + /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has + /// an integrity check that isn't supported by this liblzma build. + bool tell_unsupported_check; + + /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. + bool tell_any_check; + + /// If true, we will tell the Block decoder to skip calculating + /// and verifying the integrity check. + bool ignore_check; + + /// If true, we will decode concatenated Streams that possibly have + /// Stream Padding between or after them. LZMA_STREAM_END is returned + /// once the application isn't giving us any new input (LZMA_FINISH), + /// and we aren't in the middle of a Stream, and possible + /// Stream Padding is a multiple of four bytes. + bool concatenated; + + /// When decoding concatenated Streams, this is true as long as we + /// are decoding the first Stream. This is needed to avoid misleading + /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic + /// bytes. + bool first_stream; + + /// Write position in buffer[] and position in Stream Padding + size_t pos; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +} lzma_stream_coder; + void unpack_file(path fn) { primitives::templates2::mmap_file f{fn}; @@ -100,48 +265,71 @@ void unpack_file(path fn) { auto descs = s.span(p.n_files); auto segments = s.span(p.n_blocks); std::vector bbb; - bbb.resize(segments.size() * supported_block_size); + bbb.resize((segments.size() + 1) * supported_block_size); auto pp = bbb.data(); - std::vector dblocks; for (auto &&seg : segments) { s.p = f.p + seg.offset; - auto &b = dblocks.emplace_back(); uint32_t len = s; switch (seg.algorithm) { case segment::decode_algorithm::none: { - //memcpy(b.out, s.p, len); memcpy(pp, s.p, len); + pp += len; break; } case segment::decode_algorithm::lzo: { + if (seg.algorithm == segment::decode_algorithm::lzma) { + int a = 5; + a++; + } size_t outsz = supported_block_size; - //auto r2 = lzo1x_decompress(s.p, len, b.out, &outsz, 0); auto r2 = lzo1x_decompress(s.p, len, pp, &outsz, 0); if (r2 != LZO_E_OK) { throw std::runtime_error{"lzo error"}; } + pp += outsz; break; } - default: - throw std::runtime_error{"compression unsupported"}; + case segment::decode_algorithm::rlew: { + break; + } + case segment::decode_algorithm::lzma: { + uint64_t memlimit = 0; + size_t in_pos = 0; + size_t out_pos = 0; + auto r2 = lzma_stream_buffer_decode(&memlimit, 0, 0, s.p, &in_pos, len, pp, &out_pos, bbb.size() - (pp - bbb.data())); + lzma_stream strm{}; + strm.next_in = s.p; + strm.avail_in = len; + strm.total_in = len; + strm.next_out = pp; + //strm.avail_out = + auto r3 = lzma_stream_decoder(&strm, 1'000'000, 0); + ((lzma_stream_coder*)strm.internal->next.coder)->sequence = lzma_stream_coder::SEQ_BLOCK_RUN; + auto r4 = lzma_code(&strm, LZMA_RUN); + auto r = lzma_microlzma_decoder(&strm, len, 0, false, 1'000'000); + if (r != LZMA_OK) { + throw std::runtime_error{"lzma error"}; + } + r = lzma_code(&strm, lzma_action::LZMA_RUN); + + int a = 5; + a++; + } + default: + throw std::runtime_error{"compression unsupported: "s + std::to_string(seg.algorithm)}; } - pp += len; } pp = bbb.data(); - /*uint8_t out[32768]; - uint64_t memlimit = 0; - size_t in_pos = 0; - size_t out_pos = 0; - auto r = lzma_stream_buffer_decode(&memlimit, 0, 0, s.p, &in_pos, f.p+f.sz-s.p, out, &out_pos, 1'000'000'000);*/ - auto dir = fn += ".dir2"; fs::create_directories(dir); for (auto &&d : descs) { auto fn = dir / d.name; fs::create_directories(fn.parent_path()); - std::ofstream o{fn, std::ios::binary}; - o.write((const char *)pp + d.offset, d.size); + std::cout << "unpacking " << fn << "\n"; + primitives::templates2::mmap_file f{fn, primitives::templates2::mmap_file::rw{}}; + f.alloc_raw(d.size); + memcpy(f.p, pp + d.offset, d.size); } } @@ -155,10 +343,12 @@ int main(int argc, char *argv[]) { } else if (fs::is_directory(p)) { auto files = enumerate_files_like(p, ".*\\.pak", false); for (auto &f : files) { - if (f.has_extension()) - continue; std::cout << "processing: " << f << "\n"; - unpack_file(f); + try { + unpack_file(f); + } catch (std::exception &e) { + std::cerr << e.what() << "\n"; + } } } else { throw std::runtime_error("Bad fs object");