From 9146cf69005178175e60973372825180563c37ca Mon Sep 17 00:00:00 2001 From: lzwdgc Date: Fri, 2 Feb 2024 23:06:17 +0300 Subject: [PATCH] Unpaker updates. --- src/unpaker/decode.h | 353 ++++---------------------------------- src/unpaker/pak.cpp | 52 ++---- src/unpaker/pak.h | 4 +- src/unpaker/unpaker.cpp | 23 ++- src/unpaker2/unpaker2.cpp | 194 +++++++++++++-------- sw.cpp | 5 +- 6 files changed, 192 insertions(+), 439 deletions(-) diff --git a/src/unpaker/decode.h b/src/unpaker/decode.h index 6485611..a95f8c6 100644 --- a/src/unpaker/decode.h +++ b/src/unpaker/decode.h @@ -1,114 +1,6 @@ #pragma once -#ifdef _WIN64 -#error "not available in 64-bit mode" -#endif - -#define _BYTE uint8_t -#define _WORD uint16_t -#define _DWORD uint32_t - -#define LOBYTE(x) (*((_BYTE *)&(x))) -#define LOWORD(x) (*((_WORD *)&(x))) -#define HIBYTE(x) (*((_BYTE *)&(x) + 1)) - -#define BYTEn(x, n) (*((_BYTE *)&(x) + n)) -#define BYTE1(x) BYTEn(x, 1) - -template -int8_t __SETS__(T x) -{ - if (sizeof(T) == 1) - return int8_t(x) < 0; - if (sizeof(T) == 2) - return int16_t(x) < 0; - if (sizeof(T) == 4) - return int32_t(x) < 0; - return int64_t(x) < 0; -} - -template -int8_t __OFSUB__(T x, U y) -{ - if (sizeof(T) < sizeof(U)) - { - U x2 = x; - int8_t sx = __SETS__(x2); - return (sx ^ __SETS__(y)) & (sx ^ __SETS__(x2 - y)); - } - else - { - T y2 = y; - int8_t sx = __SETS__(x); - return (sx ^ __SETS__(y2)) & (sx ^ __SETS__(x - y2)); - } -} - -inline void memset32(void *ptr, uint32_t value, int count) -{ - uint32_t *p = (uint32_t *)ptr; - for (int i = 0; i < count; i++) - *p++ = value; -} - -char *decode_f1(char *input, int size, char *output) -{ - char *result; // eax@1 - char *v4; // ebx@1 - int v5; // ebp@1 - char *v6; // edi@2 - char *v7; // edx@4 - int v8; // ecx@6 - int v9; // esi@6 - int v10; // esi@6 - int v11; // ecx@6 - char *v12; // edx@7 - int v13; // esi@7 - - result = input; - v4 = &input[size]; - v5 = 8; - if (input < &input[size]) - { - v6 = output; - do - { - if (v5 == 8) - { - v7 = (char *)(uint8_t)*result++; - v5 = 0; - input = v7; - if (result == v4) - break; - } - if ((uint8_t)input & 1) - { - v8 = (uint8_t)*result; - v9 = (uint8_t)result[1]; - result += 2; - v10 = ((v8 & 0xF) << 8) + v9; - v11 = (v8 >> 4) + 4; - if (v11) - { - v12 = &v6[-v10]; - v13 = v11; - do - { - *v6++ = *v12++; - --v13; - } while (v13); - } - } - else - { - *v6++ = *result++; - } - ++v5; - input = (char *)((signed int)input >> 1); - } while (result < v4); - } - return result; -} +#define LOBYTE(x) (*((uint8_t *)&(x))) char decode_f2(char *input, int size, char *output) { @@ -141,17 +33,13 @@ char decode_f2(char *input, int size, char *output) { c_2 = ((c & 0xF) << 8) + c_1; c = (c >> 4) + 4; - if (c) - { - v10 = &v6[-c_2]; - c_3 = c; - do - { - LOBYTE(c) = *v10; - *v6++ = *v10++; - --c_3; - } while (c_3); - } + v10 = &v6[-c_2]; + c_3 = c; + do { + LOBYTE(c) = *v10; + *v6++ = *v10++; + --c_3; + } while (c_3); } else { @@ -168,215 +56,38 @@ char decode_f2(char *input, int size, char *output) return c; } -int decode_f3(char *input, int size, char *output) -{ - uint16_t s; // cx@1 - char *v4; // edi@1 - int result; // eax@1 - int idx; // edx@1 - bool v7; // zf@1 - bool v8; // sf@1 - uint8_t v9; // of@1 - int v10; // ebp@1 - char *v11; // ebx@2 - uint16_t v12; // ax@3 - uint8_t v13; // cl@4 - __int16 v14; // ax@6 - unsigned int v15; // esi@6 - void *v16; // edi@7 - int v17; // ebp@7 - int v18; // eax@7 - int v19; // edi@7 - int i; // ecx@7 - int v21; // [sp+8h] [bp-4h]@1 - int v22; // [sp+14h] [bp+8h]@1 - - LOBYTE(s) = 0; - v4 = input; - HIBYTE(s) = *input; - result = size >> 1; - idx = 1; - v9 = __OFSUB__(size >> 1, 1); - v7 = size >> 1 == 1; - v8 = (size >> 1) - 1 < 0; - v22 = size >> 1; - v10 = s; - v21 = s; - if (!((uint8_t)(v8 ^ v9) | v7)) - { - v11 = output; - do - { - v12 = *(_WORD *)&v4[2 * idx]; - if ((*(_WORD *)&v4[2 * idx] & 0xFF00) == (_WORD)v10) - { - v13 = *(_WORD *)&v4[2 * idx++]; - if (v12 != (uint16_t)v10 + 255) - { - v14 = *(_WORD *)&v4[2 * idx]; - v15 = v13 + 3; - if ((signed int)v15 > 0) - { - LOWORD(v10) = *(_WORD *)&v4[2 * idx]; - v16 = v11; - v11 += 2 * v15; - v17 = v10 << 16; - LOWORD(v17) = v14; - v18 = v17; - v10 = v21; - memset32(v16, v18, v15 >> 1); - v19 = (int)((char *)v16 + 4 * (v15 >> 1)); - for (i = v15 & 1; i; --i) - { - *(_WORD *)v19 = v18; - v19 += 2; - } - v4 = input; - } - goto LABEL_13; - } - else - { - *(_WORD *)v11 = *(_WORD *)&v4[2 * idx]; - } - } - else - { - *(_WORD *)v11 = v12; - } - v11 += 2; - LABEL_13: - result = v22; - ++idx; - } while (idx < v22); +template +auto decode_rle(const T *input, int size, T *output, auto f_cmp_indicator) { + if (size < 2) { + return (uint8_t *)output; } - return result; -} - -int decode_f4(char *input, int size, char *output, int segment_offset) -{ - char *in3; // edx@1 - int result; // eax@1 - char in1; // bl@1 - int ptr; // esi@1 - char *out1; // edi@2 - char c; // al@3 - char c_next; // cl@4 - char v11; // al@6 - unsigned int c_prev; // ebp@6 - int v13; // eax@7 - char in2; // [sp+1h] [bp-1h]@1 - - in3 = input; - result = size; - in1 = *input; - ptr = 1; - in2 = *input; - if (size > 1) - { - out1 = output; - while (1) - { - c = in3[ptr]; - if (c != in1) - break; - c_next = in3[ptr++ + 1]; - if (c_next == -1) - { - *out1 = in1; - LABEL_9: - ++out1; - goto LABEL_10; - } - v11 = in3[ptr++ + 1]; - c_prev = (uint8_t)c_next + 3; - if ((signed int)c_prev > 0) - { - LOBYTE(segment_offset) = v11; - BYTE1(segment_offset) = v11; - v13 = segment_offset << 16; - LOWORD(v13) = segment_offset; - in1 = in2; - memset32(out1, v13, c_prev >> 2); - in3 = input; - memset(&out1[4 * (c_prev >> 2)], v13, c_prev & 3); - out1 = &output[c_prev]; - LABEL_10: - output = out1; - } - result = size; - ++ptr; - if (ptr >= size) - return result; - } - *out1 = c; - goto LABEL_9; - } - return result; -} - -void decode_rle(const short *input, const int size, short *output) -{ - if (size < 2) - return; - - // input ptr, also rle_indicator - const auto rle_indicator = input++; - while (1) - { + const auto base = input; + const auto rle_indicator = (uint8_t)*input++; + while (1) { auto c = *input++; - if ((c & 0xFF00) != (*rle_indicator << 8)) + if (f_cmp_indicator(c) != rle_indicator) { *output++ = c; - else - { - uint32_t count = (uint8_t)c; - if (count == (*rle_indicator << 8) + 255) - *output++ = c; // insert indicator byte itself - else - { - count += 3; - for (int i = 0; i < count / 2; i++) - { - *output++ = *input; - *output++ = *input; - } - for (int i = 0; i < ((count / 2) & 1); i++) - { + } else { + uint32_t count = sizeof(T) == 1 ? *input++ : (c & 0xFF); + if (count == 0xFF) { + *output++ = sizeof(T) == 1 ? rle_indicator : *input++; + } else { + for (int i = 0; i < count + 3; i++) { *output++ = *input; } + ++input; } } - - if (input >= rle_indicator + size) - return; + if ((uint8_t *)input >= (uint8_t *)base + size) { + return (uint8_t *)output; + } } } -void decode_rle(const char *input, const int size, char *output) -{ - if (size < 2) - return; - - // input ptr, also rle_indicator - const auto rle_indicator = input++; - while (1) - { - auto c = *input++; - if (c != *rle_indicator) - *output++ = c; - else - { - uint32_t count = (uint8_t)*input++; - if (count == 255) - *output++ = *rle_indicator; // insert indicator byte itself - else - { - memset(output, *input++, count += 3); - output += count; - } - } - - if (input >= rle_indicator + size) - return; - } +auto decode_rle(const uint16_t *input, int size, uint16_t *output) { + return decode_rle(input, size, output, [](auto c){return c >> 8;}); +} + +auto decode_rle(const uint8_t *input, const int size, uint8_t *output) { + return decode_rle(input, size, output, [](auto c){return c;}); } diff --git a/src/unpaker/pak.cpp b/src/unpaker/pak.cpp index b0ecfb1..947efb4 100644 --- a/src/unpaker/pak.cpp +++ b/src/unpaker/pak.cpp @@ -96,7 +96,7 @@ int record::read(pak *pak, void *output, int size) void segment::load_header(FILE *f) { FREAD(unk1); - FREAD(algorithm); + FREAD(algorithms); FREAD(offset); } @@ -105,7 +105,7 @@ void segment::load_segment() auto f = file; fseek(f, offset, SEEK_SET); - /*if (algorithm == 0) + /*if (algorithms == 0) { std::cerr << "Something is wrong. Maybe you trying to open aim2 files?\n"; std::cerr << "They can be opened with SDK extractor.\n"; @@ -114,7 +114,7 @@ void segment::load_segment() FREAD(size1); size2 = size1; - if ((algorithm & 0x3) && (algorithm & 0xC)) + if (algorithms & DA_2) { FREAD(size2); fread(&decoded[0], 1, size2, f); @@ -129,50 +129,24 @@ void segment::decompress(int segment_id) { load_segment(); - if ((algorithm & DA_1) || (algorithm & DA_2)) - { - if (algorithm & DA_1) - // never called - decode_f1((char*)decoded, size2, (char*)encoded); - else - decode_f2((char*)decoded, size2, (char*)encoded); + if (algorithms & DA_2) { + decode_f2((char*)decoded, size2, (char*)encoded); } - if ((algorithm & RLE_1_byte) || (algorithm & RLE_2_bytes)) - { - if (algorithm & RLE_2_bytes) - { - decode_f3((char*)encoded, size1, (char*)decoded); - - /*static std::vector buf(4194432); - decode_f3((char*)encoded, size1, (char*)buf.data()); - decode_rle((short*)encoded, size1, (short*)decoded); - auto sz = 0; - while (sz++ < size1 - 1 && decoded[sz] == buf[sz]); - std::cout << "len = " << sz << "\n"; - assert(memcmp(decoded, buf.data(), size1) == 0);*/ - } - else - { - //decode_f4((char*)encoded, size1, (char*)decoded); - - /*static std::vector buf(4194432); - const int header_size = 0xC; - decode_f4((char*)encoded, size1, (char*)buf.data(), segment_id * header_size); - decode_rle((char*)encoded, size1, (char*)decoded); - assert(memcmp(decoded, buf.data(), size1) == 0);*/ - - decode_rle((char*)encoded, size1, (char*)decoded); - } + if (algorithms & RLE_2_bytes) { + decode_rle((uint16_t *)encoded, size1, (uint16_t *)decoded); + } else if (algorithms & RLE_1_byte) { + decode_rle((uint8_t *)encoded, size1, (uint8_t *)decoded); } - if (algorithm == None) + if (algorithms == None) { decoded = encoded; + } } void pak::load(FILE *f) { h.load(f); - encoded.resize(h.chunk_size * 256 + 128); - decoded.resize(h.chunk_size * 256 + 128); + encoded.resize(h.chunk_size * 4); + decoded.resize(h.chunk_size * 4); int n = h.number_of_files; while (n--) diff --git a/src/unpaker/pak.h b/src/unpaker/pak.h index 2ff20c6..27970d8 100644 --- a/src/unpaker/pak.h +++ b/src/unpaker/pak.h @@ -60,12 +60,12 @@ struct segment None = 0x0, RLE_2_bytes = 0x1, RLE_1_byte = 0x2, - DA_1 = 0x4, + DA_1 = 0x4, // not used DA_2 = 0x8, }; uint32_t unk1; // some file offset? trash? - decode_algorithm algorithm; + decode_algorithm algorithms; uint32_t offset; uint32_t size1; diff --git a/src/unpaker/unpaker.cpp b/src/unpaker/unpaker.cpp index eba912e..63ecfd0 100644 --- a/src/unpaker/unpaker.cpp +++ b/src/unpaker/unpaker.cpp @@ -16,10 +16,13 @@ * along with this program. If not, see . */ +#include #include #include "pak.h" +namespace fs = std::filesystem; + void unpak(string fn) { FILE *f = fopen(fn.c_str(), "rb"); @@ -45,9 +48,25 @@ int main(int argc, char *argv[]) { if (argc != 2) { - cerr << "Usage: " << argv[0] << " archive.pak" << "\n"; + cerr << "Usage: " << argv[0] << " " << "\n"; return 1; } - unpak(argv[1]); + fs::path p = argv[1]; + if (fs::is_regular_file(p)) { + unpak(p.string()); + } else if (fs::is_directory(p)) { + for (auto &&d : fs::directory_iterator{p}) { + if (d.path().extension() == ".pak") { + std::cout << "processing: " << d.path() << "\n"; + try { + unpak(d.path().string()); + } catch (std::exception &e) { + std::cerr << e.what() << "\n"; + } + } + } + } else { + throw std::runtime_error("Bad fs object"); + } return 0; } diff --git a/src/unpaker2/unpaker2.cpp b/src/unpaker2/unpaker2.cpp index 6587153..1c6e455 100644 --- a/src/unpaker2/unpaker2.cpp +++ b/src/unpaker2/unpaker2.cpp @@ -35,6 +35,8 @@ #include #include +#include "../unpaker/decode.h" + using namespace std; #pragma pack(push, 1) @@ -52,20 +54,28 @@ struct file_description { uint32_t size; }; struct segment { - enum decode_algorithm : uint32_t { - none = 0x0, - lzo = 0x1, - lzma = 0x2, - rlew = 0x4, // https://moddingwiki.shikadi.net/wiki/Id_Software_RLEW_compression - }; - // some file offset? trash? crc? m1 has zlib crc table (png)? uint32_t unk1; - decode_algorithm algorithm; + uint32_t algorithm; uint32_t offset; }; #pragma pack(pop) +struct progress_bar { + const size_t max_elements; + const int displaylen; + int displaycur{}; + int i{}; + + void step() { + auto progress_bar_pos = std::round((double)++i / max_elements * displaylen); + for (int i = displaycur; i < progress_bar_pos; ++i) { + std::cout << "#"; + } + displaycur = progress_bar_pos; + } +}; + void unpack_file(path fn) { primitives::templates2::mmap_file f{fn}; stream s{f}; @@ -75,82 +85,122 @@ void unpack_file(path fn) { std::vector decoded; decoded.resize((segments.size() + 1) * p.block_size * 4); auto pp = decoded.data(); - int displaylen = 50; - int seglen = segments.size() / displaylen == 0 ? 1 : segments.size() / displaylen; - for (int i = 0; auto &&seg : segments) { - if (i++ % seglen == 0) { - std::cout << "#"; - } + progress_bar pb{segments.size(), 50}; + for (auto &&seg : segments) { s.p = f.p + seg.offset; uint32_t len = s; - switch (seg.algorithm) { - case segment::decode_algorithm::none: { - memcpy(pp, s.p, len); - pp += len; - break; - } - case segment::decode_algorithm::lzo: { - size_t outsz; - // use lzo1x_decompress_safe? - auto r2 = lzo1x_decompress(s.p, len, pp, &outsz, 0); - if (r2 != LZO_E_OK) { - throw std::runtime_error{"lzo error"}; + auto m2 = [&]() { + enum decode_algorithm : uint32_t { + none = 0x0, + lzo = 0x1, + lzma = 0x2, + rlew = 0x4, // https://moddingwiki.shikadi.net/wiki/Id_Software_RLEW_compression + }; + switch (seg.algorithm) { + case decode_algorithm::none: { + memcpy(pp, s.p, len); + pp += len; + break; } - pp += outsz; - break; - } - case segment::decode_algorithm::rlew: { - auto base = s.p; - uint16_t flag = s; - while (s.p < base + len) { - uint16_t w = s; - if ((w & 0xFF00) == (flag << 8)) { - uint16_t count = (uint8_t)w; - if (count == 0xFF) { - uint16_t w2 = s; - *(decltype(w2) *)pp = w2; - pp += sizeof(w2); - continue; - } - uint16_t w2 = s; - count += 3; - while (count--) { - *(decltype(w2)*)pp = w2; - pp += sizeof(w2); - } - } else { - *(decltype(w)*)pp = w; - pp += sizeof(w); + case decode_algorithm::lzo: { + size_t outsz; + // use lzo1x_decompress_safe? + auto r2 = lzo1x_decompress(s.p, len, pp, &outsz, 0); + if (r2 != LZO_E_OK) { + throw std::runtime_error{"lzo error"}; } + pp += outsz; + break; } - break; - } - case segment::decode_algorithm::lzma: { - uint8_t flags = s; + case decode_algorithm::rlew: { + auto base = s.p; + uint16_t flag = s; + while (s.p < base + len) { + uint16_t w = s; + if ((w & 0xFF00) == (flag << 8)) { + uint16_t count = (uint8_t)w; + if (count == 0xFF) { + uint16_t w2 = s; + *(decltype(w2) *)pp = w2; + pp += sizeof(w2); + continue; + } + uint16_t w2 = s; + count += 3; + while (count--) { + *(decltype(w2)*)pp = w2; + pp += sizeof(w2); + } + } else { + *(decltype(w)*)pp = w; + pp += sizeof(w); + } + } + break; + } + case decode_algorithm::lzma: { + uint8_t flags = s; - lzma_stream strm{}; - strm.next_in = s.p; - strm.avail_in = len; - strm.next_out = pp; - strm.avail_out = p.block_size; + lzma_stream strm{}; + strm.next_in = s.p; + strm.avail_in = len; + strm.next_out = pp; + strm.avail_out = p.block_size; - auto r = lzma_lzip_decoder(&strm, 10'000'000, flags); - if (r != LZMA_OK) { - throw std::runtime_error{"lzma error"}; + auto r = lzma_lzip_decoder(&strm, 10'000'000, flags); + if (r != LZMA_OK) { + throw std::runtime_error{"lzma error"}; + } + r = lzma_code(&strm, LZMA_RUN); + if (r != LZMA_STREAM_END) { + throw std::runtime_error{"lzma error"}; + } + pp += strm.total_out; + break; } - r = lzma_code(&strm, LZMA_RUN); - if (r != LZMA_STREAM_END) { - throw std::runtime_error{"lzma error"}; + default: + throw std::runtime_error{"compression unsupported: "s + std::to_string(seg.algorithm)}; } - pp += strm.total_out; - break; - } - default: - throw std::runtime_error{"compression unsupported: "s + std::to_string(seg.algorithm)}; + }; + auto m1 = [&]() { + enum decode_algorithm : uint32_t { + None = 0x0, + RLE_2_bytes = 0x1, + RLE_1_byte = 0x2, + decode_algorithm_1 = 0x4, // not used + decode_algorithm_2 = 0x8, + }; + auto in = s.p; + auto size1 = len; + std::vector vec; + if (seg.algorithm & decode_algorithm_1) { + // if you see this, check in git history decode_f1() + throw std::runtime_error{"compression unsupported: "s + std::to_string(seg.algorithm)}; + } + if (seg.algorithm & decode_algorithm_2) { + uint32_t size2 = s; + vec.resize(std::max(size2 * 4, p.block_size)); + decode_f2((char *)s.p, size2, (char *)vec.data()); + in = vec.data(); + } + if (seg.algorithm & RLE_2_bytes) { + pp = decode_rle((uint16_t *)in, size1, (uint16_t *)pp); + } else if (seg.algorithm & RLE_1_byte) { + pp = decode_rle((uint8_t *)in, size1, (uint8_t *)pp); + } + if (seg.algorithm == None) { + //decoded = encoded; + } + }; + if (p.magic == 0) { + m1(); + } else { + m2(); } + pb.step(); } std::cout << "\n"; - auto dir = fn += ".dir2"; + auto dir = fn += ".dir"; fs::create_directories(dir); for (auto &&d : descs) { auto fn = dir / d.name; diff --git a/sw.cpp b/sw.cpp index a8c854f..c88c420 100644 --- a/sw.cpp +++ b/sw.cpp @@ -48,13 +48,12 @@ void build(Solution &s) add_exe_with_data_manager("mmo_extractor"); add_exe_with_common("mmp_extractor") += "org.sw.demo.intel.opencv.highgui"_dep; add_exe_with_common("mpj_loader"); + add_exe_with_common("paker"); add_exe_with_common("script2txt2"); add_exe_with_common("tm_converter"); add_exe("name_generator"); add_exe_with_common("save_loader"); - auto &unpaker = add_exe_base("unpaker"); // 32-bit only - if (unpaker.getBuildSettings().TargetOS.Arch != ArchType::x86) - unpaker.HeaderOnly = true; + add_exe("unpaker"); add_exe_with_common("unpaker2") += "org.sw.demo.oberhumer.lzo.lzo"_dep, "org.sw.demo.xz_utils.lzma"_dep