Unpaker updates.

This commit is contained in:
lzwdgc 2024-02-02 23:06:17 +03:00
parent 98beb2b4f5
commit 9146cf6900
6 changed files with 192 additions and 439 deletions

View file

@ -1,114 +1,6 @@
#pragma once
#ifdef _WIN64
#error "not available in 64-bit mode"
#endif
// Fixed-width aliases used by the decode_f* routines in this header.
#define _BYTE uint8_t
#define _WORD uint16_t
#define _DWORD uint32_t
// Lvalue access to the byte / 16-bit word stored at the lowest address of x
// (the least significant part when the target is little-endian).
#define LOBYTE(x) (*((_BYTE *)&(x)))
#define LOWORD(x) (*((_WORD *)&(x)))
// Lvalue access to the byte at offset 1 from the start of x.
#define HIBYTE(x) (*((_BYTE *)&(x) + 1))
// Lvalue access to the byte at offset n from the start of x.
// NOTE(review): n is not parenthesized in the expansion, so pass a simple expression.
#define BYTEn(x, n) (*((_BYTE *)&(x) + n))
// Shorthand for BYTEn(x, 1).
#define BYTE1(x) BYTEn(x, 1)
/// Returns 1 when the sign bit of `x`, viewed as a signed integer of the same
/// width as T, is set; 0 otherwise. Widths other than 1, 2 and 4 bytes are
/// evaluated as 64-bit.
template <class T>
int8_t __SETS__(T x)
{
    bool negative;
    switch (sizeof(T)) {
    case 1:
        negative = static_cast<int8_t>(x) < 0;
        break;
    case 2:
        negative = static_cast<int16_t>(x) < 0;
        break;
    case 4:
        negative = static_cast<int32_t>(x) < 0;
        break;
    default:
        negative = static_cast<int64_t>(x) < 0;
        break;
    }
    return negative;
}
/// Computes the overflow flag of the subtraction `x - y`, i.e. 1 when the
/// signed result does not fit in the wider of the two operand types and 0
/// otherwise.
///
/// The narrower operand is first converted to the wider type. The difference
/// is computed in unsigned 64-bit arithmetic and truncated back to that type:
/// plain `x - y` would be promoted to `int` for 8/16-bit operands (hiding the
/// overflow) and would be undefined behaviour on overflow for `int` operands.
///
/// @param x minuend
/// @param y subtrahend
/// @return 1 if the signed subtraction overflows, 0 otherwise
template <class T, class U>
int8_t __OFSUB__(T x, U y)
{
    // Sign bit of `v` taken at v's own width (sign extension to 64 bits keeps
    // that bit intact for signed types).
    const auto sign_of = [](auto v) -> int8_t {
        return static_cast<int8_t>((static_cast<uint64_t>(v) >> (sizeof(v) * 8 - 1)) & 1);
    };

    if (sizeof(T) < sizeof(U))
    {
        const U x2 = x;
        const U diff = static_cast<U>(static_cast<uint64_t>(x2) - static_cast<uint64_t>(y));
        const int8_t sx = sign_of(x2);
        // Overflow: operands have different signs and the result's sign differs from x.
        return (sx ^ sign_of(y)) & (sx ^ sign_of(diff));
    }
    else
    {
        const T y2 = y;
        const T diff = static_cast<T>(static_cast<uint64_t>(x) - static_cast<uint64_t>(y2));
        const int8_t sx = sign_of(x);
        return (sx ^ sign_of(y2)) & (sx ^ sign_of(diff));
    }
}
/// Stores `value` into `count` consecutive 32-bit slots starting at `ptr`.
/// Nothing is written when `count` is zero or negative.
inline void memset32(void *ptr, uint32_t value, int count)
{
    auto *cursor = static_cast<uint32_t *>(ptr);
    for (int remaining = count; remaining > 0; --remaining)
    {
        *cursor = value;
        ++cursor;
    }
}
/// Decodes a flag-driven back-reference (LZ-style) stream.
///
/// Stream layout: a flag byte followed by up to eight tokens, consumed from
/// the least significant flag bit upwards.
///   - flag bit 0: the token is one literal byte, copied to the output;
///   - flag bit 1: the token is two bytes b0, b1 describing a copy from the
///     already produced output: distance = ((b0 & 0xF) << 8) + b1,
///     length = (b0 >> 4) + 4.
///
/// The flag bits live in a plain integer rather than in a pointer variable
/// cast to int, so the function also builds for 64-bit targets.
///
/// @param input  start of the encoded data
/// @param size   number of encoded bytes
/// @param output destination buffer; the caller must make it large enough
/// @return pointer to the first input byte that was not consumed
char *decode_f1(char *input, int size, char *output)
{
    char *cursor = input;
    char *const end = input + size;
    char *out = output;

    unsigned flags = 0;
    int bits_used = 8; // forces a flag byte to be loaded on the first iteration

    while (cursor < end)
    {
        if (bits_used == 8)
        {
            flags = static_cast<uint8_t>(*cursor++);
            bits_used = 0;
            if (cursor == end)
                break;
        }

        if (flags & 1)
        {
            // A truncated back-reference token would read past the input.
            if (end - cursor < 2)
                break;

            const int b0 = static_cast<uint8_t>(cursor[0]);
            const int b1 = static_cast<uint8_t>(cursor[1]);
            cursor += 2;

            const int distance = ((b0 & 0xF) << 8) + b1;
            const int length = (b0 >> 4) + 4; // always >= 4

            // Byte-by-byte on purpose: source and destination may overlap and
            // freshly written bytes must be visible to later reads.
            const char *src = out - distance;
            for (int n = 0; n < length; ++n)
                *out++ = *src++;
        }
        else
        {
            *out++ = *cursor++;
        }

        ++bits_used;
        flags >>= 1;
    }
    return cursor;
}
#define LOBYTE(x) (*((uint8_t *)&(x)))
char decode_f2(char *input, int size, char *output)
{
@ -141,18 +33,14 @@ char decode_f2(char *input, int size, char *output)
{
c_2 = ((c & 0xF) << 8) + c_1;
c = (c >> 4) + 4;
if (c)
{
v10 = &v6[-c_2];
c_3 = c;
do
{
do {
LOBYTE(c) = *v10;
*v6++ = *v10++;
--c_3;
} while (c_3);
}
}
else
{
*v6++ = v4;
@ -168,215 +56,38 @@ char decode_f2(char *input, int size, char *output)
return c;
}
int decode_f3(char *input, int size, char *output)
{
uint16_t s; // cx@1
char *v4; // edi@1
int result; // eax@1
int idx; // edx@1
bool v7; // zf@1
bool v8; // sf@1
uint8_t v9; // of@1
int v10; // ebp@1
char *v11; // ebx@2
uint16_t v12; // ax@3
uint8_t v13; // cl@4
__int16 v14; // ax@6
unsigned int v15; // esi@6
void *v16; // edi@7
int v17; // ebp@7
int v18; // eax@7
int v19; // edi@7
int i; // ecx@7
int v21; // [sp+8h] [bp-4h]@1
int v22; // [sp+14h] [bp+8h]@1
LOBYTE(s) = 0;
v4 = input;
HIBYTE(s) = *input;
result = size >> 1;
idx = 1;
v9 = __OFSUB__(size >> 1, 1);
v7 = size >> 1 == 1;
v8 = (size >> 1) - 1 < 0;
v22 = size >> 1;
v10 = s;
v21 = s;
if (!((uint8_t)(v8 ^ v9) | v7))
{
v11 = output;
do
{
v12 = *(_WORD *)&v4[2 * idx];
if ((*(_WORD *)&v4[2 * idx] & 0xFF00) == (_WORD)v10)
{
v13 = *(_WORD *)&v4[2 * idx++];
if (v12 != (uint16_t)v10 + 255)
{
v14 = *(_WORD *)&v4[2 * idx];
v15 = v13 + 3;
if ((signed int)v15 > 0)
{
LOWORD(v10) = *(_WORD *)&v4[2 * idx];
v16 = v11;
v11 += 2 * v15;
v17 = v10 << 16;
LOWORD(v17) = v14;
v18 = v17;
v10 = v21;
memset32(v16, v18, v15 >> 1);
v19 = (int)((char *)v16 + 4 * (v15 >> 1));
for (i = v15 & 1; i; --i)
{
*(_WORD *)v19 = v18;
v19 += 2;
template <typename T>
auto decode_rle(const T *input, int size, T *output, auto f_cmp_indicator) {
if (size < 2) {
return (uint8_t *)output;
}
v4 = input;
}
goto LABEL_13;
}
else
{
*(_WORD *)v11 = *(_WORD *)&v4[2 * idx];
}
}
else
{
*(_WORD *)v11 = v12;
}
v11 += 2;
LABEL_13:
result = v22;
++idx;
} while (idx < v22);
}
return result;
}
// Expands a byte-oriented run-length encoded buffer into `output`.
//
// input[0] is the marker byte. Every following byte that differs from the
// marker is copied to the output as is. A marker byte introduces one of:
//   - marker, X where X compares equal to -1 as a char (byte 0xFF when char
//     is signed): the marker byte itself is written;
//   - marker, count, value: (count + 3) copies of `value` are written.
//
// `segment_offset` is only used as scratch storage to build the 32-bit fill
// pattern; its low two bytes are overwritten before it is read.
// Every return path returns `size`.
int decode_f4(char *input, int size, char *output, int segment_offset)
{
char *in3; // edx@1
int result; // eax@1
char in1; // bl@1
int ptr; // esi@1
char *out1; // edi@2
char c; // al@3
char c_next; // cl@4
char v11; // al@6
unsigned int c_prev; // ebp@6
int v13; // eax@7
char in2; // [sp+1h] [bp-1h]@1
in3 = input;
result = size;
// in1 / in2 both hold the marker byte (first input byte)
in1 = *input;
// ptr is the index of the next input byte to read
ptr = 1;
in2 = *input;
if (size > 1)
{
out1 = output;
while (1)
{
c = in3[ptr];
// not the marker: leave the loop to the literal path below, which jumps back in at LABEL_9
if (c != in1)
break;
c_next = in3[ptr++ + 1];
if (c_next == -1)
{
// escaped marker: emit the marker byte itself
*out1 = in1;
LABEL_9:
++out1;
goto LABEL_10;
}
// v11 is the byte to repeat, c_prev the run length (count byte + 3)
v11 = in3[ptr++ + 1];
c_prev = (uint8_t)c_next + 3;
if ((signed int)c_prev > 0)
{
// replicate v11 into all four bytes of v13
LOBYTE(segment_offset) = v11;
BYTE1(segment_offset) = v11;
v13 = segment_offset << 16;
LOWORD(v13) = segment_offset;
in1 = in2;
// fill whole 32-bit words first, then the remaining 0..3 bytes
memset32(out1, v13, c_prev >> 2);
in3 = input;
memset(&out1[4 * (c_prev >> 2)], v13, c_prev & 3);
// `output` tracks the current write position (see LABEL_10), so this advances by the run length
out1 = &output[c_prev];
LABEL_10:
output = out1;
}
result = size;
++ptr;
// all input consumed
if (ptr >= size)
return result;
}
// literal byte: store it, then re-enter the loop body at LABEL_9 to advance
*out1 = c;
goto LABEL_9;
}
return result;
}
void decode_rle(const short *input, const int size, short *output)
{
if (size < 2)
return;
// input ptr, also rle_indicator
const auto rle_indicator = input++;
while (1)
{
const auto base = input;
const auto rle_indicator = (uint8_t)*input++;
while (1) {
auto c = *input++;
if ((c & 0xFF00) != (*rle_indicator << 8))
if (f_cmp_indicator(c) != rle_indicator) {
*output++ = c;
else
{
uint32_t count = (uint8_t)c;
if (count == (*rle_indicator << 8) + 255)
*output++ = c; // insert indicator byte itself
else
{
count += 3;
for (int i = 0; i < count / 2; i++)
{
*output++ = *input;
} else {
uint32_t count = sizeof(T) == 1 ? *input++ : (c & 0xFF);
if (count == 0xFF) {
*output++ = sizeof(T) == 1 ? rle_indicator : *input++;
} else {
for (int i = 0; i < count + 3; i++) {
*output++ = *input;
}
for (int i = 0; i < ((count / 2) & 1); i++)
{
*output++ = *input;
++input;
}
}
if ((uint8_t *)input >= (uint8_t *)base + size) {
return (uint8_t *)output;
}
}
}
if (input >= rle_indicator + size)
return;
}
/// RLE decoding for 16-bit units: forwards to the generic decode_rle with a
/// functor that extracts the upper 8 bits of a unit for the indicator comparison.
auto decode_rle(const uint16_t *input, int size, uint16_t *output) {
    return decode_rle(input, size, output, [](auto unit) {
        return unit >> 8;
    });
}
void decode_rle(const char *input, const int size, char *output)
{
if (size < 2)
return;
// input ptr, also rle_indicator
const auto rle_indicator = input++;
while (1)
{
auto c = *input++;
if (c != *rle_indicator)
*output++ = c;
else
{
uint32_t count = (uint8_t)*input++;
if (count == 255)
*output++ = *rle_indicator; // insert indicator byte itself
else
{
memset(output, *input++, count += 3);
output += count;
}
}
if (input >= rle_indicator + size)
return;
}
auto decode_rle(const uint8_t *input, const int size, uint8_t *output) {
return decode_rle(input, size, output, [](auto c){return c;});
}

View file

@ -96,7 +96,7 @@ int record::read(pak *pak, void *output, int size)
void segment::load_header(FILE *f)
{
FREAD(unk1);
FREAD(algorithm);
FREAD(algorithms);
FREAD(offset);
}
@ -105,7 +105,7 @@ void segment::load_segment()
auto f = file;
fseek(f, offset, SEEK_SET);
/*if (algorithm == 0)
/*if (algorithms == 0)
{
std::cerr << "Something is wrong. Maybe you trying to open aim2 files?\n";
std::cerr << "They can be opened with SDK extractor.\n";
@ -114,7 +114,7 @@ void segment::load_segment()
FREAD(size1);
size2 = size1;
if ((algorithm & 0x3) && (algorithm & 0xC))
if (algorithms & DA_2)
{
FREAD(size2);
fread(&decoded[0], 1, size2, f);
@ -129,50 +129,24 @@ void segment::decompress(int segment_id)
{
load_segment();
if ((algorithm & DA_1) || (algorithm & DA_2))
{
if (algorithm & DA_1)
// never called
decode_f1((char*)decoded, size2, (char*)encoded);
else
if (algorithms & DA_2) {
decode_f2((char*)decoded, size2, (char*)encoded);
}
if ((algorithm & RLE_1_byte) || (algorithm & RLE_2_bytes))
{
if (algorithm & RLE_2_bytes)
{
decode_f3((char*)encoded, size1, (char*)decoded);
/*static std::vector<uint8_t> buf(4194432);
decode_f3((char*)encoded, size1, (char*)buf.data());
decode_rle((short*)encoded, size1, (short*)decoded);
auto sz = 0;
while (sz++ < size1 - 1 && decoded[sz] == buf[sz]);
std::cout << "len = " << sz << "\n";
assert(memcmp(decoded, buf.data(), size1) == 0);*/
if (algorithms & RLE_2_bytes) {
decode_rle((uint16_t *)encoded, size1, (uint16_t *)decoded);
} else if (algorithms & RLE_1_byte) {
decode_rle((uint8_t *)encoded, size1, (uint8_t *)decoded);
}
else
{
//decode_f4((char*)encoded, size1, (char*)decoded);
/*static std::vector<uint8_t> buf(4194432);
const int header_size = 0xC;
decode_f4((char*)encoded, size1, (char*)buf.data(), segment_id * header_size);
decode_rle((char*)encoded, size1, (char*)decoded);
assert(memcmp(decoded, buf.data(), size1) == 0);*/
decode_rle((char*)encoded, size1, (char*)decoded);
}
}
if (algorithm == None)
if (algorithms == None) {
decoded = encoded;
}
}
void pak::load(FILE *f)
{
h.load(f);
encoded.resize(h.chunk_size * 256 + 128);
decoded.resize(h.chunk_size * 256 + 128);
encoded.resize(h.chunk_size * 4);
decoded.resize(h.chunk_size * 4);
int n = h.number_of_files;
while (n--)

View file

@ -60,12 +60,12 @@ struct segment
None = 0x0,
RLE_2_bytes = 0x1,
RLE_1_byte = 0x2,
DA_1 = 0x4,
DA_1 = 0x4, // not used
DA_2 = 0x8,
};
uint32_t unk1; // some file offset? trash?
decode_algorithm algorithm;
decode_algorithm algorithms;
uint32_t offset;
uint32_t size1;

View file

@ -16,10 +16,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <filesystem>
#include <iostream>
#include "pak.h"
namespace fs = std::filesystem;
void unpak(string fn)
{
FILE *f = fopen(fn.c_str(), "rb");
@ -45,9 +48,25 @@ int main(int argc, char *argv[])
{
if (argc != 2)
{
cerr << "Usage: " << argv[0] << " archive.pak" << "\n";
cerr << "Usage: " << argv[0] << " <archive.pak or dir>" << "\n";
return 1;
}
unpak(argv[1]);
fs::path p = argv[1];
if (fs::is_regular_file(p)) {
unpak(p.string());
} else if (fs::is_directory(p)) {
for (auto &&d : fs::directory_iterator{p}) {
if (d.path().extension() == ".pak") {
std::cout << "processing: " << d.path() << "\n";
try {
unpak(d.path().string());
} catch (std::exception &e) {
std::cerr << e.what() << "\n";
}
}
}
} else {
throw std::runtime_error("Bad fs object");
}
return 0;
}

View file

@ -35,6 +35,8 @@
#include <lzma.h>
#include <lzo/lzo1x.h>
#include "../unpaker/decode.h"
using namespace std;
#pragma pack(push, 1)
@ -52,20 +54,28 @@ struct file_description {
uint32_t size;
};
struct segment {
enum decode_algorithm : uint32_t {
none = 0x0,
lzo = 0x1,
lzma = 0x2,
rlew = 0x4, // https://moddingwiki.shikadi.net/wiki/Id_Software_RLEW_compression
};
// some file offset? trash? crc? m1 has zlib crc table (png)?
uint32_t unk1;
decode_algorithm algorithm;
uint32_t algorithm;
uint32_t offset;
};
#pragma pack(pop)
/// Minimal textual progress bar that prints '#' characters to std::cout.
///
/// Aggregate: initialise as `progress_bar pb{total_steps, bar_width};` and
/// call step() once per processed element. After `max_elements` calls exactly
/// `displaylen` characters have been printed.
struct progress_bar {
    const size_t max_elements; ///< total number of expected step() calls
    const int displaylen;      ///< width of the full bar in characters
    int displaycur{};          ///< characters printed so far
    int i{};                   ///< number of step() calls so far

    /// Records one finished element and prints the '#' characters that became
    /// due. Does nothing when max_elements is 0 (the ratio would be infinite)
    /// and never prints more than displaylen characters in total.
    void step() {
        if (max_elements == 0) {
            return;
        }
        ++i;
        double target = std::round(static_cast<double>(i) / max_elements * displaylen);
        if (target > displaylen) {
            target = displaylen; // extra step() calls must not overshoot the bar
        }
        const int target_pos = static_cast<int>(target);
        for (; displaycur < target_pos; ++displaycur) {
            std::cout << "#";
        }
    }
};
void unpack_file(path fn) {
primitives::templates2::mmap_file<uint8_t> f{fn};
stream s{f};
@ -75,21 +85,24 @@ void unpack_file(path fn) {
std::vector<uint8_t> decoded;
decoded.resize((segments.size() + 1) * p.block_size * 4);
auto pp = decoded.data();
int displaylen = 50;
int seglen = segments.size() / displaylen == 0 ? 1 : segments.size() / displaylen;
for (int i = 0; auto &&seg : segments) {
if (i++ % seglen == 0) {
std::cout << "#";
}
progress_bar pb{segments.size(), 50};
for (auto &&seg : segments) {
s.p = f.p + seg.offset;
uint32_t len = s;
auto m2 = [&]() {
enum decode_algorithm : uint32_t {
none = 0x0,
lzo = 0x1,
lzma = 0x2,
rlew = 0x4, // https://moddingwiki.shikadi.net/wiki/Id_Software_RLEW_compression
};
switch (seg.algorithm) {
case segment::decode_algorithm::none: {
case decode_algorithm::none: {
memcpy(pp, s.p, len);
pp += len;
break;
}
case segment::decode_algorithm::lzo: {
case decode_algorithm::lzo: {
size_t outsz;
// use lzo1x_decompress_safe?
auto r2 = lzo1x_decompress(s.p, len, pp, &outsz, 0);
@ -99,7 +112,7 @@ void unpack_file(path fn) {
pp += outsz;
break;
}
case segment::decode_algorithm::rlew: {
case decode_algorithm::rlew: {
auto base = s.p;
uint16_t flag = s;
while (s.p < base + len) {
@ -125,7 +138,7 @@ void unpack_file(path fn) {
}
break;
}
case segment::decode_algorithm::lzma: {
case decode_algorithm::lzma: {
uint8_t flags = s;
lzma_stream strm{};
@ -148,9 +161,46 @@ void unpack_file(path fn) {
default:
throw std::runtime_error{"compression unsupported: "s + std::to_string(seg.algorithm)};
}
};
auto m1 = [&]() {
enum decode_algorithm : uint32_t {
None = 0x0,
RLE_2_bytes = 0x1,
RLE_1_byte = 0x2,
decode_algorithm_1 = 0x4, // not used
decode_algorithm_2 = 0x8,
};
auto in = s.p;
auto size1 = len;
std::vector<uint8_t> vec;
if (seg.algorithm & decode_algorithm_1) {
// if you see this, check in git history decode_f1()
throw std::runtime_error{"compression unsupported: "s + std::to_string(seg.algorithm)};
}
if (seg.algorithm & decode_algorithm_2) {
uint32_t size2 = s;
vec.resize(std::max(size2 * 4, p.block_size));
decode_f2((char *)s.p, size2, (char *)vec.data());
in = vec.data();
}
if (seg.algorithm & RLE_2_bytes) {
pp = decode_rle((uint16_t *)in, size1, (uint16_t *)pp);
} else if (seg.algorithm & RLE_1_byte) {
pp = decode_rle((uint8_t *)in, size1, (uint8_t *)pp);
}
if (seg.algorithm == None) {
//decoded = encoded;
}
};
if (p.magic == 0) {
m1();
} else {
m2();
}
pb.step();
}
std::cout << "\n";
auto dir = fn += ".dir2";
auto dir = fn += ".dir";
fs::create_directories(dir);
for (auto &&d : descs) {
auto fn = dir / d.name;

5
sw.cpp
View file

@ -48,13 +48,12 @@ void build(Solution &s)
add_exe_with_data_manager("mmo_extractor");
add_exe_with_common("mmp_extractor") += "org.sw.demo.intel.opencv.highgui"_dep;
add_exe_with_common("mpj_loader");
add_exe_with_common("paker");
add_exe_with_common("script2txt2");
add_exe_with_common("tm_converter");
add_exe("name_generator");
add_exe_with_common("save_loader");
auto &unpaker = add_exe_base("unpaker"); // 32-bit only
if (unpaker.getBuildSettings().TargetOS.Arch != ArchType::x86)
unpaker.HeaderOnly = true;
add_exe("unpaker");
add_exe_with_common("unpaker2") +=
"org.sw.demo.oberhumer.lzo.lzo"_dep,
"org.sw.demo.xz_utils.lzma"_dep