From f86e9e9f2a103335521f491fc50b838475aeb61b Mon Sep 17 00:00:00 2001 From: lzwdgc Date: Sun, 30 Jul 2017 03:00:41 +0300 Subject: [PATCH] Add db_add_language tool. --- src/CMakeLists.txt | 11 +- src/common/buffer.cpp | 17 -- src/db_add_language/db_add_language.cpp | 264 ++++++++++++++++++++++++ src/db_extractor/db.cpp | 38 +++- src/db_extractor/db.h | 15 +- src/db_extractor/db_extractor.cpp | 37 +--- 6 files changed, 324 insertions(+), 58 deletions(-) create mode 100644 src/db_add_language/db_add_language.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 415ba5e..33dac71 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,12 +8,19 @@ target_link_libraries(common add_dependencies(common version) if (WIN32) -file(GLOB unpaker_src "unpaker/*") -add_executable(unpaker ${unpaker_src}) +file(GLOB db_add_language_src "db_add_language/*") +add_executable(db_add_language ${db_add_language_src} db_extractor/db.cpp) +target_link_libraries(db_add_language + common + pvt.lzwdgc.polygon4.data_manager.data_manager +) file(GLOB db_extractor_src "db_extractor/*") add_executable(db_extractor ${db_extractor_src}) target_link_libraries(db_extractor common) + +file(GLOB unpaker_src "unpaker/*") +add_executable(unpaker ${unpaker_src}) endif() file(GLOB mmm_extractor_src "mmm_extractor/*") diff --git a/src/common/buffer.cpp b/src/common/buffer.cpp index 0f76d47..89d46da 100644 --- a/src/common/buffer.cpp +++ b/src/common/buffer.cpp @@ -36,23 +36,6 @@ std::string version() return s; } -std::vector readFile(const std::string &fn) -{ - FILE *f = fopen(fn.c_str(), "rb"); - if (!f) - { - printf("Cannot open file %s\n", fn.c_str()); - throw std::runtime_error("Cannot open file " + fn); - } - fseek(f, 0, SEEK_END); - auto sz = ftell(f); - fseek(f, 0, SEEK_SET); - std::vector buf(sz); - fread(buf.data(), 1, sz, f); - fclose(f); - return buf; -} - void writeFile(const std::string &fn, const std::vector &data) { FILE *f = fopen(fn.c_str(), "wb"); diff --git a/src/db_add_language/db_add_language.cpp b/src/db_add_language/db_add_language.cpp new file mode 100644 index 0000000..69f529b --- /dev/null +++ b/src/db_add_language/db_add_language.cpp @@ -0,0 +1,264 @@ +/* + * AIM db_extractor + * Copyright (C) 2017 lzwdgc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "../db_extractor/db.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace polygon4; +using namespace polygon4::detail; + +struct string_index +{ + std::wstring s; + IdType i = -1; + + string_index &operator=(const std::string &rhs) + { + s = string2wstring(str2utf8(rhs)); + return *this; + } +}; + +using AimKV = std::unordered_map; +using AimKVResolved = std::unordered_map; +AimKVResolved kv_resolved; + +template +int levenshtein_distance(const T &s1, const T &s2) +{ + // To change the type this function manipulates and returns, change + // the return type and the types of the two variables below. + int s1len = s1.size(); + int s2len = s2.size(); + + auto column_start = (decltype(s1len))1; + + auto column = new decltype(s1len)[s1len + 1]; + std::iota(column + column_start, column + s1len + 1, column_start); + + for (auto x = column_start; x <= s2len; x++) { + column[0] = x; + auto last_diagonal = x - column_start; + for (auto y = column_start; y <= s1len; y++) { + auto old_diagonal = column[y]; + auto possibilities = { + column[y] + 1, + column[y - 1] + 1, + last_diagonal + (s1[y - 1] == s2[x - 1] ? 0 : 1) + }; + column[y] = std::min(possibilities); + last_diagonal = old_diagonal; + } + } + auto result = column[s1len]; + delete[] column; + return result; +} + +auto open(const path &p) +{ + db db; + if (fs::exists(p / "quest.dat")) + db.open(p / "quest"); + return db; +}; + +AimKV get_kv(const db &db) +{ + auto iter_tbl = std::find_if(db.t.tables.begin(), db.t.tables.end(), [](auto &t) { + return t.second.name == "INFORMATION"; + }); + if (iter_tbl == db.t.tables.end()) + throw std::runtime_error("Table INFORMATION was not found"); + + auto find_field = [&db, &iter_tbl](const std::string &name) + { + auto i = std::find_if(db.t.fields.begin(), db.t.fields.end(), [&iter_tbl, &name](auto &t) { + return t.second.table_id == iter_tbl->second.id && t.second.name == name; + }); + if (i == db.t.fields.end()) + throw std::runtime_error("Field " + name + " was not found"); + return i->first; + }; + auto nid = find_field("NAME"); + auto tid = find_field("TEXT"); + + AimKV kv; + for (auto &v : db.values) + { + if (v.table_id != iter_tbl->second.id || v.name.empty()) + continue; + for (auto &f : v.fields) + { + if ((f.field_id == nid || f.field_id == tid) && !f.s.empty()) + kv[v.name] = f.s; + } + } + return kv; +} + +AimKVResolved get_kv_resolved(const path &d, const Storage &storage) +{ + static const auto fn = "kv.resolved"; + + AimKVResolved mres; + if (fs::exists(fn)) + { + std::ifstream f(fn); + std::string s; + IdType i; + while (f) + { + f >> std::quoted(s); + if (!f) + break; + f >> i; + mres[s] = i; + } + } + else + { + auto db1 = open(d / "ru" / "aim1"); + auto db2 = open(d / "ru" / "aim2"); + + auto kv1 = get_kv(db1); + auto kv2 = get_kv(db2); + kv1.insert(kv2.begin(), kv2.end()); + auto sz = kv1.size(); + std::cout << "total kvs: " << sz << "\n"; + + Executor e; + int i = 0; + for (auto &kv : kv1) + { + e.push([&storage, &i, &sz, &kv]() + { + std::cout << "total kvs: " << ++i << "/" << sz << "\n"; + std::map m; + for (auto &s : storage.strings) + m[levenshtein_distance(kv.second.s, s.second->string.ru)] = s.first; + if (m.empty()) + return; + kv.second.i = m.begin()->second; + }); + } + e.wait(); + + std::ofstream f(fn); + for (auto &kv : kv1) + { + mres[kv.first] = kv.second.i; + f << std::quoted(kv.first) << " " << kv.second.i << "\n"; + } + } + return mres; +} + +void process_lang(Storage &s, const path &p, polygon4::String polygon4::LocalizedString::*field) +{ + auto db1 = open(p); + auto db2 = open(p / "aim1"); + auto db3 = open(p / "aim2"); + + AimKV kvm; + auto get_kv = [&kvm](auto &db) + { + AimKV kv1; + if (db.number_of_values) + { + kv1 = ::get_kv(db); + kvm.insert(kv1.begin(), kv1.end()); + } + }; + get_kv(db1); + get_kv(db2); + get_kv(db3); + + std::string str; + for (auto &kv : kvm) + { + auto i = kv_resolved.find(kv.first); + if (i == kv_resolved.end()) + continue; + auto &sold = s.strings[i->second]->string.*field; + //sold = kv.second.s; + str += "id: " + std::to_string(i->second) + "\n\n"; + str += "old:\n"; + str += wstring2string(sold) + "\n"; + str += "\n"; + str += "new:\n"; + str += wstring2string(kv.second.s) + "\n"; + str += "\n================================================\n\n"; + } + write_file(p / (p.filename().string() + "_diff.txt"), str); +} + +int main(int argc, char *argv[]) +try +{ + if (argc != 3) + { + std::cout << "Usage: prog db.sqlite dir_to_lang_dbs" << "\n"; + return 1; + } + path d = argv[2]; + + auto storage = initStorage(argv[1]); + storage->load(); + kv_resolved = get_kv_resolved(d, *storage.get()); + + for (auto &f : boost::make_iterator_range(fs::directory_iterator(d), {})) + { + if (!fs::is_directory(f)) + continue; + + auto p = f.path(); + + if (0); +#define ADD_LANGUAGE(l, n) else if (p.filename() == #l && p.filename() != "ru") \ + {process_lang(*storage.get(), p, &polygon4::LocalizedString::l);} +#include +#undef ADD_LANGUAGE + else + { + std::cerr << "No such lang: " << p.filename().string() << "\n"; + continue; + } + } + + return 0; +} +catch (std::exception &e) +{ + printf("error: %s\n", e.what()); + return 1; +} +catch (...) +{ + printf("error: unknown exception\n"); + return 1; +} diff --git a/src/db_extractor/db.cpp b/src/db_extractor/db.cpp index e3c6da0..f92b040 100644 --- a/src/db_extractor/db.cpp +++ b/src/db_extractor/db.cpp @@ -20,6 +20,8 @@ #include +#include + string getSqlType(FieldType type) { switch (type) @@ -45,6 +47,8 @@ void table::load(const buffer &b) void field::load(const buffer &b) { + if (b.eof()) + return; READ(b, table_id); READ(b, id); READ_STRING(b, name); @@ -69,6 +73,8 @@ void tab::load(const buffer &b) { field t; t.load(b); + if (t.table_id == -1) + continue; fields[t.id] = t; } } @@ -104,6 +110,8 @@ void value::load_fields(const tab &tab, buffer &b) case FieldType::String: fv.s.resize(fv.size); READ_N(data, fv.s[0], fv.s.size()); + while (!fv.s.empty() && fv.s.back() == '\0') + fv.s.resize(fv.s.size() - 1); break; case FieldType::Integer: READ(data, fv.i); @@ -133,4 +141,32 @@ void db::load(const buffer &b) t.load_index(b); values.push_back(t); } -} \ No newline at end of file +} + +void db::open(const path &p) +{ + std::string fn = p.string(); + t.load(buffer(read_file(fn + ".tab"))); + load(buffer(read_file(fn + ".ind"))); + buffer b(read_file(fn + ".dat")); + for (auto &v : values) + v.load_fields(t, b); +} + +std::string str2utf8(const std::string &codepage_str) +{ + int size = MultiByteToWideChar(CP_ACP, MB_COMPOSITE, codepage_str.c_str(), + codepage_str.length(), nullptr, 0); + std::wstring utf16_str(size, '\0'); + MultiByteToWideChar(CP_ACP, MB_COMPOSITE, codepage_str.c_str(), + codepage_str.length(), &utf16_str[0], size); + + int utf8_size = WideCharToMultiByte(CP_UTF8, 0, utf16_str.c_str(), + utf16_str.length(), nullptr, 0, + nullptr, nullptr); + std::string utf8_str(utf8_size, '\0'); + WideCharToMultiByte(CP_UTF8, 0, utf16_str.c_str(), + utf16_str.length(), &utf8_str[0], utf8_size, + nullptr, nullptr); + return utf8_str; +} diff --git a/src/db_extractor/db.h b/src/db_extractor/db.h index 6d927cd..ecf176b 100644 --- a/src/db_extractor/db.h +++ b/src/db_extractor/db.h @@ -16,6 +16,10 @@ * along with this program. If not, see . */ +#include + +#include + #include #include #include @@ -24,8 +28,6 @@ #include #include -#include - using namespace std; enum class FieldType : uint32_t @@ -47,7 +49,7 @@ struct table struct field { - uint32_t table_id; + uint32_t table_id = -1; uint32_t id; std::string name; FieldType type; @@ -90,10 +92,13 @@ struct value struct db { - uint32_t number_of_values; + uint32_t number_of_values = 0; tab t; vector values; void load(const buffer &b); -}; \ No newline at end of file + void open(const path &p); +}; + +std::string str2utf8(const std::string &codepage_str); diff --git a/src/db_extractor/db_extractor.cpp b/src/db_extractor/db_extractor.cpp index 6c097cc..a424331 100644 --- a/src/db_extractor/db_extractor.cpp +++ b/src/db_extractor/db_extractor.cpp @@ -20,36 +20,7 @@ #include -#include #include -#include - -void open_db(string path, db &db) -{ - db.t.load(buffer(read_file(path + ".tab"))); - db.load(buffer(read_file(path + ".ind"))); - buffer b(read_file(path + ".dat")); - for (auto &v : db.values) - v.load_fields(db.t, b); -} - -string str2utf8(string codepage_str) -{ - int size = MultiByteToWideChar(CP_ACP, MB_COMPOSITE, codepage_str.c_str(), - codepage_str.length(), nullptr, 0); - std::wstring utf16_str(size, '\0'); - MultiByteToWideChar(CP_ACP, MB_COMPOSITE, codepage_str.c_str(), - codepage_str.length(), &utf16_str[0], size); - - int utf8_size = WideCharToMultiByte(CP_UTF8, 0, utf16_str.c_str(), - utf16_str.length(), nullptr, 0, - nullptr, nullptr); - std::string utf8_str(utf8_size, '\0'); - WideCharToMultiByte(CP_UTF8, 0, utf16_str.c_str(), - utf16_str.length(), &utf8_str[0], utf8_size, - nullptr, nullptr); - return utf8_str; -} void create_sql(string path, const db &db) { @@ -157,10 +128,10 @@ try cout << "Usage:\n" << argv[0] << " path/to/aim_game/data/db" << "\n" << argv[0] << " path/to/aim_game/data/quest" << "\n"; return 1; } - string path = argv[1]; + path p = argv[1]; db db; - open_db(path, db); - create_sql(path, db); + db.open(p); + create_sql(p.string(), db); return 0; } catch (std::exception &e) @@ -172,4 +143,4 @@ catch (...) { printf("error: unknown exception\n"); return 1; -} \ No newline at end of file +}