From f86e9e9f2a103335521f491fc50b838475aeb61b Mon Sep 17 00:00:00 2001
From: lzwdgc <lzwdgc@gmail.com>
Date: Sun, 30 Jul 2017 03:00:41 +0300
Subject: [PATCH] Add db_add_language tool.

---
 src/CMakeLists.txt                      |  11 +-
 src/common/buffer.cpp                   |  17 --
 src/db_add_language/db_add_language.cpp | 264 ++++++++++++++++++++++++
 src/db_extractor/db.cpp                 |  38 +++-
 src/db_extractor/db.h                   |  15 +-
 src/db_extractor/db_extractor.cpp       |  37 +---
 6 files changed, 324 insertions(+), 58 deletions(-)
 create mode 100644 src/db_add_language/db_add_language.cpp
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 415ba5e..33dac71 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -8,12 +8,19 @@ target_link_libraries(common
 add_dependencies(common version)
 
 if (WIN32)
-file(GLOB unpaker_src "unpaker/*")
-add_executable(unpaker ${unpaker_src})
+file(GLOB db_add_language_src "db_add_language/*")
+add_executable(db_add_language ${db_add_language_src} db_extractor/db.cpp)
+target_link_libraries(db_add_language
+    common
+    pvt.lzwdgc.polygon4.data_manager.data_manager
+)
 
 file(GLOB db_extractor_src "db_extractor/*")
 add_executable(db_extractor ${db_extractor_src})
 target_link_libraries(db_extractor common)
+
+file(GLOB unpaker_src "unpaker/*")
+add_executable(unpaker ${unpaker_src})
 endif()
 
 file(GLOB mmm_extractor_src "mmm_extractor/*")
diff --git a/src/common/buffer.cpp b/src/common/buffer.cpp
index 0f76d47..89d46da 100644
--- a/src/common/buffer.cpp
+++ b/src/common/buffer.cpp
@@ -36,23 +36,6 @@ std::string version()
     return s;
 }
 
-std::vector<uint8_t> readFile(const std::string &fn)
-{
-    FILE *f = fopen(fn.c_str(), "rb");
-    if (!f)
-    {
-        printf("Cannot open file %s\n", fn.c_str());
-        throw std::runtime_error("Cannot open file " + fn);
-    }
-    fseek(f, 0, SEEK_END);
-    auto sz = ftell(f);
-    fseek(f, 0, SEEK_SET);
-    std::vector<uint8_t> buf(sz);
-    fread(buf.data(), 1, sz, f);
-    fclose(f);
-    return buf;
-}
-
 void writeFile(const std::string &fn, const std::vector<uint8_t> &data)
 {
     FILE *f = fopen(fn.c_str(), "wb");
diff --git a/src/db_add_language/db_add_language.cpp b/src/db_add_language/db_add_language.cpp
new file mode 100644
index 0000000..69f529b
--- /dev/null
+++ b/src/db_add_language/db_add_language.cpp
@@ -0,0 +1,264 @@
+/*
+ * AIM db_add_language
+ * Copyright (C) 2017 lzwdgc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../db_extractor/db.h"
+#include <Polygon4/DataManager/Localization.h>
+#include <Polygon4/DataManager/Storage.h>
+#include <Polygon4/DataManager/Types.h>
+#include <primitives/executor.h>
+#include <primitives/filesystem.h>
+
+#include <atomic>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+
+using namespace polygon4;
+using namespace polygon4::detail;
+
+struct string_index // value type of AimKV: a text plus the storage id matched to it
+{
+    std::wstring s; // wide-character text, filled by operator= below
+    IdType i = -1; // id of the matched storage string; stays -1 until assigned
+    // Assigning a std::string converts it with str2utf8 and string2wstring; `i` is untouched.
+    string_index &operator=(const std::string &rhs)
+    {
+        s = string2wstring(str2utf8(rhs));
+        return *this;
+    }
+};
+
+using AimKV = std::unordered_map<std::string, string_index>;
+using AimKVResolved = std::unordered_map<std::string, IdType>;
+AimKVResolved kv_resolved;
+
+// Levenshtein (edit) distance between the sequences s1 and s2: the minimum
+// number of single-element insertions, deletions and substitutions.
+// T must provide size() and operator[] over ==-comparable elements.
+template <class T>
+int levenshtein_distance(const T &s1, const T &s2)
+{
+    const int s1len = static_cast<int>(s1.size());
+    const int s2len = static_cast<int>(s2.size());
+
+    // Rolling column of the DP table: column[y] is the distance between the
+    // first y elements of s1 and the prefix of s2 handled so far. Starting
+    // from 0..s1len also defines column[0] when s2 is empty.
+    std::vector<int> column(s1len + 1);
+    std::iota(column.begin(), column.end(), 0);
+
+    for (int x = 1; x <= s2len; x++)
+    {
+        column[0] = x;
+        int last_diagonal = x - 1;
+        for (int y = 1; y <= s1len; y++)
+        {
+            const int old_diagonal = column[y];
+            column[y] = std::min({
+                column[y] + 1,     // skip s2[x - 1]
+                column[y - 1] + 1, // skip s1[y - 1]
+                last_diagonal + (s1[y - 1] == s2[x - 1] ? 0 : 1)}); // match or substitute
+            last_diagonal = old_diagonal;
+        }
+    }
+    return column[s1len];
+}
+
+// Loads p/quest.{tab,ind,dat}; returns an empty db if p/quest.dat is missing.
+auto open(const path &p)
+{
+    db quest_db;
+    if (fs::exists(p / "quest.dat")) quest_db.open(p / "quest");
+    return quest_db;
+}
+
+// Maps record name -> text for the named records of table INFORMATION; the last
+// non-empty NAME or TEXT field wins. Throws std::runtime_error if the table or a field is absent.
+AimKV get_kv(const db &db)
+{
+    const auto &tables = db.t.tables;
+    auto info = std::find_if(tables.begin(), tables.end(),
+        [](auto &entry) { return entry.second.name == "INFORMATION"; });
+    if (info == tables.end())
+        throw std::runtime_error("Table INFORMATION was not found");
+    const auto &info_table = info->second;
+    auto find_field = [&db, &info_table](const std::string &name)
+    {
+        const auto &fields = db.t.fields;
+        auto pos = std::find_if(fields.begin(), fields.end(), [&](auto &entry) {
+            return entry.second.table_id == info_table.id && entry.second.name == name;
+        });
+        if (pos == fields.end())
+            throw std::runtime_error("Field " + name + " was not found");
+        return pos->first;
+    };
+    const auto name_id = find_field("NAME");
+    const auto text_id = find_field("TEXT");
+    AimKV result;
+    for (auto &record : db.values)
+    {
+        if (record.table_id != info_table.id || record.name.empty())
+            continue;
+        for (auto &fld : record.fields)
+            if ((fld.field_id == name_id || fld.field_id == text_id) && !fld.s.empty())
+                result[record.name] = fld.s;
+    }
+    return result;
+}
+
+// Returns the mapping from AIM string keys to storage string ids.
+// The mapping is cached in "kv.resolved" in the current directory: if that
+// file exists it is parsed, otherwise every Russian text of d/ru/aim1 and
+// d/ru/aim2 is matched to the storage string with the smallest Levenshtein
+// distance (ties go to the string visited last) and the result is saved.
+AimKVResolved get_kv_resolved(const path &d, const Storage &storage)
+{
+    static const auto fn = "kv.resolved";
+
+    AimKVResolved mres;
+    if (fs::exists(fn))
+    {
+        std::ifstream f(fn);
+        std::string s;
+        IdType i;
+        // Stop at the first incomplete record instead of storing a stale id.
+        while (f >> std::quoted(s) >> i)
+            mres[s] = i;
+        return mres;
+    }
+
+    auto db1 = open(d / "ru" / "aim1");
+    auto db2 = open(d / "ru" / "aim2");
+    auto kv1 = get_kv(db1);
+    auto kv2 = get_kv(db2);
+    kv1.insert(kv2.begin(), kv2.end()); // keys already present in kv1 are kept
+    auto sz = kv1.size();
+    std::cout << "total kvs: " << sz << "\n";
+
+    Executor e;
+    // Progress counter; atomic because pushed tasks may run concurrently.
+    std::atomic<int> done{0};
+    for (auto &kv : kv1)
+    {
+        // Each task writes only to its own map element.
+        e.push([&storage, &done, &sz, &kv]()
+        {
+            std::cout << "total kvs: " << ++done << "/" << sz << "\n";
+            std::map<int, IdType> m;
+            for (auto &s : storage.strings)
+                m[levenshtein_distance<std::wstring>(kv.second.s, s.second->string.ru)] = s.first;
+            if (m.empty())
+                return;
+            kv.second.i = m.begin()->second;
+        });
+    }
+    e.wait();
+
+    std::ofstream f(fn);
+    for (auto &kv : kv1)
+    {
+        mres[kv.first] = kv.second.i;
+        f << std::quoted(kv.first) << " " << kv.second.i << "\n";
+    }
+    return mres;
+}
+
+// Collects the INFORMATION strings of language directory p (its own quest db
+// plus the aim1 and aim2 subdirectories) and writes p/<dirname>_diff.txt, a
+// report pairing the text currently stored in `field` with the text found.
+// Keys missing from kv_resolved are skipped.
+void process_lang(Storage &s, const path &p, polygon4::String polygon4::LocalizedString::*field)
+{
+    auto root_db = open(p);
+    auto aim1_db = open(p / "aim1");
+    auto aim2_db = open(p / "aim2");
+
+    AimKV merged;
+    // Databases without values contribute nothing; on duplicate keys the first entry is kept.
+    for (const db *src : { &root_db, &aim1_db, &aim2_db })
+    {
+        if (!src->number_of_values)
+            continue;
+        auto part = get_kv(*src);
+        merged.insert(part.begin(), part.end());
+    }
+
+    std::string report;
+    for (auto &entry : merged)
+    {
+        auto resolved = kv_resolved.find(entry.first);
+        if (resolved == kv_resolved.end())
+            continue;
+        auto &current = s.strings[resolved->second]->string.*field;
+        //current = entry.second.s; // write-back is disabled, only the report is produced
+        report += "id: " + std::to_string(resolved->second) + "\n\n";
+        report += "old:\n";
+        report += wstring2string(current) + "\n";
+        report += "\n";
+        report += "new:\n";
+        report += wstring2string(entry.second.s) + "\n";
+        report += "\n================================================\n\n";
+    }
+    write_file(p / (p.filename().string() + "_diff.txt"), report);
+}
+
+int main(int argc, char *argv[]) // entry point: prog db.sqlite dir_to_lang_dbs
+try // function-try-block: the handlers at the bottom cover the whole body
+{
+    if (argc != 3)
+    {
+        std::cout << "Usage: prog db.sqlite dir_to_lang_dbs" << "\n";
+        return 1;
+    }
+    path d = argv[2]; // directory whose subdirectories are matched against language names
+    // Load the storage named by argv[1], then build or read the key -> id mapping.
+    auto storage = initStorage(argv[1]);
+    storage->load();
+    kv_resolved = get_kv_resolved(d, *storage.get());
+    // Visit every entry of d; anything that is not a directory is ignored.
+    for (auto &f : boost::make_iterator_range(fs::directory_iterator(d), {}))
+    {
+        if (!fs::is_directory(f))
+            continue;
+
+        auto p = f.path();
+        // Languages.inl presumably expands ADD_LANGUAGE once per language, forming an else-if chain — TODO confirm.
+        if (0); // empty statement so every generated branch can start with `else`
+#define ADD_LANGUAGE(l, n) else if (p.filename() == #l && p.filename() != "ru") \
+    {process_lang(*storage.get(), p, &polygon4::LocalizedString::l);}
+#include <Polygon4/DataManager/Languages.inl>
+#undef ADD_LANGUAGE
+        else // also taken for "ru", which every generated branch excludes
+        {
+            std::cerr << "No such lang: " << p.filename().string() << "\n";
+            continue;
+        }
+    }
+    // All directory entries have been visited.
+    return 0;
+}
+catch (std::exception &e) // report the message and exit with status 1
+{
+    printf("error: %s\n", e.what());
+    return 1;
+}
+catch (...) // anything not derived from std::exception
+{
+    printf("error: unknown exception\n");
+    return 1;
+}
diff --git a/src/db_extractor/db.cpp b/src/db_extractor/db.cpp
index e3c6da0..f92b040 100644
--- a/src/db_extractor/db.cpp
+++ b/src/db_extractor/db.cpp
@@ -20,6 +20,8 @@
 
 #include <buffer.h>
 
+#include <Windows.h>
+
 string getSqlType(FieldType type)
 {
     switch (type)
@@ -45,6 +47,8 @@ void table::load(const buffer &b)
 
 void field::load(const buffer &b)
 {
+    if (b.eof())
+        return;
     READ(b, table_id);
     READ(b, id);
     READ_STRING(b, name);
@@ -69,6 +73,8 @@ void tab::load(const buffer &b)
     {
         field t;
         t.load(b);
+        if (t.table_id == -1)
+            continue;
         fields[t.id] = t;
     }
 }
@@ -104,6 +110,8 @@ void value::load_fields(const tab &tab, buffer &b)
         case FieldType::String:
             fv.s.resize(fv.size);
             READ_N(data, fv.s[0], fv.s.size());
+            while (!fv.s.empty() && fv.s.back() == '\0')
+                fv.s.resize(fv.s.size() - 1);
             break;
         case FieldType::Integer:
             READ(data, fv.i);
@@ -133,4 +141,32 @@ void db::load(const buffer &b)
         t.load_index(b);
         values.push_back(t);
     }
-}
\ No newline at end of file
+}
+
+// Loads <p>.tab into the table/field definitions, <p>.ind into the values and <p>.dat into their fields.
+void db::open(const path &p)
+{
+    const std::string base = p.string();
+    t.load(buffer(read_file(base + ".tab")));
+    load(buffer(read_file(base + ".ind")));
+    buffer data(read_file(base + ".dat"));
+    for (auto &entry : values) entry.load_fields(t, data);
+}
+
+// Converts codepage_str from the system ANSI code page (CP_ACP) to UTF-8,
+// using UTF-16 as the intermediate form. Return values of the Win32 calls
+// are only used as sizes, so a failed conversion yields an empty string.
+std::string str2utf8(const std::string &codepage_str)
+{
+    const char *src = codepage_str.c_str();
+    const int src_len = codepage_str.length();
+    // Each API is called twice: once to measure the output, once to fill it.
+    const int wide_len = MultiByteToWideChar(CP_ACP, MB_COMPOSITE, src, src_len, nullptr, 0);
+    std::wstring wide(wide_len, '\0');
+    MultiByteToWideChar(CP_ACP, MB_COMPOSITE, src, src_len, &wide[0], wide_len);
+
+    const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), wide_len, nullptr, 0, nullptr, nullptr);
+    std::string utf8(utf8_len, '\0');
+    WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), wide_len, &utf8[0], utf8_len, nullptr, nullptr);
+    return utf8;
+}
diff --git a/src/db_extractor/db.h b/src/db_extractor/db.h
index 6d927cd..ecf176b 100644
--- a/src/db_extractor/db.h
+++ b/src/db_extractor/db.h
@@ -16,6 +16,10 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <buffer.h>
+
+#include <primitives/filesystem.h>
+
 #include <assert.h>
 #include <iostream>
 #include <map>
@@ -24,8 +28,6 @@
 #include <string>
 #include <vector>
 
-#include <buffer.h>
-
 using namespace std;
 
 enum class FieldType : uint32_t
@@ -47,7 +49,7 @@ struct table
 
 struct field
 {
-    uint32_t table_id;
+    uint32_t table_id = -1;
     uint32_t id;
     std::string name;
     FieldType type;
@@ -90,10 +92,13 @@ struct value
 
 struct db
 {
-    uint32_t number_of_values;
+    uint32_t number_of_values = 0;
 
     tab t;
     vector<value> values;
 
     void load(const buffer &b);
-};
\ No newline at end of file
+    void open(const path &p);
+};
+
+std::string str2utf8(const std::string &codepage_str);
diff --git a/src/db_extractor/db_extractor.cpp b/src/db_extractor/db_extractor.cpp
index 6c097cc..a424331 100644
--- a/src/db_extractor/db_extractor.cpp
+++ b/src/db_extractor/db_extractor.cpp
@@ -20,36 +20,7 @@
 
 #include <fstream>
 
-#include <Windows.h>
 #include <buffer.h>
-#include <primitives/filesystem.h>
-
-void open_db(string path, db &db)
-{
-    db.t.load(buffer(read_file(path + ".tab")));
-    db.load(buffer(read_file(path + ".ind")));
-    buffer b(read_file(path + ".dat"));
-    for (auto &v : db.values)
-        v.load_fields(db.t, b);
-}
-
-string str2utf8(string codepage_str)
-{
-    int size = MultiByteToWideChar(CP_ACP, MB_COMPOSITE, codepage_str.c_str(),
-                                   codepage_str.length(), nullptr, 0);
-    std::wstring utf16_str(size, '\0');
-    MultiByteToWideChar(CP_ACP, MB_COMPOSITE, codepage_str.c_str(),
-                        codepage_str.length(), &utf16_str[0], size);
-
-    int utf8_size = WideCharToMultiByte(CP_UTF8, 0, utf16_str.c_str(),
-                                        utf16_str.length(), nullptr, 0,
-                                        nullptr, nullptr);
-    std::string utf8_str(utf8_size, '\0');
-    WideCharToMultiByte(CP_UTF8, 0, utf16_str.c_str(),
-                        utf16_str.length(), &utf8_str[0], utf8_size,
-                        nullptr, nullptr);
-    return utf8_str;
-}
 
 void create_sql(string path, const db &db)
 {
@@ -157,10 +128,10 @@ try
         cout << "Usage:\n" << argv[0] << " path/to/aim_game/data/db" << "\n" << argv[0] << " path/to/aim_game/data/quest" << "\n";
         return 1;
     }
-    string path = argv[1];
+    path p = argv[1];
     db db;
-    open_db(path, db);
-    create_sql(path, db);
+    db.open(p);
+    create_sql(p.string(), db);
     return 0;
 }
 catch (std::exception &e)
@@ -172,4 +143,4 @@ catch (...)
 {
     printf("error: unknown exception\n");
     return 1;
-}
\ No newline at end of file
+}