Respect codepages.

This commit is contained in:
lzwdgc 2024-04-03 15:36:37 +03:00
parent a8c8665ba1
commit 8ca71c62ab
7 changed files with 42 additions and 37 deletions

View file

@ -85,7 +85,7 @@ struct mod_maker {
} }
} }
void write() { void write() {
m.save(fn); m.save(fn, codepage);
written = true; written = true;
} }
auto &operator[](this auto &&d, const std::string &s) { auto &operator[](this auto &&d, const std::string &s) {
@ -475,7 +475,7 @@ struct mod_maker {
if (!aim2_available()) { if (!aim2_available()) {
throw std::runtime_error{"aim2 is not available, setup it first"}; throw std::runtime_error{"aim2 is not available, setup it first"};
} }
static auto m2 = db2{aim2_game_dir / "data" / "db", 1251}.open().to_map(); static auto m2 = db2{aim2_game_dir / "data" / "db"}.open().to_map(1251);
return m2; return m2;
} }
@ -489,7 +489,7 @@ private:
if (!aim2_available()) { if (!aim2_available()) {
throw std::runtime_error{"aim2 is not available, setup it first"}; throw std::runtime_error{"aim2 is not available, setup it first"};
} }
static auto m2 = db2{aim2_game_dir / "data" / "quest", 1251}.open().to_map(); static auto m2 = db2{aim2_game_dir / "data" / "quest"}.open().to_map(1251);
return m2; return m2;
} }
bool aim2_available() const { bool aim2_available() const {
@ -503,16 +503,16 @@ private:
return backup; return backup;
} }
db_wrapper open_db(auto &&name, int db_codepage) { db_wrapper open_db(auto &&name, int db_codepage) {
auto d = db2{get_data_dir() / name, db_codepage}; auto d = db2{get_data_dir() / name};
auto files = d.open().get_files(); auto files = d.open().get_files();
for (auto &&f : files) { for (auto &&f : files) {
backup_or_restore_once(f); backup_or_restore_once(f);
files_to_distribute.insert(f); files_to_distribute.insert(f);
} }
db_wrapper w; db_wrapper w;
w.m = d.open().to_map(); w.m = d.open().to_map(db_codepage);
w.fn = d.fn; w.fn = d.fn;
w.codepage = d.codepage; w.codepage = db_codepage;
return w; return w;
} }
void backup_or_restore_once(const path &fn) { void backup_or_restore_once(const path &fn) {

View file

@ -18,11 +18,25 @@
#pragma once #pragma once
#include <string>
#include <iostream> #include <iostream>
#include <map>
#include <string>
std::string str2utf8(const std::string &codepage_str, int cp = 0); // MultiByteToWideChar: https://msdn.microsoft.com/en-us/library/windows/desktop/dd319072(v=vs.85).aspx
std::wstring str2utf16(const std::string &codepage_str, int cp = 0); // code pages: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx
// https://www.ibm.com/docs/en/rational-soft-arch/9.6.1?topic=overview-locales-code-pages-supported
static const std::map<std::string, int> code_pages
{
{ "en", 0 },
{ "cz", 1250 },
{ "ru", 1251 },
{ "ge", 1252 },
{ "fr", 1252 },
{ "et", 1257 },
};
std::string str2utf8(const std::string &codepage_str, int cp);
std::wstring str2utf16(const std::string &codepage_str, int cp);
std::string str2str(const std::string &codepage_str, int cp_from, int cp_to); std::string str2str(const std::string &codepage_str, int cp_from, int cp_to);

View file

@ -147,11 +147,11 @@ void db::open(const path &p)
v.load_fields(t, b); v.load_fields(t, b);
} }
polygon4::tools::db::processed_db db::process() const polygon4::tools::db::processed_db db::process(int cp) const
{ {
auto process_string = [](const std::string &s) auto process_string = [&](const std::string &s)
{ {
return str2utf8(s.c_str()); return str2utf8(s.c_str(), cp);
}; };
polygon4::tools::db::processed_db pdb; polygon4::tools::db::processed_db pdb;

View file

@ -107,5 +107,5 @@ struct db
void load(const buffer &b); void load(const buffer &b);
void open(const path &basename); void open(const path &basename);
polygon4::tools::db::processed_db process() const; polygon4::tools::db::processed_db process(int cp) const;
}; };

View file

@ -25,13 +25,13 @@
#include <variant> #include <variant>
std::string utf8_to_dbstr(const char8_t *s, int codepage = 1251) { std::string utf8_to_dbstr(const char8_t *s, int codepage) {
return str2str((const char *)s, CP_UTF8, codepage); return str2str((const char *)s, CP_UTF8, codepage);
} }
std::string utf8_to_dbstr(const char *s, int codepage = 1251) { std::string utf8_to_dbstr(const char *s, int codepage) {
return utf8_to_dbstr((const char8_t *)s, codepage); return utf8_to_dbstr((const char8_t *)s, codepage);
} }
std::string utf8_to_dbstr(const std::string &s, int codepage = 1251) { std::string utf8_to_dbstr(const std::string &s, int codepage) {
return utf8_to_dbstr((const char8_t *)s.c_str(), codepage); return utf8_to_dbstr((const char8_t *)s.c_str(), codepage);
} }
@ -149,7 +149,6 @@ struct db2 {
}; };
path fn; path fn;
int codepage{1251};
template <typename T> template <typename T>
struct file { struct file {
@ -223,9 +222,9 @@ struct db2 {
} }
} }
} }
void save(const path &fn, int codepage = 1251) { void save(const path &fn, int codepage) {
auto s_to_char20 = [&](char20 &dst, const std::string &in, int codepage = 1251) { auto s_to_char20 = [&](char20 &dst, const std::string &in, int codepage) {
auto s = utf8_to_dbstr(in); auto s = utf8_to_dbstr(in, codepage);
if (s.size() + 1 > sizeof(char20)) { if (s.size() + 1 > sizeof(char20)) {
throw std::runtime_error{"too long string"}; throw std::runtime_error{"too long string"};
} }
@ -241,7 +240,7 @@ struct db2 {
for (auto &&[tn,td] : m) { for (auto &&[tn,td] : m) {
tab::table &t = tabv; tab::table &t = tabv;
t.id = table_id; t.id = table_id;
s_to_char20(t.name, tn, 1251); // always 1251 s_to_char20(t.name, tn, 1251); // always 1251, because latin only letters
for (auto &&[_,fd] : td) { for (auto &&[_,fd] : td) {
for (auto &&[fn,fv] : fd) { for (auto &&[fn,fv] : fd) {
@ -255,7 +254,7 @@ struct db2 {
f.table_id = table_id; f.table_id = table_id;
f.type = ft; f.type = ft;
ft = (field_type)total_fields; ft = (field_type)total_fields;
s_to_char20(f.name, fn, 1251); // always 1251 if we have any field in Russian s_to_char20(f.name, fn, 1251); // always 1251, because latin only letters
} }
++table_id; ++table_id;
@ -282,7 +281,7 @@ struct db2 {
auto sz = visit(fv, auto sz = visit(fv,
[&](const int &v) { return datv = v; }, [&](const int &v) { return datv = v; },
[&](const float &v) { return datv = v; }, [&](const float &v) { return datv = v; },
[&](const std::string &v) { return datv = utf8_to_dbstr(v); }); [&](const std::string &v) { return datv = utf8_to_dbstr(v, codepage); });
auto &v = datv.at<dat::field_value_base>(-(sizeof(dat::field_value_base) + sz)); auto &v = datv.at<dat::field_value_base>(-(sizeof(dat::field_value_base) + sz));
v.field_id = (int)fields[tn].find(fn)->second; v.field_id = (int)fields[tn].find(fn)->second;
v.size = sz; v.size = sz;
@ -300,9 +299,9 @@ struct db2 {
// converts string to utf8 // converts string to utf8
// filters out values with empty name "" // filters out values with empty name ""
auto to_map() const { auto to_map(int cp) const {
auto prepare_string = [](auto &&in) { auto prepare_string = [&](auto &&in) {
auto s = str2utf8(in); auto s = str2utf8(in, cp);
// we have some erroneous table values (records) with spaces // we have some erroneous table values (records) with spaces
// we can trim only field values, but don't do it at the moment // we can trim only field values, but don't do it at the moment
//boost::trim(s); //boost::trim(s);

View file

@ -41,15 +41,6 @@
#include <math.h> #include <math.h>
// MultiByteToWideChar: https://msdn.microsoft.com/en-us/library/windows/desktop/dd319072(v=vs.85).aspx
// code pages: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx
const std::map<std::string, int> code_pages
{
{ "en", 0 },
{ "ru", 1251 },
{ "et", 1257 },
};
static int get_cp(const std::string &cp) static int get_cp(const std::string &cp)
{ {
auto i = code_pages.find(cp); auto i = code_pages.find(cp);

View file

@ -32,6 +32,7 @@
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
cl::opt<path> db_fn(cl::Positional, cl::desc("<db file or json file to backwards conversion>"), cl::Required); cl::opt<path> db_fn(cl::Positional, cl::desc("<db file or json file to backwards conversion>"), cl::Required);
cl::opt<int> codepage(cl::Positional, cl::desc("<codepage>"), cl::Required);
cl::ParseCommandLineOptions(argc, argv); cl::ParseCommandLineOptions(argc, argv);
@ -40,12 +41,12 @@ int main(int argc, char *argv[])
if (fn.extension() != ".json") { if (fn.extension() != ".json") {
db2 db{fn}; db2 db{fn};
auto f = db.open(); auto f = db.open();
auto m = f.to_map(); auto m = f.to_map(codepage);
write_file(path{fn} += ".json", m.to_json().dump(1)); write_file(path{fn} += ".json", m.to_json().dump(1));
} else { } else {
db2::files::db2_internal db; db2::files::db2_internal db;
db.load_from_json(fn); db.load_from_json(fn);
db.save(fn.parent_path() / fn.stem()); db.save(fn.parent_path() / fn.stem(), codepage);
} }
return 0; return 0;