mirror of
https://github.com/aimrebirth/tools.git
synced 2026-04-15 01:43:25 +00:00
[db2] Update codepage handling. Track duplicates.
This commit is contained in:
parent
0cd7f72e25
commit
c6df217c6b
2 changed files with 42 additions and 11 deletions
|
|
@ -28,11 +28,11 @@
|
||||||
std::string utf8_to_dbstr(const char8_t *s, int codepage = 1251) {
|
std::string utf8_to_dbstr(const char8_t *s, int codepage = 1251) {
|
||||||
return str2str((const char *)s, CP_UTF8, codepage);
|
return str2str((const char *)s, CP_UTF8, codepage);
|
||||||
}
|
}
|
||||||
std::string utf8_to_dbstr(const char *s) {
|
std::string utf8_to_dbstr(const char *s, int codepage = 1251) {
|
||||||
return utf8_to_dbstr((const char8_t *)s);
|
return utf8_to_dbstr((const char8_t *)s, codepage);
|
||||||
}
|
}
|
||||||
std::string utf8_to_dbstr(const std::string &s) {
|
std::string utf8_to_dbstr(const std::string &s, int codepage = 1251) {
|
||||||
return utf8_to_dbstr((const char8_t *)s.c_str());
|
return utf8_to_dbstr((const char8_t *)s.c_str(), codepage);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mem_stream {
|
struct mem_stream {
|
||||||
|
|
@ -179,13 +179,25 @@ struct db2 {
|
||||||
using db2_memory_value = std::variant<std::string, int, float>;
|
using db2_memory_value = std::variant<std::string, int, float>;
|
||||||
using db2_memory = std::map<std::string, std::map<std::string, std::map<std::string, db2_memory_value>>>;
|
using db2_memory = std::map<std::string, std::map<std::string, std::map<std::string, db2_memory_value>>>;
|
||||||
|
|
||||||
|
path fn;
|
||||||
|
int codepage{1251};
|
||||||
db2_memory m;
|
db2_memory m;
|
||||||
|
bool written{};
|
||||||
|
|
||||||
|
// Saves the database on destruction unless it has already been written
// (tracked by the `written` flag).
// NOTE(review): write() calls save(), which can throw (e.g. "too long string");
// an exception escaping a destructor terminates the program — confirm this is
// the intended failure mode.
~db2_internal() {
    if (written) {
        return;
    }
    write();
}
|
||||||
|
|
||||||
// Begin iterator of the underlying map `m`, so the object can be used
// directly in a range-based for loop.
auto begin(this auto &&self) { return self.m.begin(); }
|
||||||
// Past-the-end iterator of the underlying map `m`.
auto end(this auto &&self) { return self.m.end(); }
|
||||||
// Returns a reference to the entry of `m` stored under key `s`.
// Uses std::map::operator[], so a missing key is inserted with a
// default-constructed value.
auto &operator[](this auto &&self, const std::string &s) {
    auto &entry = self.m[s];
    return entry;
}
|
||||||
|
// Overload taking a std::u8string key: the key's NUL-terminated buffer is
// reinterpreted as `const char *` and used to index `m`
// (std::map::operator[] semantics: a missing key is inserted).
auto &operator[](this auto &&self, const std::u8string &s) {
    const auto *key = reinterpret_cast<const char *>(s.c_str());
    return self.m[key];
}
|
||||||
auto to_json() const {
|
auto to_json() const {
|
||||||
nlohmann::json ja;
|
nlohmann::json ja;
|
||||||
for (auto &&[tn,t] : m) {
|
for (auto &&[tn,t] : m) {
|
||||||
|
|
@ -204,7 +216,7 @@ struct db2 {
|
||||||
return ja;
|
return ja;
|
||||||
}
|
}
|
||||||
void save(const path &fn) {
|
void save(const path &fn) {
|
||||||
auto s_to_char20 = [&](char20 &dst, const std::string &in) {
|
auto s_to_char20 = [&](char20 &dst, const std::string &in, int codepage = 1251) {
|
||||||
auto s = utf8_to_dbstr(in);
|
auto s = utf8_to_dbstr(in);
|
||||||
if (s.size() + 1 > sizeof(char20)) {
|
if (s.size() + 1 > sizeof(char20)) {
|
||||||
throw std::runtime_error{"too long string"};
|
throw std::runtime_error{"too long string"};
|
||||||
|
|
@ -221,7 +233,7 @@ struct db2 {
|
||||||
for (auto &&[tn,td] : m) {
|
for (auto &&[tn,td] : m) {
|
||||||
tab::table &t = tabv;
|
tab::table &t = tabv;
|
||||||
t.id = table_id;
|
t.id = table_id;
|
||||||
s_to_char20(t.name, tn); // always 1251
|
s_to_char20(t.name, tn, 1251); // always 1251
|
||||||
|
|
||||||
for (auto &&[_,fd] : td) {
|
for (auto &&[_,fd] : td) {
|
||||||
for (auto &&[fn,fv] : fd) {
|
for (auto &&[fn,fv] : fd) {
|
||||||
|
|
@ -235,7 +247,7 @@ struct db2 {
|
||||||
f.table_id = table_id;
|
f.table_id = table_id;
|
||||||
f.type = ft;
|
f.type = ft;
|
||||||
ft = (field_type)total_fields;
|
ft = (field_type)total_fields;
|
||||||
s_to_char20(f.name, fn);
|
s_to_char20(f.name, fn, 1251); // always 1251 if we have any field in Russian
|
||||||
}
|
}
|
||||||
|
|
||||||
++table_id;
|
++table_id;
|
||||||
|
|
@ -255,7 +267,7 @@ struct db2 {
|
||||||
for (auto &&[vn, fd] : td) {
|
for (auto &&[vn, fd] : td) {
|
||||||
ind::value &i = indv;
|
ind::value &i = indv;
|
||||||
i.table_id = table_id;
|
i.table_id = table_id;
|
||||||
s_to_char20(i.name, vn);
|
s_to_char20(i.name, vn, codepage);
|
||||||
i.offset = datv.size();
|
i.offset = datv.size();
|
||||||
for (auto &&[fn, fv] : fd) {
|
for (auto &&[fn, fv] : fd) {
|
||||||
dat::field_value_base &_ = datv;
|
dat::field_value_base &_ = datv;
|
||||||
|
|
@ -276,9 +288,14 @@ struct db2 {
|
||||||
write_file(path{fn} += ".ind", indv.d);
|
write_file(path{fn} += ".ind", indv.d);
|
||||||
write_file(path{fn} += ".dat", datv.d);
|
write_file(path{fn} += ".dat", datv.d);
|
||||||
}
|
}
|
||||||
|
// Saves the database to the stored path `fn`, then sets `written` so the
// destructor does not save it a second time. If save() throws, `written`
// is left unchanged.
void write() {
    save(fn);
    written = true;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// converts string to utf8, trims them
|
// converts string to utf8, trims them
|
||||||
|
// filters out values with empty name ""
|
||||||
auto to_map() const {
|
auto to_map() const {
|
||||||
auto prepare_string = [](auto &&in) {
|
auto prepare_string = [](auto &&in) {
|
||||||
auto s = str2utf8(in);
|
auto s = str2utf8(in);
|
||||||
|
|
@ -287,16 +304,18 @@ struct db2 {
|
||||||
};
|
};
|
||||||
|
|
||||||
db2_internal m;
|
db2_internal m;
|
||||||
|
m.fn = db.fn;
|
||||||
|
m.codepage = db.codepage;
|
||||||
auto tbl = tab_.data->tables();
|
auto tbl = tab_.data->tables();
|
||||||
for (auto &&t : tbl) {
|
for (auto &&t : tbl) {
|
||||||
auto &jt = m[prepare_string(t.name)];
|
auto &jt = m[prepare_string(t.name)];
|
||||||
auto fields = tab_.data->fields(t.id);
|
auto fields = tab_.data->fields(t.id);
|
||||||
for (auto &&v : ind_.data->values(t.id)) {
|
for (auto &&v : ind_.data->values(t.id)) {
|
||||||
auto vn = prepare_string(v.name);
|
auto vn = prepare_string(v.name);
|
||||||
if (jt.contains(vn)) {
|
if (vn.empty()) {
|
||||||
throw std::logic_error{"duplicate"};
|
continue;
|
||||||
}
|
}
|
||||||
auto &jv = jt[vn];
|
std::decay_t<decltype(jt)>::mapped_type jv;
|
||||||
auto p = dat_.f.p + v.offset;
|
auto p = dat_.f.p + v.offset;
|
||||||
auto max = p + v.size;
|
auto max = p + v.size;
|
||||||
while (p < max) {
|
while (p < max) {
|
||||||
|
|
@ -307,6 +326,10 @@ struct db2 {
|
||||||
throw std::logic_error{"unknown field"};
|
throw std::logic_error{"unknown field"};
|
||||||
}
|
}
|
||||||
auto fn = prepare_string(f->name);
|
auto fn = prepare_string(f->name);
|
||||||
|
if (jv.contains(fn)) {
|
||||||
|
// we analyze such cases manually
|
||||||
|
throw std::logic_error{"duplicate field: "s + fn};
|
||||||
|
}
|
||||||
switch (f->type) {
|
switch (f->type) {
|
||||||
case db2::field_type::integer:
|
case db2::field_type::integer:
|
||||||
jv[fn] = *(int *)p;
|
jv[fn] = *(int *)p;
|
||||||
|
|
@ -322,6 +345,13 @@ struct db2 {
|
||||||
}
|
}
|
||||||
p += vb->size;
|
p += vb->size;
|
||||||
}
|
}
|
||||||
|
if (jt.contains(vn)) {
|
||||||
|
if (!jv.contains("DELETED") || std::get<int>(jv["DELETED"]) == 0) {
|
||||||
|
// we analyze such cases manually
|
||||||
|
throw std::logic_error{"duplicate value: "s + vn};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
jt[vn] = jv;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return m;
|
return m;
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ int main(int argc, char *argv[])
|
||||||
auto m = f.to_map();
|
auto m = f.to_map();
|
||||||
write_file(path{db_fn} += ".json", m.to_json().dump(1));
|
write_file(path{db_fn} += ".json", m.to_json().dump(1));
|
||||||
m.save(path{db_fn} += "new");
|
m.save(path{db_fn} += "new");
|
||||||
|
m.written = true;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue