Add flex+bison parser of aim1 scripts.

This commit is contained in:
lzwdgc 2015-12-17 22:46:01 +03:00
parent 165d48d8a5
commit 26588ebbfd
9 changed files with 638 additions and 78 deletions

View file

@ -14,6 +14,8 @@ set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "CMake Targets")
project(aim_tools)
find_package(BISON 3.0 REQUIRED)
find_package(FLEX 2.5.35 REQUIRED)
find_package(Git)
if (MSVC)
@ -28,7 +30,7 @@ add_definitions(-DPOLYGON4_TOOLS=1)
add_definitions(-DPOLYGON4_STATIC=1)
if (NOT DATA_MANAGER_DIR)
message(STATUS "DATA_MANAGER_DIR variable is not set! Some targets won't be built")
message(FATAL_ERROR "DATA_MANAGER_DIR variable is not set!")
else()
set(SOLUTION_FOLDER Extern)
add_subdirectory(${DATA_MANAGER_DIR} DataManager)

View file

@ -9,12 +9,6 @@ add_executable(db_extractor ${db_extractor_src})
target_link_libraries(db_extractor common)
endif()
file(GLOB script2txt_src "script2txt/*")
add_executable(script2txt ${script2txt_src})
target_link_libraries(script2txt common)
if (DATA_MANAGER_DIR)
file(GLOB mmm_extractor_src "mmm_extractor/*")
add_executable(mmm_extractor ${mmm_extractor_src})
target_link_libraries(mmm_extractor DataManager common)
@ -22,8 +16,6 @@ target_link_libraries(mmm_extractor DataManager common)
file(GLOB mmo_extractor_src "mmo_extractor/*")
add_executable(mmo_extractor ${mmo_extractor_src})
target_link_libraries(mmo_extractor DataManager common)
endif()
file(GLOB mmp_extractor_src "mmp_extractor/*")
add_executable(mmp_extractor ${mmp_extractor_src})
@ -45,3 +37,4 @@ file(GLOB name_generator_src "name_generator/*")
add_executable(name_generator ${name_generator_src})
add_subdirectory(common)
add_subdirectory(script2txt)

View file

@ -0,0 +1,48 @@
#
# Polygon-4 script2txt
#
# Builds the script2txt executable together with its bison grammar and
# flex lexer, both of which are generated into the binary directory.
#
set(BSRC ${CMAKE_CURRENT_BINARY_DIR})
# Regular expressions matching C++ sources and headers by file extension.
set(_CPP ".*\\.cpp")
set(CPP "${_CPP}$")
set(_H "(.*\\.h|.*\\.hpp)")
set(H "${_H}$")
set(H_CPP "(${H}|${CPP})")
# Show sources and headers that live in the binary directory under a
# separate "Generated" group in IDE project views.
source_group("Generated" "${BSRC}/${H_CPP}")
# grammar.yy -> grammar.cpp
BISON_TARGET(grammar
"${CMAKE_CURRENT_SOURCE_DIR}/grammar.yy"
"${CMAKE_CURRENT_BINARY_DIR}/grammar.cpp"
)
# lexer.ll -> lexer.cpp, plus lexer.h requested through flex's --header-file flag.
FLEX_TARGET(lexer
"${CMAKE_CURRENT_SOURCE_DIR}/lexer.ll"
"${CMAKE_CURRENT_BINARY_DIR}/lexer.cpp"
COMPILE_FLAGS --header-file="${CMAKE_CURRENT_BINARY_DIR}/lexer.h"
)
# The lexer includes the header produced from the grammar, so the grammar
# has to be generated first.
ADD_FLEX_BISON_DEPENDENCY(lexer grammar)
# Generated headers are in the binary dir, hand-written ones in the source dir.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
set(parser_src
${BISON_grammar_OUTPUTS}
${FLEX_lexer_OUTPUTS}
${CMAKE_CURRENT_BINARY_DIR}/lexer.h
)
# lexer.h does not exist at configure time; mark it as generated so that
# listing it as a source is not an error.
set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/lexer.h PROPERTIES GENERATED TRUE)
file(GLOB script2txt_src "*.cpp" "*.h")
# NOTE(review): SCHEMA_FILE is not set in this file - presumably provided by
# the DataManager subproject; confirm.
set(script2txt_src ${script2txt_src}
grammar.yy
lexer.ll
${parser_src}
${SCHEMA_FILE}
)
# NOTE(review): script2txt_inc is not set in this file either, so it
# expands to nothing unless a parent scope defines it.
add_executable (script2txt ${script2txt_src} ${script2txt_inc})
target_link_libraries (script2txt common schema)

View file

@ -0,0 +1,62 @@
/*
* Polygon-4 script2txt
* Copyright (C) 2015-2016 lzwdgc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ParserDriver.h"
#include <algorithm>
#include <fstream>
// Prevent using <unistd.h> because of bug in flex.
#define YY_NO_UNISTD_H 1
#define YY_DECL 1
#include <lexer.h>
extern yy::parser::symbol_type yylex(yyscan_t yyscanner, yy::location &loc);
// Creates a driver with parser tracing switched off.
ParserDriver::ParserDriver()
    : debug(false)
{
}
// Fetches the next token from the flex scanner.
// The scanner updates `location` as it consumes input. The token is returned
// straight from the call on purpose: no named temporary is introduced.
yy::parser::symbol_type ParserDriver::lex()
{
    return yylex(this->scanner, this->location);
}
int ParserDriver::parse(const std::string &s)
{
yylex_init(&scanner);
yy_scan_string(s.c_str(), scanner);
yy::parser parser(*this);
parser.set_debug_level(debug);
int res = parser.parse();
yylex_destroy(scanner);
return res;
}
// Prints an error message prefixed with the source location it refers to.
void ParserDriver::error(const yy::location &l, const std::string &m)
{
    std::ostream &out = std::cerr;
    out << l << " " << m << std::endl;
}
// Prints an error message that has no associated source location.
void ParserDriver::error(const std::string& m)
{
    std::ostream &out = std::cerr;
    out << m;
    out << std::endl;
}

View file

@ -0,0 +1,54 @@
/*
* Polygon-4 script2txt
* Copyright (C) 2015-2016 lzwdgc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <set>
#include <string>
#include <vector>
#include <grammar.hpp>
// Connects the flex scanner and the bison parser with the calling code:
// it owns the scanner handle and the current source location, receives the
// resulting Context from the grammar and collects the names of all called
// functions.
class ParserDriver
{
public:
    ParserDriver();

    // Returns the next token; the generated parser calls this through the
    // yylex macro defined in the grammar prologue.
    yy::parser::symbol_type lex();

    // Scans and parses `s`. Returns zero on success, non-zero on failure.
    int parse(const std::string &s);

    // Error reporting, with and without a source location.
    void error(const yy::location &l, const std::string &m);
    void error(const std::string &m);

    // The grammar's start rule stores the finished output here.
    void setContext(Context &&ctx) { context = std::move(ctx); }
    const Context &getContext() const { return context; }

// lex & parse
private:
    // Scanner handle; initialized to null so that a driver that has not
    // parsed anything never holds an indeterminate pointer.
    void *scanner = nullptr;
    yy::location location;
    // Passed to yy::parser::set_debug_level() by parse().
    bool debug = false;

// data
private:
    Context context;

// other
public:
    // Names of the functions called in the parsed script (filled by the
    // function_call grammar rule).
    std::set<std::string> functions;
};

316
src/script2txt/grammar.yy Normal file
View file

@ -0,0 +1,316 @@
%{
#include <assert.h>
#include <iostream>
#include <string>

#include "ParserDriver.h"

// The generated parser requests tokens with yylex(driver); forward that
// call to the driver, which owns the flex scanner.
#define yylex(p) p.lex()
%}

////////////////////////////////////////

// general settings
%require "3.0"
%debug
%start file
%locations
%verbose
%no-lines
// Verbose syntax error messages. This replaces %error-verbose, which is
// deprecated as of Bison 3.0 in favour of this %define.
%define parse.error verbose

////////////////////////////////////////

// c++ skeleton and options
%skeleton "lalr1.cc"
%define api.value.type variant
%define api.token.constructor   // C++ style of handling variants
%define parse.assert            // check C++ variant types
%code requires  // forward decl of C++ driver (our parser) in HPP
{
#include <Polygon4/DataManager/Schema/Context.h>

class ParserDriver;
}

// param to yy::parser() constructor
// the parsing context
%param { ParserDriver &driver }

////////////////////////////////////////

// tokens and types
%token EOQ 0 "end of file"
%token ERROR_SYMBOL
%token L_BRACKET R_BRACKET COMMA QUOTE SEMICOLON COLON POINT
       L_CURLY_BRACKET R_CURLY_BRACKET SHARP R_ARROW EQUAL
       L_SQUARE_BRACKET R_SQUARE_BRACKET ASTERISK
%token IF ELSE NOT AND OR
%token END PROC _PROC
%token <std::string> STRING
%token <int> INTEGER

// Nonterminals whose value is a piece of text.
%type <std::string> string integer number
                    object_variable object
                    variables variable
                    function_name procedure_begin
                    parameters parameter
                    conds cond condition_body
                    function_call

// Nonterminals whose value is a Context holding formatted output.
%type <Context> condition condition_begin
                statements statement
                proc_statements proc_statement
                procedure
                global_statements global_statement
                script

////////////////////////////////////////
%%
// Start rule: the whole input is one script; its Context is handed to the driver.
file: script EOQ
{ driver.setContext(std::move($1)); }
;
script: global_statements
{ $$ = std::move($1); }
;
// Sequence of top-level statements. addLine() with no argument is called
// between two consecutive statements before the next one is appended
// (presumably producing an empty separator line - Context is defined elsewhere).
global_statements: global_statement
{ $$ = std::move($1); }
| global_statements global_statement
{
auto &ctx = $1;
ctx.addLine();
ctx.addWithRelativeIndent($2);
$$ = std::move(ctx);
}
;
// A top-level statement is a function call, a condition or a procedure.
global_statement: function_call
{
Context ctx;
ctx.addLine($1);
$$ = std::move(ctx);
}
| condition
{ $$ = std::move($1); }
| procedure
{ $$ = std::move($1); }
// The remaining alternatives accept stray tokens at top level and turn
// them into an empty Context, i.e. they are dropped from the output.
| R_CURLY_BRACKET
{ $$ = Context(); }
| END
{ $$ = Context(); }
| ERROR_SYMBOL
{ $$ = Context(); }
| POINT
{ $$ = Context(); }
| STRING
{ $$ = Context(); }
| R_BRACKET
{ $$ = Context(); }
;
// A procedure is a header followed by a body. Three forms are accepted:
// "PROC name ... END", an empty "PROC name END", and a body in curly brackets.
// Every form is emitted as a block opened with the header text.
procedure: procedure_begin proc_statements END
{
Context ctx;
ctx.beginBlock($1);
ctx.addWithRelativeIndent($2);
ctx.endBlock();
$$ = std::move(ctx);
}
| procedure_begin END
{
Context ctx;
ctx.beginBlock($1);
ctx.endBlock();
$$ = std::move(ctx);
}
| procedure_begin L_CURLY_BRACKET statements R_CURLY_BRACKET
{
Context ctx;
ctx.beginBlock($1);
ctx.addWithRelativeIndent($3);
ctx.endBlock();
$$ = std::move(ctx);
}
;
// Procedure header; the "()" is optional in the input but always present
// in the produced text.
procedure_begin: PROC function_name L_BRACKET R_BRACKET
{ $$ = "PROC " + $2 + "()"; }
| PROC function_name
{ $$ = "PROC " + $2 + "()"; }
;
// Statements of an END-terminated procedure body, appended one after another.
proc_statements: proc_statement
{ $$ = std::move($1); }
| proc_statements proc_statement
{
auto &ctx = $1;
ctx.addWithRelativeIndent($2);
$$ = std::move(ctx);
}
;
// One statement inside a procedure: a call, a "_PROC"-prefixed call or a condition.
proc_statement: function_call
{
Context ctx;
ctx.addLine($1);
$$ = std::move(ctx);
}
| _PROC function_call
{
Context ctx;
ctx.addLine("_PROC " + $2);
$$ = std::move(ctx);
}
| condition
{ $$ = std::move($1); }
// Stray tokens inside a body are accepted and produce no output.
| COLON
{ $$ = Context(); }
| R_BRACKET
{ $$ = Context(); }
| ERROR_SYMBOL
{ $$ = Context(); }
;
// Statements of a body delimited by curly brackets.
statements: statement
{ $$ = std::move($1); }
| statements statement
{
auto &ctx = $1;
ctx.addWithRelativeIndent($2);
$$ = std::move(ctx);
}
;
// Inside curly brackets END does not close anything; it is kept as an
// ordinary output line.
statement: proc_statement
{ $$ = std::move($1); }
| END
{
Context ctx;
ctx.addLine("END");
$$ = std::move(ctx);
}
;
// Function call. The text is rebuilt as name(arg, arg, ...) and the name is
// recorded in driver.functions. A trailing comma before the closing bracket
// is accepted and left out of the output.
function_call: function_name L_BRACKET parameters R_BRACKET
{ $$ = $1 + "(" + $3 + ")"; driver.functions.insert($1); }
| function_name L_BRACKET parameters COMMA R_BRACKET
{ $$ = $1 + "(" + $3 + ")"; driver.functions.insert($1); }
| function_name L_BRACKET R_BRACKET
{ $$ = $1 + "()"; driver.functions.insert($1); }
;
// Comma-separated argument list, joined with ", " in the output.
parameters: parameter
{ $$ = $1; }
| parameters COMMA parameter
{ $$ = $1 + ", " + $3; }
;
// A single argument: a name, a number, a dotted name or a literal '*'.
parameter: object
{ $$ = $1; }
| number
{ $$ = $1; }
| object_variable
{ $$ = $1; }
| ASTERISK
{ $$ = "*"; }
;
// IF statement with an optional ELSE branch; the branch is appended to the
// Context of the IF part as a block opened with "else".
condition: condition_begin
{ $$ = std::move($1); }
| condition_begin ELSE L_CURLY_BRACKET statements R_CURLY_BRACKET
{
auto &ctx = $1;
ctx.beginBlock("else");
ctx.addWithRelativeIndent($4);
ctx.endBlock();
$$ = std::move(ctx);
}
;
// The IF part. Besides the regular form, the second alternative accepts a
// condition whose closing bracket is missing and the third one an empty body.
// All of them are emitted as a block opened with "if (<condition>)".
condition_begin: IF L_BRACKET condition_body R_BRACKET L_CURLY_BRACKET statements R_CURLY_BRACKET
{
Context ctx;
ctx.beginBlock("if (" + $3 + ")");
ctx.addWithRelativeIndent($6);
ctx.endBlock();
$$ = std::move(ctx);
}
| IF L_BRACKET condition_body L_CURLY_BRACKET statements R_CURLY_BRACKET
{
Context ctx;
ctx.beginBlock("if (" + $3 + ")");
ctx.addWithRelativeIndent($5);
ctx.endBlock();
$$ = std::move(ctx);
}
| IF L_BRACKET condition_body R_BRACKET L_CURLY_BRACKET R_CURLY_BRACKET
{
Context ctx;
ctx.beginBlock("if (" + $3 + ")");
ctx.endBlock();
$$ = std::move(ctx);
}
;
condition_body: conds
{ $$ = $1; }
;
// Terms combined left to right; AND is written as " && " and OR as " || ".
// The rule gives AND and OR the same precedence.
conds: cond
{ $$ = $1; }
| conds AND cond
{ $$ = $1 + " && " + $3; }
| conds OR cond
{ $$ = $1 + " || " + $3; }
;
// A single term: a name, a dotted name, a function call, or a negated term.
cond: object
{ $$ = $1; }
| object_variable
{ $$ = $1; }
| function_call
{ $$ = $1; }
| NOT cond
{ $$ = "!" + $2; }
;
// Dotted access: object.variable[.variable...].
object_variable: object POINT variables
{ $$ = $1 + "." + $3; }
;
// The member chain may be empty, so an object followed by a bare '.' is
// accepted and yields "object." in the output.
variables: /* empty */
{ $$ = ""; }
| variable
{ $$ = $1; }
| variables POINT variable
{ $$ = $1 + "." + $3; }
;
// function_name and object are both a plain STRING; which one applies is
// decided by the grammar context in which the string appears.
function_name: string
{ $$ = $1; }
;
object: string
{ $$ = $1; }
;
variable: string
{ $$ = $1; }
| integer
{ $$ = $1; }
;
// Numbers are kept as text: either "int.int" or a single integer.
number: integer POINT integer
{ $$ = $1 + "." + $3; }
| integer
{ $$ = $1; }
;
string: STRING
{ $$ = $1; }
;
// The INTEGER token carries an int; convert it back to text for the output.
integer: INTEGER
{ $$ = std::to_string($1); }
;
%%
void yy::parser::error(const location_type& l, const std::string& m)
{
driver.error(l, m);
}

98
src/script2txt/lexer.ll Normal file
View file

@ -0,0 +1,98 @@
%{
// NOTE(review): 4005 is presumably MSVC's "macro redefinition" warning,
// silenced for the generated scanner - confirm.
#pragma warning(disable: 4005)
#include <string>
#include "grammar.hpp"
// Runs before every rule action: extend the current location by the length of the match.
#define YY_USER_ACTION loc.columns(yyleng);
// Start-condition helpers. POP_STATE always goes back to the initial
// condition (0), so conditions cannot be nested.
#define PUSH_STATE(x) BEGIN(x)
#define POP_STATE() BEGIN(0)
// Signature of the generated scanner function: it returns a complete bison
// symbol and receives the location to update.
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, yy::location &loc)
// Build a token without / with a semantic value at the current location.
#define MAKE(x) yy::parser::make_ ## x(loc)
#define MAKE_VALUE(x, v) yy::parser::make_ ## x((v), loc)
%}
%option nounistd
%option yylineno
%option nounput
%option batch
%option never-interactive
%option reentrant
%option noyywrap
/* Named patterns. An INTEGER may carry a trailing F or f. */
DIGIT [0-9]
DIGITS {DIGIT}{DIGIT}*
INTEGER {DIGITS}[Ff]?
/* A STRING is an identifier-like word that may also contain '-'. */
STRING [[:alpha:]_-][[:alnum:]_-]*
/* Exclusive start condition used between a pair of double quotes. */
%x user_string
%%

%{
    // Code run each time yylex is called.
    loc.step();
%}

    /* A comment runs from '#' to the end of the line. '.' never matches a
       newline, so no trailing context is needed; without it a comment on the
       last line is recognised even when the input has no final newline. */
#.*                     ; // ignore comments

[ \t]+                  loc.step();
\r                      loc.step();
\n                      {
                            loc.lines(yyleng);
                            loc.step();
                        }

";"                     return MAKE(SEMICOLON);
":"                     return MAKE(COLON);
"("                     return MAKE(L_BRACKET);
")"                     return MAKE(R_BRACKET);
"{"                     return MAKE(L_CURLY_BRACKET);
"}"                     return MAKE(R_CURLY_BRACKET);
"["                     return MAKE(L_SQUARE_BRACKET);
"]"                     return MAKE(R_SQUARE_BRACKET);
","                     return MAKE(COMMA);
"\."                    return MAKE(POINT);
"->"                    return MAKE(R_ARROW);
"="                     return MAKE(EQUAL);
"\*"                    return MAKE(ASTERISK);

    /* Keywords and operators. They are listed before {STRING} so that they
       win when both match the same text. */
IF                      { return MAKE(IF); }
ELSE                    { return MAKE(ELSE); }
"!"                     { return MAKE(NOT); }
"&"                     { return MAKE(AND); }
"|"                     { return MAKE(OR); }
"||"                    { return MAKE(OR); }

END                     { return MAKE(END); }
PROC                    { return MAKE(PROC); }
_PROC                   { return MAKE(_PROC); }

{INTEGER}               { return MAKE_VALUE(INTEGER, std::stoi(yytext)); }
{STRING}                { return MAKE_VALUE(STRING, yytext); }

    /* An opening quote switches to the user_string start condition. */
\"                      { PUSH_STATE(user_string); return MAKE(QUOTE); }
<user_string>\"         { POP_STATE(); return MAKE(QUOTE); }
<user_string>(?:[^"\\]|\\.)*/\" {
                            // The literal may span several lines: count its
                            // newlines so the location stays in sync. A plain
                            // scan is used because incrementing the null
                            // pointer returned by a failed strstr() is
                            // undefined behaviour.
                            int n = 0;
                            for (const char *p = yytext; *p != '\0'; ++p)
                            {
                                if (*p == '\n')
                                    n++;
                            }
                            if (n)
                            {
                                loc.lines(n);
                                loc.step();
                            }
                            return MAKE_VALUE(STRING, yytext);
                        }

    /* Any other character is handed to the parser as ERROR_SYMBOL instead of
       being reported here. */
.                       { /*driver.error(loc, "invalid character");*/ return MAKE(ERROR_SYMBOL); }
<<EOF>>                 return MAKE(EOQ);

%%

View file

@ -43,6 +43,7 @@ struct script
READ(b, raw_text_size);
READ(b, unk1);
raw_text.resize(raw_text_size);
if (raw_text_size)
READ_N(b, raw_text[0], raw_text.size());
READ(b, array_len);
unk2.resize(array_len);
@ -56,7 +57,6 @@ struct script
}
fix_text();
beautify();
}
void fix_text()
@ -74,58 +74,6 @@ struct script
}
}
// Indents every entry of `lines` in place according to its nesting depth.
// Depth comes from two sources: open curly brackets and PROC ... END
// procedures that do not use curly brackets.
void beautify()
{
// One unit of indentation.
const std::string space = " ";
// Number of currently open '{'.
int brace_count = 0;
// Number of open procedures whose body is not delimited by curly brackets.
int proc_started = 0;
// True while processing the line right after a PROC header.
bool proc_started_now = false;
for (auto &line : lines)
{
// The body of the procedure that has just started is in curly brackets:
// its depth is then tracked by brace_count, so undo the increment made
// for the header.
if (proc_started_now && line.find("{") != line.npos)
{
proc_started--;
}
if (brace_count > 0 || proc_started > 0)
{
auto space_count = brace_count + proc_started;
// A line consisting only of "}" and the END that closes the outermost
// procedure are placed one level to the left.
if (line == "}" ||
(brace_count == 0 && proc_started == 1 &&
line.find("END") != line.npos))
space_count--;
std::string s;
for (int i = 0; i < space_count; i++)
s += space;
line = s + line;
}
// Procedure header: contains both "PROC" and "()". The rest of the loop
// body is skipped for such a line.
if (line.find("PROC") != line.npos &&
line.find("()") != line.npos)
{
proc_started++;
proc_started_now = true;
continue;
}
if (proc_started_now)
proc_started_now = false;
// END closes the procedure only when exactly one procedure is open.
if (line.find("END") != line.npos && proc_started == 1)
{
proc_started--;
}
// Update the bracket depth from the characters of this line.
for (auto &c : line)
{
if (c == '{')
brace_count++;
else if (c == '}')
brace_count--;
// A '}' without a matching '{' is replaced by a newline character and
// the depth is put back to zero.
if (brace_count < 0)
{
c = '\n';
brace_count++;
}
}
}
}
std::string get_text() const
{
std::string s;
@ -134,16 +82,30 @@ struct script
if (line != "\n")
s += line + "\n";
}
replace_all(s, "IF(", "IF (");
replace_all(s, "\nIF", "\n\nIF");
replace_all(s, "PROC", "PROC ");
replace_all(s, "END\nPROC", "END\n\nPROC");
replace_all(s, "|", " || ");
replace_all(s, "&", " && ");
replace_all(s, "(", "( ");
replace_all(s, ")", " )");
replace_all(s, ",", ", ");
replace_all(s, "!", "! ");
replace_all(s, "ENFD", "END");
replace_all(s, "\nEN\n", "\n");
replace_all(s, "?", " ");
s += "\nEND\n";
// remove wrong braces
int braces = 0;
for (auto &c : s)
{
switch (c)
{
case '{':
braces++;
break;
case '}':
if (braces == 0)
c = ' ';
else
braces--;
break;
}
}
return s;
}
};

View file

@ -20,6 +20,7 @@
#include <iostream>
#include <stdint.h>
#include "ParserDriver.h"
#include "script.h"
using std::cout;
@ -42,11 +43,35 @@ try
s.load(b);
auto str = s.get_text();
// write
ParserDriver driver;
if (driver.parse(str))
{
throw std::runtime_error("error during parsing input file");
}
auto &ctx = driver.getContext();
// write script
{
filename += ".txt";
std::ofstream ofile(filename);
if (ofile)
ofile << str;
ofile << ctx.getText();
}
// write function calls
{
std::ofstream functions("functions.txt", std::ios::app);
if (functions)
{
for (auto &f : driver.functions)
{
std::string f2(f.size(), 0);
std::transform(f.begin(), f.end(), f2.begin(), tolower);
functions << f2 << "\n";
}
}
}
return 0;
}
catch (std::exception &e)