Antlr gymnastics for the game

I am trying to pre-process some dialogue files from an old game (Vampire: The Masquerade - Bloodlines, if you are interested) in order to insert some code at a specific place in some of the data files.

I want to use ANTLR to transform the dialogue files, but my grammar is ambiguous even though the format is very simple.

The format represents the NPC and PC dialogue as a series of lines:

 { TEXT } repeated (the count varies: normally 13 per line, but sometimes fewer)

One of these tokens in particular (the 5th, but the 1st in the example) is important because it determines whether the line belongs to an NPC or a PC: an NPC line has a '#' character there. However, other tokens may contain the same character, and I get warnings on some valid files that I would like to eliminate.

My big problem at the moment is the ambiguity of the grammar. To deal with the variable number of tokens, I decided to use "*" to glob the ones I do not need, up to the newline.

So, I did this:

any* NL* 

I expected this to match the rest of the tokens up to any run of newlines. However, ANTLR says the grammar is ambiguous, while:

any NL* or any* NL is not.

EDIT: old grammar removed; see the new grammar and the new problem below.

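EDIT: ANTLR now reports: "error(208): VampireDialog.g:99:1: The following token definitions can never be matched because prior tokens match the same input: NOT_SHARP". (The rest of this sentence, which mentioned NL and the lexer, was lost in extraction.)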

Example of an npc line (one of its tokens contains a #):

{1} {? }{ ? } {#} {} {G.Cabbie_Line = 1} {} {} {} {} {} {} {}

Example of a pc line (no token contains a #):

{2} { . } { . } {0} {} {npc.WorldMap(G.WorldMap_State)} {} {} {} {} {} {} { . }

The grammar:

grammar VampireDialog;

// Grammar-wide options: the parser builds an AST whose nodes are CommonTree,
// and the generated code targets Java.
options
{
output=AST;
ASTLabelType=CommonTree;
language=Java;
} 
// REWRITE has no lexer rule of its own; it only appears in the rewrite
// clauses of npc_conditional and conditional, where it carries the new text.
tokens
{
REWRITE;
}

@parser::header {
import java.util.LinkedList;
import java.io.File;
}

@members {
    /**
     * Parses every ".dlg" file found below the hard-coded Bloodlines dialogue
     * directory (resolved against the user's home folder) with the generated
     * lexer and parser. Files are read as Windows-1252.
     */
    public static void main(String[] args) throws Exception {
        File dlgRoot = new File(System.getProperty("user.home"), "Desktop/Vampire the Masquerade - Bloodlines/Vampire the Masquerade - Bloodlines/Vampire/dlg");
        List<File> found = new LinkedList<File>();
        getFiles(256, new File[]{dlgRoot}, found, new LinkedList<File>());
        for (File candidate : found) {
            // Only dialogue files are of interest; everything else is skipped.
            if (!candidate.getName().endsWith(".dlg")) {
                continue;
            }
            ANTLRFileStream input = new ANTLRFileStream(candidate.getAbsolutePath(), "Windows-1252");
            VampireDialogLexer lexer = new VampireDialogLexer(input);
            TokenRewriteStream rewriteStream = new TokenRewriteStream(lexer);
            VampireDialogParser dialogParser = new VampireDialogParser(rewriteStream);
            Tree ast = (Tree) dialogParser.dialog().getTree();
            //  System.out.println(ast.toStringTree());
        }
    }

    /**
     * Collects the files and directories reachable from the given start entries.
     *
     * @param levels      how many directory levels below the start entries may be descended
     * @param search      start entries; every one of them must exist
     * @param files       receives the entries that could not be listed as directories
     * @param directories receives the entries that were listed as directories
     * @throws AssertionError if any start entry does not exist
     */
    public static void getFiles(int levels, File[] search, List<File> files, List<File> directories) {
        // Validate all start entries up front so nothing is collected on bad input.
        for (int i = 0; i < search.length; i++) {
            boolean missing = !search[i].exists();
            if (missing) {
                throw new AssertionError("Search file array has non-existing files");
            }
        }
        getFilesAux(levels, search, files, directories);
    }

    /**
     * Recursive worker: classifies each start entry, then descends into the
     * listed directories while levels is still positive.
     *
     * An entry whose listFiles() call returns null is recorded in files;
     * every other entry is recorded in directories and its children are
     * queued for the next level.
     */
    private static void getFilesAux(int levels, File[] startFiles, List<File> files, List<File> directories) {
        List<File[]> pendingChildren = new ArrayList<File[]>(50);
        for (int i = 0; i < startFiles.length; i++) {
            File entry = startFiles[i];
            File[] children = entry.listFiles();
            if (children != null) {
                directories.add(entry);
                pendingChildren.add(children);
                continue;
            }
            files.add(entry);
        }

        // The current level is always classified; only the descent is bounded.
        if (levels <= 0) {
            return;
        }
        for (File[] children : pendingChildren) {
            getFilesAux(levels - 1, children, files, directories);
        }
    }
}




/*------------------------------------------------------------------
 * PARSER RULES
 *------------------------------------------------------------------*/
// A dialog is zero or more entries. Each entry is three ANY tokens, then either
// an NPC line or a player line, then any number of ANY tokens and any number of NL.
// NOTE(review): because both ANY* and NL* may be empty, a following ANY can either
// extend ANY* or start the next entry's "ANY ANY ANY" prefix; this looks like the
// ambiguity ANTLR reports -- confirm.
dialog : (ANY ANY ANY  (npc_line | player_line) ANY* NL*)*;
// NPC line: the marker that contains '#', followed by the condition to rewrite.
npc_line :  npc_marker npc_conditional;
// Player line: the marker without '#', followed by the condition to rewrite.
player_line : pc_marker conditional;
// Rewrites the braced condition of an NPC line so that it ends with npc.Reset().
// The action takes the condition text from the subtree's toStringTree() and the
// first rewrite alternative whose predicate holds is used:
//   - blank condition                     -> { npc.Reset() }
//   - text already ends with npc.Reset()  -> { <condition> } (text kept as is)
//   - anything else                       -> { (<condition>) and npc.Reset() }
npc_conditional : '{' condiction '}'
            {   String cond = $condiction.tree.toStringTree(), partial = "npc.Reset()", full = "("+cond+") and npc.Reset()";
                boolean empty = cond.trim().isEmpty(); 
                boolean alreadyProcessed = cond.endsWith("npc.Reset()");}   
                ->   {empty}? '{' REWRITE[partial] '}'
                ->   {alreadyProcessed}? '{' REWRITE[cond] '}'
                ->   '{' REWRITE[full] '}';
// Rewrites the braced condition of a player line so that a non-blank condition
// ends with npc.Count(). The action takes the condition text from the subtree's
// toStringTree() and the first rewrite alternative whose predicate holds is used:
//   - blank condition                     -> { <condition> } (text kept as is)
//   - text already ends with npc.Count()  -> { <condition> } (text kept as is)
//   - anything else                       -> { (<condition>) and npc.Count() }
conditional : '{' condiction '}'
            {   String cond = $condiction.tree.toStringTree(), full = "("+cond+") and npc.Count()";
                boolean empty = cond.trim().isEmpty(); 
                boolean alreadyProcessed = cond.endsWith("npc.Count()");}   
                ->   {empty}? '{' REWRITE[cond] '}'
                ->   {alreadyProcessed}? '{' REWRITE[cond] '}'
                ->   '{' REWRITE[full] '}';
// Raw text of a condition: a possibly empty run of TEXT tokens.
condiction : TEXT*;
//in the parser ~('#') means: "match any token except the token that matches '#'" 
//and in lexer rules ~('#') means: "match any character except '#'"
// Marker of a player line: braces with no '#' token inside.
pc_marker : '{' NOT_SHARP* '}';
// Marker of an NPC line: braces with exactly one '#' token inside.
npc_marker : '{' NOT_SHARP* '#' NOT_SHARP* '}';


/*------------------------------------------------------------------
 * LEXER RULES
 *------------------------------------------------------------------*/
// A whole braced field: '{', any run of TEXT characters, '}'.
ANY : '{' TEXT* '}';
// Any single character other than a newline character or '}'.
TEXT : ~(NL|'}');
// Any single character other than a newline character, '#' or '}'.
// NOTE(review): every character accepted here is also accepted by TEXT, which is
// declared first; this looks like the reason ANTLR reports NOT_SHARP as a token
// that can never be matched -- confirm.
NOT_SHARP : ~(NL|'#'|'}');
// One newline character: carriage return, line feed or form feed.
NL : ( '\r' | '\n'| '\u000C');
+3
1

You can use a syntactic predicate for this. It has the form (some_parser_or_lexer_rules_here)=> parser_or_lexer_rules. A small example:

line
  :  (A B)=> A B
  |          A C
  ;

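In the rule line, the predicate means: if the input ahead matches A B, take the first alternative and match A B; otherwise, take the second alternative and match A C.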

So, in your case: if a line contains a #, it is an npc line; otherwise, it is a pc line.

A grammar that does this:

grammar VampireDialog;

// Entry rule: one or more lines separated by line breaks, with optional
// leading and trailing line breaks, up to the end of the input.
parse
  :  LineBreak* line (LineBreak+ line)* LineBreak* EOF
  ;

// One dialogue line. Both alternatives match the same `conditionals`; the
// syntactic predicate on the first one only decides which label is printed:
// if one or more non-hash tokens followed by a Hash can be matched ahead, the
// line is printed as "npc", otherwise as "pc".
line
  :  (any_except_line_breaks_and_hash+ Hash)=> conditionals {System.out.println("> npc :: " + $conditionals.text);}
  |                                            conditionals {System.out.println("> pc  :: " + $conditionals.text);}
  ;

// One or more braced fields, with optional spaces before, between and after them.
conditionals  
  :  Space* conditional (Space* conditional)* Space*
  ;

// A single braced field: Open, its text, Close.
conditional
  :  Open conditional_text Close
  ;

// Contents of a field: any mix of Hash, Space and Other tokens, possibly empty.
// Open, Close and LineBreak are not allowed inside a field.
conditional_text
  :  (Hash | Space | Other)*
  ;

// Any single token that is neither a Hash nor a LineBreak; used only inside
// the syntactic predicate of `line`.
any_except_line_breaks_and_hash
  :  (Space | Open | Close | Other)
  ;

// A line break: "\r\n", "\n" or a lone "\r".
LineBreak
  :  '\r'? '\n'
  |  '\r'
  ;

// A single space or tab character.
Space
  :  ' ' | '\t'
  ;

// Single-character tokens for the field delimiters and the NPC marker.
Hash  : '#';
Open  : '{';
Close : '}';

// Fall through rule: if the lexer does not match anything 
// above this rule, this `Other` rule will match.
Other
  :  .
  ;

A small test class:

import org.antlr.runtime.*;

/** Demo driver that runs the generated VampireDialog lexer and parser on two sample lines. */
public class Main {
    /**
     * Builds the sample input (a line whose fourth field holds '#', an empty line,
     * then a line without '#') and parses it with the grammar's entry rule.
     */
    public static void main(String[] args) throws Exception {
        StringBuilder sample = new StringBuilder();
        sample.append("{ 1 }{ Where to? }{ Where to? }{ # }{ }{ G.Cabbie_Line = 1 }{ }{ }{ }{ }{ }{ }{ }\n");
        sample.append("\n");
        sample.append("{ 2 }{ Just drive. }{ Just drive. }{ 0 }{ }{ npc.WorldMap( G.WorldMap_State ) }{ }{ }{ }{ }{ }{ }{ Not here. }\n");

        VampireDialogLexer dialogLexer = new VampireDialogLexer(new ANTLRStringStream(sample.toString()));
        VampireDialogParser dialogParser = new VampireDialogParser(new CommonTokenStream(dialogLexer));
        dialogParser.parse();
    }
}

Running it prints:

> npc :: { 1 }{ Where to? }{ Where to? }{ # }{ }{ G.Cabbie_Line = 1 }{ }{ }{ }{ }{ }{ }{ }
> pc  :: { 2 }{ Just drive. }{ Just drive. }{ 0 }{ }{ npc.WorldMap( G.WorldMap_State ) }{ }{ }{ }{ }{ }{ }{ Not here. }

, .

( , ANTLRWorks, !)

+3

Source: https://habr.com/ru/post/1793257/


All Articles