feat: WIP awk rewrite

2025-12-12 07:40:30 -08:00 · 2025-12-01 21:42:10 +01:00
parent 2c35c5b4ad
commit 0a945aa152
12 changed files with 1018 additions and 369 deletions
--- a/marzipan/src/grammar/_cryptoverif.lark
+++ b/marzipan/src/grammar/_cryptoverif.lark
@@ -0,0 +1,7 @@
+    # TODO: finish defining these (comes from Cryptoverif)
+    #param_decl: "param" _non_empty_seq{IDENT} options "."
+    #proba_decl: "proba" IDENT ["(...)"] options "."
+    #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "."
+    #proof_decl: "proof" "{" proof "}"
+    #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}"
+    #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "."
--- a/marzipan/src/grammar/_main.lark
+++ b/marzipan/src/grammar/_main.lark
@@ -0,0 +1,10 @@
+start: decl* PROCESS process
+%import .common.*
+%import .process.*
+%import .term.*
+%import .decl.*
+%import .query.*
+
+%import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library
+%ignore WS // Disregard spaces in text
+%ignore COMMENT
--- a/marzipan/src/grammar/common.lark
+++ b/marzipan/src/grammar/common.lark
@@ -0,0 +1,111 @@
+PROCESS: "process"
+EQUIVALENCE: "equivalence"
+YIELD: "yield"
+channel: CHANNEL
+CHANNEL: "channel"
+IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/
+ZERO: "0"
+INFIX: "||"
+        | "&&"
+        | "="
+        | "<>"
+        | "<="
+        | ">="
+        | "<"
+        | ">"
+typeid: channel
+        | IDENT
+_non_empty_seq{x}: x ("," x)*
+_maybe_empty_seq{x}: [ _non_empty_seq{x} ]
+
+OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
+OPTIONS_FUN: OPTIONS_FUN_CONST
+OPTIONS_CONST: OPTIONS_FUN_CONST
+OPTIONS_FREE_REDUC: "private"
+OPTIONS_PRED: "memberOptim" | "block"
+OPTIONS_PROCESS: "precise"
+OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
+OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
+OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
+OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA | "maxSubset"
+OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll"
+OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | "/cv_[a-zA-Z0-9À-ÿ'_]*/"
+OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
+OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
+OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual
+options{idents}: [ "[" _non_empty_seq{idents} "]" ]
+BOOL : "true" | "false"
+NONE: "none"
+FULL: "full"
+ALL: "all"
+FUNC: IDENT
+ignoretype_options: BOOL | ALL | NONE | "attacker"
+boolean_settings_names: "privateCommOnPublicTerms"
+                        | "rejectChoiceTrueFalse"
+                        | "rejectNoSimplif"
+                        | "allowDiffPatterns"
+                        | "inductionQueries"
+                        | "inductionLemmas"
+                        | "movenew"
+                        | "movelet"
+                        | "stopTerm"
+                        | "removeEventsForLemma"
+                        | "simpEqAll"
+                        | "eqInNames"
+                        | "preciseLetExpand"
+                        | "expandSimplifyIfCst"
+                        | "featureFuns"
+                        | "featureNames"
+                        | "featurePredicates"
+                        | "featureEvents"
+                        | "featureTables"
+                        | "featureDepth"
+                        | "featureWidth"
+                        | "simplifyDerivation"
+                        | "abbreviateDerivation"
+                        | "explainDerivation"
+                        | "unifyDerivation"
+                        | "reconstructDerivation"
+                        | "displayDerivation"
+                        | "traceBacktracking"
+                        | "interactiveSwapping"
+                        | "color"
+                        | "verboseLemmas"
+                        | "abbreviateClauses"
+                        | "removeUselessClausesBeforeDisplay"
+                        | "verboseEq"
+                        | "verboseDestructors"
+                        | "verboseTerm"
+                        | "verboseStatistics"
+                        | "verboseRules"
+                        | "verboseBase"
+                        | "verboseRedundant"
+                        | "verboseCompleted"
+                        | "verboseGoalReachable"
+
+_decl_pair{name, value}: "set" name "=" value "."
+
+set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}
+
+ignore_types_values: BOOL | "all" | "none" | "attacker"
+simplify_process_values: BOOL | "interactive"
+precise_actions_values: BOOL | "trueWithoutArgsInNames"
+redundant_hyp_elim_values: BOOL | "beginOnly"
+reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL
+attacker_values: "active" | "passive"
+key_compromise_values: "none" | "approx" | "strict"
+predicates_implementable: "check" | "nocheck"
+application_values: "instantiate" | "full" | "none" | "discard"
+max_values: "none" | "n"
+sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset"
+redundancy_elim_values: "best" | "simple" | "no"
+nounif_ignore_a_few_times_values: "none" | "auto" | "all"
+nounif_ignore_ntimes_values: "n"
+trace_display_values: "short" | "long" | "none"
+verbose_clauses_values: "none" | "explained" | "short"
+INT: NAT | "-" NAT
+COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
+
+%import common (WORD, DIGIT, NUMBER, WS, ESCAPED_STRING) // imports from terminal library
+%ignore WS // Disregard spaces in text
+%ignore COMMENT
--- a/marzipan/src/grammar/decl.lark
+++ b/marzipan/src/grammar/decl.lark
@@ -0,0 +1,99 @@
+decl: type_decl
+    | channel_decl
+    | free_decl
+    | const_decl
+    | fun_decl
+    | letfun_decl
+    | reduc_decl
+    | fun_reduc_decl
+    | equation_decl
+    | pred_decl
+    | table_decl
+    | let_decl
+    | set_settings_decl
+    | event_decl
+    | query_decl
+    | axiom_decl
+    | restriction_decl
+    | lemma_decl
+    | noninterf_decl
+    | weaksecret_decl
+    | not_decl
+    | select_decl
+    | noselect_decl
+    | nounif_decl
+    | elimtrue_decl
+    | clauses_decl
+    | module_decl
+    #| param_decl
+    #| proba_decl
+    #| letproba_decl
+    #| proof_decl
+    #| def_decl
+    #| expand_decl
+
+type_decl: "type" IDENT options{OPTIONS_TYPE} "."
+channel_decl: "channel" _non_empty_seq{IDENT} "."
+free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
+const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
+fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
+letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
+reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
+fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
+equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
+pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
+table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "."
+let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."
+
+set_settings_decl: set_settings_boolean_decl
+            | _decl_pair{"ignoreTypes", ignore_types_values}
+            | _decl_pair{"simplifyProcess", simplify_process_values}
+            | _decl_pair{"preciseActions", precise_actions_values}
+            | _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
+            | _decl_pair{"reconstructTrace", reconstruct_trace_values}
+            | _decl_pair{"attacker", attacker_values}
+            | _decl_pair{"keyCompromise", key_compromise_values}
+            | _decl_pair{"predicatesImplementable", predicates_implementable}
+            | _decl_pair{"saturationApplication", application_values}
+            | _decl_pair{"verificationApplication", application_values}
+            | _decl_pair{"maxDepth", max_values}
+            | _decl_pair{"maxHyp", max_values}
+            | _decl_pair{"selFun", sel_fun_values}
+            | _decl_pair{"redundancyElim", redundancy_elim_values}
+            | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
+            | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
+            | _decl_pair{"traceDisplay", trace_display_values}
+            | _decl_pair{"verboseClauses", verbose_clauses_values}
+            | set_strategy
+            | set_symb_order
+
+_swap_strategy_seq{x}: x ("->" x)*
+set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
+_symb_ord_seq{x}: x (">" x)*
+set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."
+
+event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
+select_decl:    "select"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
+noselect_decl:  "noselect"  [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
+nounif_decl:    "nounif"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."
+
+elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
+clauses_decl: "clauses" clauses "."
+
+module_decl: "@module" " " IDENT
+
+noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "."
+weaksecret_decl: "weaksecret" IDENT "."
+
+nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
+equality: term "=" term
+        | "let" IDENT "=" term "in" equality
+mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
+eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
+clause: term
+        | term "->" term
+        | term "<->" term
+        | term "<=>" term
+clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
+mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
+NAT: DIGIT+
--- a/marzipan/src/grammar/lemma.lark
+++ b/marzipan/src/grammar/lemma.lark
@@ -0,0 +1,6 @@
+lemma: gterm [";" lemma]
+         | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
+         | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
+axiom_decl:       "axiom"       [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
+restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
+lemma_decl:       "lemma"       [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."
--- a/marzipan/src/grammar/process.lark
+++ b/marzipan/src/grammar/process.lark
@@ -0,0 +1,45 @@
+start: decl* PROCESS process | decl* EQUIVALENCE process process
+process: ZERO
+        | YIELD
+        | IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
+        | bracketed_process
+        | piped_process
+        | replicated_process
+        | replicated_process_bounds
+        | sample_process
+        | if_process
+        | in_process
+        | out_process
+        | let_process
+        | insert_process
+        | get_process
+        | event_process
+        | phase
+        | sync
+bracketed_process: "(" process ")"
+piped_process: process "|" process
+replicated_process: "!" process
+replicated_process_bounds: "!" IDENT "<=" IDENT process
+                            | "foreach" IDENT "<=" IDENT "do" process
+sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
+                | IDENT "<-R" typeid [";" process]
+let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
+            | IDENT [":" typeid] "<-" pterm [";" process]
+            | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
+if_process: "if" pterm "then" process [ "else" process ]
+in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
+get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
+out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
+insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
+event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
+term: IDENT
+        | NAT
+        | "(" _maybe_empty_seq{term} ")"
+        | IDENT "(" _maybe_empty_seq{term} ")"
+        | term ( "+" | "-" ) NAT
+        | NAT "+" term
+        | term INFIX term
+        | "not" "(" term ")"
+
+phase: "phase" NAT [";" process]
+sync: "sync" NAT ["[" TAG "]"] [";" process]
--- a/marzipan/src/grammar/query.lark
+++ b/marzipan/src/grammar/query.lark
@@ -0,0 +1,24 @@
+query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
+        | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
+        | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces.
+        | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
+nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
+            | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
+gformat: IDENT
+        | "*" IDENT
+        | IDENT "(" _maybe_empty_seq{gformat} ")"
+        | "choice" "[" gformat "," gformat "]"
+        | "not" "(" _maybe_empty_seq{gformat} ")"
+        | "new" IDENT [ "[" [ fbinding ] "]" ]
+        | "let" IDENT "=" gformat "in" gformat
+fbinding: "!" NAT "=" gformat [";" fbinding]
+        | IDENT "=" gformat [";" fbinding]
+nounifoption: "hypothesis"
+            | "conclusion"
+            | "ignoreAFewTimes"
+            | "inductionOn" "=" IDENT
+            | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"
+
+query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."
+not_decl: "not" [ typedecl ";"] gterm "."
+TAG: IDENT
--- a/marzipan/src/grammar/term.lark
+++ b/marzipan/src/grammar/term.lark
@@ -0,0 +1,67 @@
+gterm: ident_gterm
+        | fun_gterm
+        | choice_gterm
+        | infix_gterm
+        | arith_gterm
+        | arith2_gterm
+        | event_gterm
+        | injevent_gterm
+        | implies_gterm
+        | paren_gterm
+        | sample_gterm
+        | let_gterm
+ident_gterm: IDENT
+fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
+choice_gterm: "choice" "[" gterm "," gterm "]"
+infix_gterm: gterm INFIX gterm
+arith_gterm: gterm ( "+" | "-" ) NAT
+arith2_gterm: NAT "+" gterm
+event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
+injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
+implies_gterm: gterm "==>" gterm
+paren_gterm: "(" _maybe_empty_seq{gterm} ")"
+sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
+let_gterm: "let" IDENT "=" gterm "in" gterm
+
+gbinding: "!" NAT "=" gterm [";" gbinding]
+        | IDENT "=" gterm [";" gbinding]
+
+pterm: IDENT
+        | NAT
+        | "(" _maybe_empty_seq{pterm} ")"
+        | IDENT "(" _maybe_empty_seq{pterm} ")"
+        | choice_pterm
+        | pterm ("+" | "-") NAT
+        | NAT "+" pterm
+        | pterm INFIX pterm
+        | not_pterm
+        | sample_pterm
+        | if_pterm
+        | let_pterm
+        | insert_pterm
+        | get_pterm
+        | event_pterm
+choice_pterm: "choice[" pterm "," pterm "]"
+if_pterm: "if" pterm "then" pterm [ "else" pterm ]
+not_pterm: "not" "(" pterm ")"
+let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
+            | IDENT [":" typeid] "<-" pterm ";" pterm
+            | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
+sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
+            | IDENT "<-R" typeid [";" pterm]
+insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
+event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
+get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
+pattern: IDENT [":" typeid]
+        | "_" [ ":" typeid ]
+        | NAT
+        | pattern "+" NAT
+        | NAT "+" pattern
+        | "(" _maybe_empty_seq{pattern} ")"
+        | IDENT "(" _maybe_empty_seq{pattern} ")"
+        | "=" pterm
+mayfailterm: term
+            | "fail"
+mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
+typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
+failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]
--- a/marzipan/src/grammars/marzipan_awk.lark
+++ b/marzipan/src/grammars/marzipan_awk.lark
@@ -0,0 +1,47 @@
+start: line*
+
+line: directive_line NEWLINE
+    | code_line NEWLINE
+    | NEWLINE
+
+directive_line: module_directive
+              | alias_directive
+              | long_alias_start
+              | long_alias_end
+              | query_directive
+              | reachable_directive
+              | lemma_directive
+
+code_line: /[^\n]*/       // Anything not recognized as a directive
+
+module_directive: MODULE NAME
+
+alias_directive: ALIAS alias_pair (alias_pair)*
+
+alias_pair: IDENT "=" TOKEN
+
+long_alias_start: LONG_ALIAS IDENT
+long_alias_end: LONG_ALIAS_END
+
+query_directive: QUERY quoted_string
+reachable_directive: REACHABLE quoted_string
+lemma_directive: LEMMA quoted_string
+
+
+quoted_string: ESCAPED_STRING
+
+NAME: IDENT
+TOKEN: /[^ \t\n]+/
+IDENT: /[A-Za-z_][A-Za-z0-9_']*/
+MODULE: "@module"
+ALIAS: "@alias"
+LONG_ALIAS: "@long-alias"
+LONG_ALIAS_END: "@long-alias-end"
+QUERY: "@query"
+REACHABLE: "@reachable"
+LEMMA: "@lemma"
+NEWLINE: /\r?\n/
+
+%import common.ESCAPED_STRING
+%import common.WS
+%ignore WS
--- a/marzipan/src/grammars/proverif.lark
+++ b/marzipan/src/grammars/proverif.lark
@@ -0,0 +1,361 @@
+PROCESS: "process"
+start: decl* PROCESS process
+YIELD: "yield"
+channel: CHANNEL
+CHANNEL: "channel"
+IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/
+ZERO: "0"
+INFIX: "||"
+        | "&&"
+        | "="
+        | "<>"
+        | "<="
+        | ">="
+        | "<"
+        | ">"
+typeid: channel
+        | IDENT
+_non_empty_seq{x}: x ("," x)*
+_maybe_empty_seq{x}: [ _non_empty_seq{x} ]
+
+OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
+    OPTIONS_FUN: OPTIONS_FUN_CONST
+    OPTIONS_CONST: OPTIONS_FUN_CONST
+    OPTIONS_FREE_REDUC: "private"
+    OPTIONS_PRED: "memberOptim" | "block"
+    OPTIONS_PROCESS: "precise"
+    OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
+    OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
+    OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
+    OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "maxSubset"
+    OPTIONS_QUERY: OPTIONS_QUERY_LEMMA_AXIOM | "proveAll"
+    OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | "/cv_[a-zA-Z0-9À-ÿ'_]*/"
+    OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
+    OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
+    OPTIONS_TYPE: "fixed" | "bounded" # TODO(blipp): complete this. These are only for compatibility with CryptoVerif and are ignored
+    options{idents}: [ "[" _non_empty_seq{idents} "]" ]
+    process: ZERO
+           | YIELD
+           | IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
+           | bracketed_process
+           | piped_process
+           | replicated_process
+           | replicated_process_bounds
+           | sample_process
+           | if_process
+           | in_process
+           | out_process
+           | let_process
+           | insert_process
+           | get_process
+           | event_process
+           | phase
+           | sync
+    bracketed_process: "(" process ")"
+    piped_process: process "|" process
+    replicated_process: "!" process
+    replicated_process_bounds: "!" IDENT "<=" IDENT process
+                             | "foreach" IDENT "<=" IDENT "do" process
+    sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
+                  | IDENT "<-R" typeid [";" process]
+    let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
+               | IDENT [":" typeid] "<-" pterm [";" process]
+               | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
+    if_process: "if" pterm "then" process [ "else" process ]
+    in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
+    get_process: IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
+    out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
+    insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
+    event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
+    term: IDENT
+          | NAT
+          | "(" _maybe_empty_seq{term} ")"
+          | IDENT "(" _maybe_empty_seq{term} ")"
+          | term ( "+" | "-" ) NAT
+          | NAT "+" term
+          | term INFIX term
+          | "not" "(" term ")"
+
+    query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
+         | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
+         | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces.
+         | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
+    lemma: gterm [";" lemma]
+         | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
+         | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
+    gterm: ident_gterm
+         | fun_gterm
+         | choice_gterm
+         | infix_gterm
+         | arith_gterm
+         | arith2_gterm
+         | event_gterm
+         | injevent_gterm
+         | implies_gterm
+         | paren_gterm
+         | sample_gterm
+         | let_gterm
+    ident_gterm: IDENT
+    fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
+    choice_gterm: "choice" "[" gterm "," gterm "]"
+    infix_gterm: gterm INFIX gterm
+    arith_gterm: gterm ( "+" | "-" ) NAT
+    arith2_gterm: NAT "+" gterm
+    event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
+    injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
+    implies_gterm: gterm "==>" gterm
+    paren_gterm: "(" _maybe_empty_seq{gterm} ")"
+    sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
+    let_gterm: "let" IDENT "=" gterm "in" gterm
+
+    gbinding: "!" NAT "=" gterm [";" gbinding]
+            | IDENT "=" gterm [";" gbinding]
+
+    nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
+              | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
+    gformat: IDENT
+           | "*" IDENT
+           | IDENT "(" _maybe_empty_seq{gformat} ")"
+           | "choice" "[" gformat "," gformat "]"
+           | "not" "(" _maybe_empty_seq{gformat} ")"
+           | "new" IDENT [ "[" [ fbinding ] "]" ]
+           | "let" IDENT "=" gformat "in" gformat
+    fbinding: "!" NAT "=" gformat [";" fbinding]
+            | IDENT "=" gformat [";" fbinding]
+    nounifoption: "hypothesis"
+                | "conclusion"
+                | "ignoreAFewTimes"
+                | "inductionOn" "=" IDENT
+                | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"
+
+    pterm: IDENT
+          | NAT
+          | "(" _maybe_empty_seq{pterm} ")"
+          | IDENT "(" _maybe_empty_seq{pterm} ")"
+          | choice_pterm
+          | pterm ("+" | "-") NAT
+          | NAT "+" pterm
+          | pterm INFIX pterm
+          | not_pterm
+          | sample_pterm
+          | if_pterm
+          | let_pterm
+          | insert_pterm
+          | get_pterm
+          | event_pterm
+    choice_pterm: "choice[" pterm "," pterm "]"
+    if_pterm: "if" pterm "then" pterm [ "else" pterm ]
+    not_pterm: "not" "(" pterm ")"
+    let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
+             | IDENT [":" typeid] "<-" pterm ";" pterm
+             | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
+    sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
+                | IDENT "<-R" typeid [";" pterm]
+    insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
+    event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
+    get_pterm: IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
+    pattern: IDENT [":" typeid]
+           | "_" [ ":" typeid ]
+           | NAT
+           | pattern "+" NAT
+           | NAT "+" pattern
+           | "(" _maybe_empty_seq{pattern} ")"
+           | IDENT "(" _maybe_empty_seq{pattern} ")"
+           | "=" pterm
+    mayfailterm: term
+               | "fail"
+    mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
+    typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
+    failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]
+
+    decl: type_decl
+        | channel_decl
+        | free_decl
+        | const_decl
+        | fun_decl
+        | letfun_decl
+        | reduc_decl
+        | fun_reduc_decl
+        | equation_decl
+        | pred_decl
+        | table_decl
+        | let_decl
+        | set_settings_decl
+        | event_decl
+        | query_decl
+        | axiom_decl
+        | restriction_decl
+        | lemma_decl
+        | noninterf_decl
+        | weaksecret_decl
+        | not_decl
+        | select_decl
+        | noselect_decl
+        | nounif_decl
+        | elimtrue_decl
+        | clauses_decl
+        | module_decl
+        #| param_decl
+        #| proba_decl
+        #| letproba_decl
+        #| proof_decl
+        #| def_decl
+        #| expand_decl
+
+    type_decl: "type" IDENT options{OPTIONS_TYPE} "."
+    channel_decl: "channel" _non_empty_seq{IDENT} "."
+    free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
+    const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
+    fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
+    letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
+    reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
+    fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
+    equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
+    pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
+    table_decl: IDENT "(" _maybe_empty_seq{typeid} ")" "."
+    let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."
+
+    BOOL : "true" | "false"
+    NONE: "none"
+    FULL: "full"
+    ALL: "all"
+    FUNC: IDENT
+    ignoretype_options: BOOL | ALL | NONE | "attacker"
+    boolean_settings_names: "privateCommOnPublicTerms"
+                            | "rejectChoiceTrueFalse"
+                            | "rejectNoSimplif"
+                            | "allowDiffPatterns"
+                            | "inductionQueries"
+                            | "inductionLemmas"
+                            | "movenew"
+                            | "movelet"
+                            | "stopTerm"
+                            | "removeEventsForLemma"
+                            | "simpEqAll"
+                            | "eqInNames"
+                            | "preciseLetExpand"
+                            | "expandSimplifyIfCst"
+                            | "featureFuns"
+                            | "featureNames"
+                            | "featurePredicates"
+                            | "featureEvents"
+                            | "featureTables"
+                            | "featureDepth"
+                            | "featureWidth"
+                            | "simplifyDerivation"
+                            | "abbreviateDerivation"
+                            | "explainDerivation"
+                            | "unifyDerivation"
+                            | "reconstructDerivation"
+                            | "displayDerivation"
+                            | "traceBacktracking"
+                            | "interactiveSwapping"
+                            | "color"
+                            | "verboseLemmas"
+                            | "abbreviateClauses"
+                            | "removeUselessClausesBeforeDisplay"
+                            | "verboseEq"
+                            | "verboseDestructors"
+                            | "verboseTerm"
+                            | "verboseStatistics"
+                            | "verboseRules"
+                            | "verboseBase"
+                            | "verboseRedundant"
+                            | "verboseCompleted"
+                            | "verboseGoalReachable"
+
+    _decl_pair{name, value}: "set" name "=" value "."
+
+    set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}
+
+    ignore_types_values: BOOL | "all" | "none" | "attacker"
+    simplify_process_values: BOOL | "interactive"
+    precise_actions_values: BOOL | "trueWithoutArgsInNames"
+    redundant_hyp_elim_values: BOOL | "beginOnly"
+    reconstruct_trace_values: BOOL | "n"
+    attacker_values: "active" | "passive"
+    key_compromise_values: "none" | "approx" | "strict"
+    predicates_implementable: "check" | "nocheck"
+    application_values: "instantiate" | "full" | "none" | "discard"
+    max_values: "none" | "n"
+    sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset"
+    redundancy_elim_values: "best" | "simple" | "no"
+    nounif_ignore_a_few_times_values: "none" | "auto" | "all"
+    nounif_ignore_ntimes_values: "n"
+    trace_display_values: "short" | "long" | "none"
+    verbose_clauses_values: "none" | "explained" | "short"
+    set_settings_decl: set_settings_boolean_decl
+                | _decl_pair{"ignoreTypes", ignore_types_values}
+                | _decl_pair{"simplifyProcess", simplify_process_values}
+                | _decl_pair{"preciseActions", precise_actions_values}
+                | _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
+                | _decl_pair{"reconstructTrace", reconstruct_trace_values}
+                | _decl_pair{"attacker", attacker_values}
+                | _decl_pair{"keyCompromise", key_compromise_values}
+                | _decl_pair{"predicatesImplementable", predicates_implementable}
+                | _decl_pair{"saturationApplication", application_values}
+                | _decl_pair{"verificationApplication", application_values}
+                | _decl_pair{"maxDepth", max_values}
+                | _decl_pair{"maxHyp", max_values}
+                | _decl_pair{"selFun", sel_fun_values}
+                | _decl_pair{"redundancyElim", redundancy_elim_values}
+                | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
+                | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
+                | _decl_pair{"traceDisplay", trace_display_values}
+                | _decl_pair{"verboseClauses", verbose_clauses_values}
+                | set_strategy
+                | set_symb_order
+
+    _swap_strategy_seq{x}: x ("->" x)*
+    set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
+    _symb_ord_seq{x}: x (">" x)*
+    set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."
+
+    event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
+    query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."
+
+    axiom_decl:       "axiom"       [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
+    restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
+    lemma_decl:       "lemma"       [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."
+
+    noninterf_decl: [ typedecl ";"] _maybe_empty_seq{nidecl} "."
+    weaksecret_decl: "weaksecret" IDENT "."
+    not_decl: "not" [ typedecl ";"] gterm "."
+
+    INT: NAT | "-" NAT
+    select_decl:    "select"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
+    noselect_decl:  "noselect"  [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
+    nounif_decl:    "nounif"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."
+
+    elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
+    clauses_decl: "clauses" clauses "."
+
+    module_decl: "@module" " " IDENT
+
+    # TODO: finish defining these (comes from Cryptoverif)
+    #param_decl: "param" _non_empty_seq{IDENT} options "."
+    #proba_decl: "proba" IDENT ["(...)"] options "."
+    #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "."
+    #proof_decl: "proof" "{" proof "}"
+    #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}"
+    #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "."
+
+    nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
+    equality: term "=" term
+            | "let" IDENT "=" term "in" equality
+    mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
+    eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
+    clause: term
+           | term "->" term
+           | term "<->" term
+           | term "<=>" term
+    clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
+    mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
+    NAT: DIGIT+
+    phase: "phase" NAT [";" process]
+    TAG: IDENT
+    sync: "sync" NAT ["[" TAG "]"] [";" process]
+    COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
+    %import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library
+    %ignore WS // Disregard spaces in text
+    %ignore COMMENT
--- a/marzipan/src/parser.py
+++ b/marzipan/src/parser.py
@@ -1,4 +1,6 @@
+import re
 import sys
+from pathlib import Path

 from lark import Lark, Token, Transformer, exceptions, tree

@@ -73,377 +75,79 @@ ident_regex = (
    "/^" + "".join(f"(?!{w}$)" for w in reserved_words) + "[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/"
 )

-proverif_grammar = Lark(
-    grammar="""
-    PROCESS: "process"
-    EQUIVALENCE: "equivalence"
-    start: decl* PROCESS process | decl* EQUIVALENCE process process
-    YIELD: "yield"
-    channel: CHANNEL
-    CHANNEL: "channel"
+"""
+It might be desirable to move the grammar files around such that we only need
+to import _main.lark and import all other .larks files in _main.lark.
+This does not work at the moment due to cyclic dependencies between the grammar files,
+so we are loading all files as strings and simply concatenating for now...
+"""
+# gawk_file = "./src/grammars/marzipan_awk.lark"
+# gproverif = "./src/grammars/proverif.lark"
+# gmain = "./src/grammar/main.lark"
+# gfile = gmain
+# with open(gfile) as f:
+#     parser = Lark(
+#         f
+#         #grammar=grammar,
+#         # debug=True,
+#         # lexer_callbacks={"COMMENT": comments.append},
+#     )
+# files = sorted(Path("./src/grammar").glob("*.lark"))
+# gfiles = [f for f in files if not f.name.startswith("_")]
+# gfiles = ["common.lark", "decl.lark", "process.lark", "query.lark", "term.lark"]
+# grammar = "\n".join(parent_dir + f.read_text() for f in gfiles)
+
+parent_dir = "./src/grammar/"
+
+common_rules = Path(parent_dir + "common.lark").read_text()
+decl_rules = Path(parent_dir + "decl.lark").read_text()
+process_rules = Path(parent_dir + "process.lark").read_text()
+query_rules = Path(parent_dir + "query.lark").read_text()
+lemma_rules = Path(parent_dir + "lemma.lark").read_text()
+term_rules = Path(parent_dir + "term.lark").read_text()
+
+# marzipan additives
+common_rules = (
    """
-    + "IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/"
-    + """
-    ZERO: "0"
-    INFIX: "||"
-         | "&&"
-         | "="
-         | "<>"
-         | "<="
-         | ">="
-         | "<"
-         | ">"
-    typeid: channel
-          | IDENT
-    _non_empty_seq{x}: x ("," x)*
-    _maybe_empty_seq{x}: [ _non_empty_seq{x} ]
-
-    OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
-    OPTIONS_FUN: OPTIONS_FUN_CONST
-    OPTIONS_CONST: OPTIONS_FUN_CONST
-    OPTIONS_FREE_REDUC: "private"
-    OPTIONS_PRED: "memberOptim" | "block"
-    OPTIONS_PROCESS: "precise"
-    OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
-    OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
-    OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
-    OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "maxSubset"
-    OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll"
-    OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | "/cv_[a-zA-Z0-9À-ÿ'_]*/"
-    OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
-    OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
-    OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual
-    options{idents}: [ "[" _non_empty_seq{idents} "]" ]
-    process: ZERO
-           | YIELD
-           | IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
-           | bracketed_process
-           | piped_process
-           | replicated_process
-           | replicated_process_bounds
-           | sample_process
-           | if_process
-           | in_process
-           | out_process
-           | let_process
-           | insert_process
-           | get_process
-           | event_process
-           | phase
-           | sync
-    bracketed_process: "(" process ")"
-    piped_process: process "|" process
-    replicated_process: "!" process
-    replicated_process_bounds: "!" IDENT "<=" IDENT process
-                             | "foreach" IDENT "<=" IDENT "do" process
-    sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
-                  | IDENT "<-R" typeid [";" process]
-    let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
-               | IDENT [":" typeid] "<-" pterm [";" process]
-               | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
-    if_process: "if" pterm "then" process [ "else" process ]
-    in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
-    get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
-    out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
-    insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
-    event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
-    term: IDENT
-          | NAT
-          | "(" _maybe_empty_seq{term} ")"
-          | IDENT "(" _maybe_empty_seq{term} ")"
-          | term ( "+" | "-" ) NAT
-          | NAT "+" term
-          | term INFIX term
-          | "not" "(" term ")"
-
-    query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
-         | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
-         | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces.
-         | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
-    lemma: gterm [";" lemma]
-         | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
-         | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
-    gterm: ident_gterm
-         | fun_gterm
-         | choice_gterm
-         | infix_gterm
-         | arith_gterm
-         | arith2_gterm
-         | event_gterm
-         | injevent_gterm
-         | implies_gterm
-         | paren_gterm
-         | sample_gterm
-         | let_gterm
-    ident_gterm: IDENT
-    fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
-    choice_gterm: "choice" "[" gterm "," gterm "]"
-    infix_gterm: gterm INFIX gterm
-    arith_gterm: gterm ( "+" | "-" ) NAT
-    arith2_gterm: NAT "+" gterm
-    event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
-    injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
-    implies_gterm: gterm "==>" gterm
-    paren_gterm: "(" _maybe_empty_seq{gterm} ")"
-    sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
-    let_gterm: "let" IDENT "=" gterm "in" gterm
-
-    gbinding: "!" NAT "=" gterm [";" gbinding]
-            | IDENT "=" gterm [";" gbinding]
-
-    nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
-              | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
-    gformat: IDENT
-           | "*" IDENT
-           | IDENT "(" _maybe_empty_seq{gformat} ")"
-           | "choice" "[" gformat "," gformat "]"
-           | "not" "(" _maybe_empty_seq{gformat} ")"
-           | "new" IDENT [ "[" [ fbinding ] "]" ]
-           | "let" IDENT "=" gformat "in" gformat
-    fbinding: "!" NAT "=" gformat [";" fbinding]
-            | IDENT "=" gformat [";" fbinding]
-    nounifoption: "hypothesis"
-                | "conclusion"
-                | "ignoreAFewTimes"
-                | "inductionOn" "=" IDENT
-                | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"
-
-    pterm: IDENT
-          | NAT
-          | "(" _maybe_empty_seq{pterm} ")"
-          | IDENT "(" _maybe_empty_seq{pterm} ")"
-          | choice_pterm
-          | pterm ("+" | "-") NAT
-          | NAT "+" pterm
-          | pterm INFIX pterm
-          | not_pterm
-          | sample_pterm
-          | if_pterm
-          | let_pterm
-          | insert_pterm
-          | get_pterm
-          | event_pterm
-    choice_pterm: "choice[" pterm "," pterm "]"
-    if_pterm: "if" pterm "then" pterm [ "else" pterm ]
-    not_pterm: "not" "(" pterm ")"
-    let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
-             | IDENT [":" typeid] "<-" pterm ";" pterm
-             | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
-    sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
-                | IDENT "<-R" typeid [";" pterm]
-    insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
-    event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
-    get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
-    pattern: IDENT [":" typeid]
-           | "_" [ ":" typeid ]
-           | NAT
-           | pattern "+" NAT
-           | NAT "+" pattern
-           | "(" _maybe_empty_seq{pattern} ")"
-           | IDENT "(" _maybe_empty_seq{pattern} ")"
-           | "=" pterm
-    mayfailterm: term
-               | "fail"
-    mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
-    typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
-    failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]
-
-    decl: type_decl
-        | channel_decl
-        | free_decl
-        | const_decl
-        | fun_decl
-        | letfun_decl
-        | reduc_decl
-        | fun_reduc_decl
-        | equation_decl
-        | pred_decl
-        | table_decl
-        | let_decl
-        | set_settings_decl
-        | event_decl
-        | query_decl
-        | axiom_decl
-        | restriction_decl
-        | lemma_decl
-        | noninterf_decl
-        | weaksecret_decl
-        | not_decl
-        | select_decl
-        | noselect_decl
-        | nounif_decl
-        | elimtrue_decl
-        | clauses_decl
-        | module_decl
-        #| param_decl
-        #| proba_decl
-        #| letproba_decl
-        #| proof_decl
-        #| def_decl
-        #| expand_decl
-
-    type_decl: "type" IDENT options{OPTIONS_TYPE} "."
-    channel_decl: "channel" _non_empty_seq{IDENT} "."
-    free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
-    const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
-    fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
-    letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
-    reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
-    fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
-    equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
-    pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
-    table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "."
-    let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."
-
-    BOOL : "true" | "false"
-    NONE: "none"
-    FULL: "full"
-    ALL: "all"
-    FUNC: IDENT
-    ignoretype_options: BOOL | ALL | NONE | "attacker"
-    boolean_settings_names: "privateCommOnPublicTerms"
-                            | "rejectChoiceTrueFalse"
-                            | "rejectNoSimplif"
-                            | "allowDiffPatterns"
-                            | "inductionQueries"
-                            | "inductionLemmas"
-                            | "movenew"
-                            | "movelet"
-                            | "stopTerm"
-                            | "removeEventsForLemma"
-                            | "simpEqAll"
-                            | "eqInNames"
-                            | "preciseLetExpand"
-                            | "expandSimplifyIfCst"
-                            | "featureFuns"
-                            | "featureNames"
-                            | "featurePredicates"
-                            | "featureEvents"
-                            | "featureTables"
-                            | "featureDepth"
-                            | "featureWidth"
-                            | "simplifyDerivation"
-                            | "abbreviateDerivation"
-                            | "explainDerivation"
-                            | "unifyDerivation"
-                            | "reconstructDerivation"
-                            | "displayDerivation"
-                            | "traceBacktracking"
-                            | "interactiveSwapping"
-                            | "color"
-                            | "verboseLemmas"
-                            | "abbreviateClauses"
-                            | "removeUselessClausesBeforeDisplay"
-                            | "verboseEq"
-                            | "verboseDestructors"
-                            | "verboseTerm"
-                            | "verboseStatistics"
-                            | "verboseRules"
-                            | "verboseBase"
-                            | "verboseRedundant"
-                            | "verboseCompleted"
-                            | "verboseGoalReachable"
-
-    _decl_pair{name, value}: "set" name "=" value "."
-
-    set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}
-
-    ignore_types_values: BOOL | "all" | "none" | "attacker"
-    simplify_process_values: BOOL | "interactive"
-    precise_actions_values: BOOL | "trueWithoutArgsInNames"
-    redundant_hyp_elim_values: BOOL | "beginOnly"
-    reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL
-    attacker_values: "active" | "passive"
-    key_compromise_values: "none" | "approx" | "strict"
-    predicates_implementable: "check" | "nocheck"
-    application_values: "instantiate" | "full" | "none" | "discard"
-    max_values: "none" | "n"
-    sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset"
-    redundancy_elim_values: "best" | "simple" | "no"
-    nounif_ignore_a_few_times_values: "none" | "auto" | "all"
-    nounif_ignore_ntimes_values: "n"
-    trace_display_values: "short" | "long" | "none"
-    verbose_clauses_values: "none" | "explained" | "short"
-    set_settings_decl: set_settings_boolean_decl
-                | _decl_pair{"ignoreTypes", ignore_types_values}
-                | _decl_pair{"simplifyProcess", simplify_process_values}
-                | _decl_pair{"preciseActions", precise_actions_values}
-                | _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
-                | _decl_pair{"reconstructTrace", reconstruct_trace_values}
-                | _decl_pair{"attacker", attacker_values}
-                | _decl_pair{"keyCompromise", key_compromise_values}
-                | _decl_pair{"predicatesImplementable", predicates_implementable}
-                | _decl_pair{"saturationApplication", application_values}
-                | _decl_pair{"verificationApplication", application_values}
-                | _decl_pair{"maxDepth", max_values}
-                | _decl_pair{"maxHyp", max_values}
-                | _decl_pair{"selFun", sel_fun_values}
-                | _decl_pair{"redundancyElim", redundancy_elim_values}
-                | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
-                | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
-                | _decl_pair{"traceDisplay", trace_display_values}
-                | _decl_pair{"verboseClauses", verbose_clauses_values}
-                | set_strategy
-                | set_symb_order
-
-    _swap_strategy_seq{x}: x ("->" x)*
-    set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
-    _symb_ord_seq{x}: x (">" x)*
-    set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."
-
-    event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
-    query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."
-
-    axiom_decl:       "axiom"       [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
-    restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
-    lemma_decl:       "lemma"       [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."
-
-    noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "."
-    weaksecret_decl: "weaksecret" IDENT "."
-    not_decl: "not" [ typedecl ";"] gterm "."
-
-    INT: NAT | "-" NAT
-    select_decl:    "select"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
-    noselect_decl:  "noselect"  [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
-    nounif_decl:    "nounif"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."
-
-    elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
-    clauses_decl: "clauses" clauses "."
-
-    module_decl: "@module" " " IDENT
-
-    # TODO: finish defining these (comes from Cryptoverif)
-    #param_decl: "param" _non_empty_seq{IDENT} options "."
-    #proba_decl: "proba" IDENT ["(...)"] options "."
-    #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "."
-    #proof_decl: "proof" "{" proof "}"
-    #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}"
-    #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "."
-
-    nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
-    equality: term "=" term
-            | "let" IDENT "=" term "in" equality
-    mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
-    eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
-    clause: term
-           | term "->" term
-           | term "<->" term
-           | term "<=>" term
-    clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
-    mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
-    NAT: DIGIT+
-    phase: "phase" NAT [";" process]
-    TAG: IDENT
-    sync: "sync" NAT ["[" TAG "]"] [";" process]
-    COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
-    %import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library
-    %ignore WS // Disregard spaces in text
-    %ignore COMMENT
-""",
-    debug=True,
-    # lexer_callbacks={"COMMENT": comments.append},
+QUERY: "@query"
+REACHABLE: "@reachable"
+LEMMA: "@lemma"
+"""
+    + common_rules
 )

+
+# add @query and @reachable to query_decl, @lemma to lemma_decl
+def modify_decl_rule(rules: str, decl_rule: str, new_rule: str) -> str:
+    old_decl_renamed = f"{decl_rule}_core"
+    rename_target = f"{decl_rule}\s*:"
+    # rename *_decl -> *_decl_core
+    rules, count = re.subn(rename_target, f"{old_decl_renamed}:", rules, count=1)
+    if count == 0:
+        raise RuntimeError("*_decl not found!")
+    wrapper = f"{decl_rule}: {new_rule} {old_decl_renamed}"
+    old_decl_target = f"{old_decl_renamed}\s*:"
+    # get index of *_decl_core rule
+    match = re.search(old_decl_target, rules, flags=re.MULTILINE)
+    if not match:
+        raise RuntimeError("*_decl_core: rule not found after rename")
+    insert_pos = match.start()
+    rules = rules[:insert_pos] + wrapper + "\n" + rules[insert_pos:]
+    return rules
+
+
+query_rules = modify_decl_rule(
+    query_rules, "query_decl", "[(REACHABLE|QUERY) ESCAPED_STRING]"
+)
+
+lemma_rules = modify_decl_rule(lemma_rules, "lemma_decl", "[LEMMA ESCAPED_STRING]")
+
+grammar = (
+    common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules
+)
+
+parser = Lark(grammar)
+
 # COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
 # COMMENT:  "(*" /(\*(?!\))|[^*])*/  "*)"
 # comment: /\(\*(?:(?!\(\*|\*\)).|(?R))*\*\)/
@@ -456,7 +160,7 @@ proverif_grammar = Lark(


 def parsertest(input):
-    parsetree = proverif_grammar.parse(input)
+    parsetree = parser.parse(input)
    # tree.pydot__tree_to_png(parsetree, name + ".png")
    return parsetree

--- a/marzipan/src/splitter.py
+++ b/marzipan/src/splitter.py
@@ -0,0 +1,168 @@
+import os
+from pathlib import Path
+
+"""
+ChatGPT generated code for initial proverif grammar split
+"""
+
+# ---------- CONFIG ----------
+OUT = Path("grammar")
+OUT.mkdir(exist_ok=True)
+
+# Raw grammar text (paste full grammar here)
+GRAMMAR = read("grammars/proverif.lark")
+
+# Rules grouped by prefix – expandable later if needed
+GROUPS = {
+    "common.lark": [
+        "PROCESS",
+        "YIELD",
+        "CHANNEL",
+        "IDENT",
+        "ZERO",
+        "INFIX",
+        "typeid",
+        "_non_empty_seq",
+        "_maybe_empty_seq",
+        "OPTIONS_",  # all options-related rules
+        "BOOL",
+        "NONE",
+        "FULL",
+        "ALL",
+        "FUNC",
+        "ignoretype_options",
+        "boolean_settings_names",
+        "INT",
+        "COMMENT",
+    ],
+    "process.lark": [
+        "start",
+        "process",
+        "bracketed_process",
+        "piped_process",
+        "replicated_process",
+        "replicated_process_bounds",
+        "sample_process",
+        "let_process",
+        "if_process",
+        "in_process",
+        "out_process",
+        "insert_process",
+        "event_process",
+        "term",
+        "phase",
+        "sync",
+    ],
+    "term.lark": [
+        "gterm",
+        "ident_gterm",
+        "fun_gterm",
+        "choice_gterm",
+        "infix_gterm",
+        "arith_gterm",
+        "arith2_gterm",
+        "event_gterm",
+        "injevent_gterm",
+        "implies_gterm",
+        "paren_gterm",
+        "sample_gterm",
+        "let_gterm",
+        "gbinding",
+        "pterm",
+        "choice_pterm",
+        "if_pterm",
+        "not_pterm",
+        "let_pterm",
+        "sample_pterm",
+        "insert_pterm",
+        "event_pterm",
+        "get_pterm",
+        "pattern",
+        "mayfailterm",
+        "mayfailterm_seq",
+        "typedecl",
+        "failtypedecl",
+    ],
+    "decl.lark": [
+        "decl",
+        "type_decl",
+        "channel_decl",
+        "free_decl",
+        "const_decl",
+        "fun_decl",
+        "letfun_decl",
+        "reduc_decl",
+        "fun_reduc_decl",
+        "equation_decl",
+        "pred_decl",
+        "table_decl",
+        "let_decl",
+        "set_settings_decl",
+        "event_decl",
+        "select_decl",
+        "noselect_decl",
+        "nounif_decl",
+        "elimtrue_decl",
+        "clauses_decl",
+        "module_decl",
+        "nidecl",
+        "equality",
+        "mayfailequality",
+        "eqlist",
+        "clause",
+        "clauses",
+        "mayfailreduc",
+    ],
+    "query.lark": [
+        "query",
+        "lemma",
+        "nounifdecl",
+        "gformat",
+        "fbinding",
+        "nounifoption",
+        "TAG",
+    ],
+}
+
+# ---------- SPLITTING LOGIC ----------
+rules = GRAMMAR.strip().splitlines()
+buckets = {k: [] for k in GROUPS}
+
+
+def match(rule, prefixes):
+    return any(rule.startswith(p) for p in prefixes)
+
+
+current_target = None
+
+for line in rules:
+    striped = line.strip()
+    if ":" in striped and not striped.startswith("%"):
+        rule_name = striped.split(":")[0].strip()
+        for module, prefixes in GROUPS.items():
+            if match(rule_name, prefixes):
+                current_target = module
+                break
+    if current_target:
+        buckets[current_target].append(line)
+
+# main.lark will import everything else
+main_lark = """start: decl* PROCESS process
+%import .common.*
+%import .process.*
+%import .term.*
+%import .decl.*
+%import .query.*
+%ignore WS
+%ignore COMMENT
+"""
+
+# ---------- OUTPUT ----------
+for filename, lines in buckets.items():
+    path = OUT / filename
+    with open(path, "w") as f:
+        f.write("\n".join(lines) + "\n")
+
+(OUT / "main.lark").write_text(main_lark)
+
+print("Grammar split completed into:", OUT)