diff --git a/marzipan/src/grammar/_cryptoverif.lark b/marzipan/src/grammar/_cryptoverif.lark new file mode 100644 index 0000000..b406f4c --- /dev/null +++ b/marzipan/src/grammar/_cryptoverif.lark @@ -0,0 +1,7 @@ + # TODO: finish defining these (comes from Cryptoverif) + #param_decl: "param" _non_empty_seq{IDENT} options "." + #proba_decl: "proba" IDENT ["(...)"] options "." + #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "." + #proof_decl: "proof" "{" proof "}" + #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}" + #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "." diff --git a/marzipan/src/grammar/_main.lark b/marzipan/src/grammar/_main.lark new file mode 100644 index 0000000..0126ad3 --- /dev/null +++ b/marzipan/src/grammar/_main.lark @@ -0,0 +1,10 @@ +start: decl* PROCESS process +%import .common.* +%import .process.* +%import .term.* +%import .decl.* +%import .query.* + +%import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library +%ignore WS // Disregard spaces in text +%ignore COMMENT diff --git a/marzipan/src/grammar/common.lark b/marzipan/src/grammar/common.lark new file mode 100644 index 0000000..d8f1f15 --- /dev/null +++ b/marzipan/src/grammar/common.lark @@ -0,0 +1,111 @@ +PROCESS: "process" +EQUIVALENCE: "equivalence" +YIELD: "yield" +channel: CHANNEL +CHANNEL: "channel" +IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/ +ZERO: "0" +INFIX: "||" + | "&&" + | "=" + | "<>" + | "<=" + | ">=" + | "<" + | ">" +typeid: channel + | IDENT +_non_empty_seq{x}: x ("," x)* +_maybe_empty_seq{x}: [ _non_empty_seq{x} ] + +OPTIONS_FUN_CONST: "data" | "private" | "typeConverter" +OPTIONS_FUN: OPTIONS_FUN_CONST +OPTIONS_CONST: OPTIONS_FUN_CONST +OPTIONS_FREE_REDUC: "private" +OPTIONS_PRED: "memberOptim" | "block" +OPTIONS_PROCESS: "precise" +OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif" +OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM +OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction" +OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA | "maxSubset" +OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll" +OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | "/cv_[a-zA-Z0-9À-ÿ'_]*/" +OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml +OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml +OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual +options{idents}: [ "[" _non_empty_seq{idents} "]" ] +BOOL : "true" | "false" +NONE: "none" +FULL: "full" +ALL: "all" +FUNC: IDENT +ignoretype_options: BOOL | ALL | NONE | "attacker" +boolean_settings_names: "privateCommOnPublicTerms" + | "rejectChoiceTrueFalse" + | "rejectNoSimplif" + | "allowDiffPatterns" + | "inductionQueries" + | "inductionLemmas" + | "movenew" + | "movelet" + | "stopTerm" + | "removeEventsForLemma" + | "simpEqAll" + | "eqInNames" + | "preciseLetExpand" + | "expandSimplifyIfCst" + | "featureFuns" + | "featureNames" + | "featurePredicates" + | "featureEvents" + | "featureTables" + | "featureDepth" + | "featureWidth" + | "simplifyDerivation" + | "abbreviateDerivation" + | "explainDerivation" + | "unifyDerivation" + | "reconstructDerivation" + | "displayDerivation" + | "traceBacktracking" + | "interactiveSwapping" + | "color" + | "verboseLemmas" + | "abbreviateClauses" + | "removeUselessClausesBeforeDisplay" + | "verboseEq" + | "verboseDestructors" + | "verboseTerm" + | "verboseStatistics" + | "verboseRules" + | "verboseBase" + | "verboseRedundant" + | "verboseCompleted" + | "verboseGoalReachable" + +_decl_pair{name, value}: "set" name "=" value "." + +set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL} + +ignore_types_values: BOOL | "all" | "none" | "attacker" +simplify_process_values: BOOL | "interactive" +precise_actions_values: BOOL | "trueWithoutArgsInNames" +redundant_hyp_elim_values: BOOL | "beginOnly" +reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL +attacker_values: "active" | "passive" +key_compromise_values: "none" | "approx" | "strict" +predicates_implementable: "check" | "nocheck" +application_values: "instantiate" | "full" | "none" | "discard" +max_values: "none" | "n" +sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset" +redundancy_elim_values: "best" | "simple" | "no" +nounif_ignore_a_few_times_values: "none" | "auto" | "all" +nounif_ignore_ntimes_values: "n" +trace_display_values: "short" | "long" | "none" +verbose_clauses_values: "none" | "explained" | "short" +INT: NAT | "-" NAT +COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/ + +%import common (WORD, DIGIT, NUMBER, WS, ESCAPED_STRING) // imports from terminal library +%ignore WS // Disregard spaces in text +%ignore COMMENT diff --git a/marzipan/src/grammar/decl.lark b/marzipan/src/grammar/decl.lark new file mode 100644 index 0000000..ab73993 --- /dev/null +++ b/marzipan/src/grammar/decl.lark @@ -0,0 +1,99 @@ +decl: type_decl + | channel_decl + | free_decl + | const_decl + | fun_decl + | letfun_decl + | reduc_decl + | fun_reduc_decl + | equation_decl + | pred_decl + | table_decl + | let_decl + | set_settings_decl + | event_decl + | query_decl + | axiom_decl + | restriction_decl + | lemma_decl + | noninterf_decl + | weaksecret_decl + | not_decl + | select_decl + | noselect_decl + | nounif_decl + | elimtrue_decl + | clauses_decl + | module_decl + #| param_decl + #| proba_decl + #| letproba_decl + #| proof_decl + #| def_decl + #| expand_decl + +type_decl: "type" IDENT options{OPTIONS_TYPE} "." +channel_decl: "channel" _non_empty_seq{IDENT} "." +free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "." +const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "." +fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "." +letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "." +reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "." +fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "." +equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "." +pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "." +table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "." +let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "." + +set_settings_decl: set_settings_boolean_decl + | _decl_pair{"ignoreTypes", ignore_types_values} + | _decl_pair{"simplifyProcess", simplify_process_values} + | _decl_pair{"preciseActions", precise_actions_values} + | _decl_pair{"redundantHypElim", redundant_hyp_elim_values} + | _decl_pair{"reconstructTrace", reconstruct_trace_values} + | _decl_pair{"attacker", attacker_values} + | _decl_pair{"keyCompromise", key_compromise_values} + | _decl_pair{"predicatesImplementable", predicates_implementable} + | _decl_pair{"saturationApplication", application_values} + | _decl_pair{"verificationApplication", application_values} + | _decl_pair{"maxDepth", max_values} + | _decl_pair{"maxHyp", max_values} + | _decl_pair{"selFun", sel_fun_values} + | _decl_pair{"redundancyElim", redundancy_elim_values} + | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values} + | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values} + | _decl_pair{"traceDisplay", trace_display_values} + | _decl_pair{"verboseClauses", verbose_clauses_values} + | set_strategy + | set_symb_order + +_swap_strategy_seq{x}: x ("->" x)* +set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "." +_symb_ord_seq{x}: x (">" x)* +set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "." + +event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "." +select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." +noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." +nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "." + +elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "." +clauses_decl: "clauses" clauses "." + +module_decl: "@module" " " IDENT + +noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "." +weaksecret_decl: "weaksecret" IDENT "." + +nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ] +equality: term "=" term + | "let" IDENT "=" term "in" equality +mayfailequality: IDENT mayfailterm_seq "=" mayfailterm +eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ] +clause: term + | term "->" term + | term "<->" term + | term "<=>" term +clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ] +mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ] +NAT: DIGIT+ diff --git a/marzipan/src/grammar/lemma.lark b/marzipan/src/grammar/lemma.lark new file mode 100644 index 0000000..ddb95b4 --- /dev/null +++ b/marzipan/src/grammar/lemma.lark @@ -0,0 +1,6 @@ +lemma: gterm [";" lemma] + | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma] + | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma] +axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "." +restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "." +lemma_decl: "lemma" [ typedecl ";"] lemma options{OPTIONS_LEMMA} "." diff --git a/marzipan/src/grammar/process.lark b/marzipan/src/grammar/process.lark new file mode 100644 index 0000000..9d5abea --- /dev/null +++ b/marzipan/src/grammar/process.lark @@ -0,0 +1,45 @@ +start: decl* PROCESS process | decl* EQUIVALENCE process process +process: ZERO + | YIELD + | IDENT [ "(" _maybe_empty_seq{pterm} ")" ] + | bracketed_process + | piped_process + | replicated_process + | replicated_process_bounds + | sample_process + | if_process + | in_process + | out_process + | let_process + | insert_process + | get_process + | event_process + | phase + | sync +bracketed_process: "(" process ")" +piped_process: process "|" process +replicated_process: "!" process +replicated_process_bounds: "!" IDENT "<=" IDENT process + | "foreach" IDENT "<=" IDENT "do" process +sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process] + | IDENT "<-R" typeid [";" process] +let_process: "let" pattern "=" pterm ["in" process [ "else" process ]] + | IDENT [":" typeid] "<-" pterm [";" process] + | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ] +if_process: "if" pterm "then" process [ "else" process ] +in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ] +get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ] +out_process: "out" "(" pterm "," pterm ")" [ ";" process ] +insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ] +event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ] +term: IDENT + | NAT + | "(" _maybe_empty_seq{term} ")" + | IDENT "(" _maybe_empty_seq{term} ")" + | term ( "+" | "-" ) NAT + | NAT "+" term + | term INFIX term + | "not" "(" term ")" + +phase: "phase" NAT [";" process] +sync: "sync" NAT ["[" TAG "]"] [";" process] diff --git a/marzipan/src/grammar/query.lark b/marzipan/src/grammar/query.lark new file mode 100644 index 0000000..0432494 --- /dev/null +++ b/marzipan/src/grammar/query.lark @@ -0,0 +1,24 @@ +query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query] + | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query] + | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces. + | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query] +nounifdecl: "let" IDENT "=" gformat "in" nounifdecl + | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]] +gformat: IDENT + | "*" IDENT + | IDENT "(" _maybe_empty_seq{gformat} ")" + | "choice" "[" gformat "," gformat "]" + | "not" "(" _maybe_empty_seq{gformat} ")" + | "new" IDENT [ "[" [ fbinding ] "]" ] + | "let" IDENT "=" gformat "in" gformat +fbinding: "!" NAT "=" gformat [";" fbinding] + | IDENT "=" gformat [";" fbinding] +nounifoption: "hypothesis" + | "conclusion" + | "ignoreAFewTimes" + | "inductionOn" "=" IDENT + | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}" + +query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "." +not_decl: "not" [ typedecl ";"] gterm "." +TAG: IDENT diff --git a/marzipan/src/grammar/term.lark b/marzipan/src/grammar/term.lark new file mode 100644 index 0000000..291f1a6 --- /dev/null +++ b/marzipan/src/grammar/term.lark @@ -0,0 +1,67 @@ +gterm: ident_gterm + | fun_gterm + | choice_gterm + | infix_gterm + | arith_gterm + | arith2_gterm + | event_gterm + | injevent_gterm + | implies_gterm + | paren_gterm + | sample_gterm + | let_gterm +ident_gterm: IDENT +fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT] +choice_gterm: "choice" "[" gterm "," gterm "]" +infix_gterm: gterm INFIX gterm +arith_gterm: gterm ( "+" | "-" ) NAT +arith2_gterm: NAT "+" gterm +event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] +injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] +implies_gterm: gterm "==>" gterm +paren_gterm: "(" _maybe_empty_seq{gterm} ")" +sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ] +let_gterm: "let" IDENT "=" gterm "in" gterm + +gbinding: "!" NAT "=" gterm [";" gbinding] + | IDENT "=" gterm [";" gbinding] + +pterm: IDENT + | NAT + | "(" _maybe_empty_seq{pterm} ")" + | IDENT "(" _maybe_empty_seq{pterm} ")" + | choice_pterm + | pterm ("+" | "-") NAT + | NAT "+" pterm + | pterm INFIX pterm + | not_pterm + | sample_pterm + | if_pterm + | let_pterm + | insert_pterm + | get_pterm + | event_pterm +choice_pterm: "choice[" pterm "," pterm "]" +if_pterm: "if" pterm "then" pterm [ "else" pterm ] +not_pterm: "not" "(" pterm ")" +let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ] + | IDENT [":" typeid] "<-" pterm ";" pterm + | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ] +sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm] + | IDENT "<-R" typeid [";" pterm] +insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm +event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm +get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ] +pattern: IDENT [":" typeid] + | "_" [ ":" typeid ] + | NAT + | pattern "+" NAT + | NAT "+" pattern + | "(" _maybe_empty_seq{pattern} ")" + | IDENT "(" _maybe_empty_seq{pattern} ")" + | "=" pterm +mayfailterm: term + | "fail" +mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")" +typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ] +failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ] diff --git a/marzipan/src/grammars/marzipan_awk.lark b/marzipan/src/grammars/marzipan_awk.lark new file mode 100644 index 0000000..475fc20 --- /dev/null +++ b/marzipan/src/grammars/marzipan_awk.lark @@ -0,0 +1,47 @@ +start: line* + +line: directive_line NEWLINE + | code_line NEWLINE + | NEWLINE + +directive_line: module_directive + | alias_directive + | long_alias_start + | long_alias_end + | query_directive + | reachable_directive + | lemma_directive + +code_line: /[^\n]*/ // Anything not recognized as a directive + +module_directive: MODULE NAME + +alias_directive: ALIAS alias_pair (alias_pair)* + +alias_pair: IDENT "=" TOKEN + +long_alias_start: LONG_ALIAS IDENT +long_alias_end: LONG_ALIAS_END + +query_directive: QUERY quoted_string +reachable_directive: REACHABLE quoted_string +lemma_directive: LEMMA quoted_string + + +quoted_string: ESCAPED_STRING + +NAME: IDENT +TOKEN: /[^ \t\n]+/ +IDENT: /[A-Za-z_][A-Za-z0-9_']*/ +MODULE: "@module" +ALIAS: "@alias" +LONG_ALIAS: "@long-alias" +LONG_ALIAS_END: "@long-alias-end" +QUERY: "@query" +REACHABLE: "@reachable" +LEMMA: "@lemma" +NEWLINE: /\r?\n/ + +%import common.ESCAPED_STRING +%import common.WS +%ignore WS diff --git a/marzipan/src/grammars/proverif.lark b/marzipan/src/grammars/proverif.lark new file mode 100644 index 0000000..1119130 --- /dev/null +++ b/marzipan/src/grammars/proverif.lark @@ -0,0 +1,361 @@ +PROCESS: "process" +start: decl* PROCESS process +YIELD: "yield" +channel: CHANNEL +CHANNEL: "channel" +IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/ +ZERO: "0" +INFIX: "||" + | "&&" + | "=" + | "<>" + | "<=" + | ">=" + | "<" + | ">" +typeid: channel + | IDENT +_non_empty_seq{x}: x ("," x)* +_maybe_empty_seq{x}: [ _non_empty_seq{x} ] + +OPTIONS_FUN_CONST: "data" | "private" | "typeConverter" + OPTIONS_FUN: OPTIONS_FUN_CONST + OPTIONS_CONST: OPTIONS_FUN_CONST + OPTIONS_FREE_REDUC: "private" + OPTIONS_PRED: "memberOptim" | "block" + OPTIONS_PROCESS: "precise" + OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif" + OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM + OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction" + OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "maxSubset" + OPTIONS_QUERY: OPTIONS_QUERY_LEMMA_AXIOM | "proveAll" + OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | "/cv_[a-zA-Z0-9À-ÿ'_]*/" + OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml + OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml + OPTIONS_TYPE: "fixed" | "bounded" # TODO(blipp): complete this. These are only for compatibility with CryptoVerif and are ignored + options{idents}: [ "[" _non_empty_seq{idents} "]" ] + process: ZERO + | YIELD + | IDENT [ "(" _maybe_empty_seq{pterm} ")" ] + | bracketed_process + | piped_process + | replicated_process + | replicated_process_bounds + | sample_process + | if_process + | in_process + | out_process + | let_process + | insert_process + | get_process + | event_process + | phase + | sync + bracketed_process: "(" process ")" + piped_process: process "|" process + replicated_process: "!" process + replicated_process_bounds: "!" IDENT "<=" IDENT process + | "foreach" IDENT "<=" IDENT "do" process + sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process] + | IDENT "<-R" typeid [";" process] + let_process: "let" pattern "=" pterm ["in" process [ "else" process ]] + | IDENT [":" typeid] "<-" pterm [";" process] + | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ] + if_process: "if" pterm "then" process [ "else" process ] + in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ] + get_process: IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ] + out_process: "out" "(" pterm "," pterm ")" [ ";" process ] + insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ] + event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ] + term: IDENT + | NAT + | "(" _maybe_empty_seq{term} ")" + | IDENT "(" _maybe_empty_seq{term} ")" + | term ( "+" | "-" ) NAT + | NAT "+" term + | term INFIX term + | "not" "(" term ")" + + query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query] + | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query] + | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces. + | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query] + lemma: gterm [";" lemma] + | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma] + | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma] + gterm: ident_gterm + | fun_gterm + | choice_gterm + | infix_gterm + | arith_gterm + | arith2_gterm + | event_gterm + | injevent_gterm + | implies_gterm + | paren_gterm + | sample_gterm + | let_gterm + ident_gterm: IDENT + fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT] + choice_gterm: "choice" "[" gterm "," gterm "]" + infix_gterm: gterm INFIX gterm + arith_gterm: gterm ( "+" | "-" ) NAT + arith2_gterm: NAT "+" gterm + event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] + injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] + implies_gterm: gterm "==>" gterm + paren_gterm: "(" _maybe_empty_seq{gterm} ")" + sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ] + let_gterm: "let" IDENT "=" gterm "in" gterm + + gbinding: "!" NAT "=" gterm [";" gbinding] + | IDENT "=" gterm [";" gbinding] + + nounifdecl: "let" IDENT "=" gformat "in" nounifdecl + | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]] + gformat: IDENT + | "*" IDENT + | IDENT "(" _maybe_empty_seq{gformat} ")" + | "choice" "[" gformat "," gformat "]" + | "not" "(" _maybe_empty_seq{gformat} ")" + | "new" IDENT [ "[" [ fbinding ] "]" ] + | "let" IDENT "=" gformat "in" gformat + fbinding: "!" NAT "=" gformat [";" fbinding] + | IDENT "=" gformat [";" fbinding] + nounifoption: "hypothesis" + | "conclusion" + | "ignoreAFewTimes" + | "inductionOn" "=" IDENT + | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}" + + pterm: IDENT + | NAT + | "(" _maybe_empty_seq{pterm} ")" + | IDENT "(" _maybe_empty_seq{pterm} ")" + | choice_pterm + | pterm ("+" | "-") NAT + | NAT "+" pterm + | pterm INFIX pterm + | not_pterm + | sample_pterm + | if_pterm + | let_pterm + | insert_pterm + | get_pterm + | event_pterm + choice_pterm: "choice[" pterm "," pterm "]" + if_pterm: "if" pterm "then" pterm [ "else" pterm ] + not_pterm: "not" "(" pterm ")" + let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ] + | IDENT [":" typeid] "<-" pterm ";" pterm + | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ] + sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm] + | IDENT "<-R" typeid [";" pterm] + insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm + event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm + get_pterm: IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ] + pattern: IDENT [":" typeid] + | "_" [ ":" typeid ] + | NAT + | pattern "+" NAT + | NAT "+" pattern + | "(" _maybe_empty_seq{pattern} ")" + | IDENT "(" _maybe_empty_seq{pattern} ")" + | "=" pterm + mayfailterm: term + | "fail" + mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")" + typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ] + failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ] + + decl: type_decl + | channel_decl + | free_decl + | const_decl + | fun_decl + | letfun_decl + | reduc_decl + | fun_reduc_decl + | equation_decl + | pred_decl + | table_decl + | let_decl + | set_settings_decl + | event_decl + | query_decl + | axiom_decl + | restriction_decl + | lemma_decl + | noninterf_decl + | weaksecret_decl + | not_decl + | select_decl + | noselect_decl + | nounif_decl + | elimtrue_decl + | clauses_decl + | module_decl + #| param_decl + #| proba_decl + #| letproba_decl + #| proof_decl + #| def_decl + #| expand_decl + + type_decl: "type" IDENT options{OPTIONS_TYPE} "." + channel_decl: "channel" _non_empty_seq{IDENT} "." + free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "." + const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "." + fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "." + letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "." + reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "." + fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "." + equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "." + pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "." + table_decl: IDENT "(" _maybe_empty_seq{typeid} ")" "." + let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "." + + BOOL : "true" | "false" + NONE: "none" + FULL: "full" + ALL: "all" + FUNC: IDENT + ignoretype_options: BOOL | ALL | NONE | "attacker" + boolean_settings_names: "privateCommOnPublicTerms" + | "rejectChoiceTrueFalse" + | "rejectNoSimplif" + | "allowDiffPatterns" + | "inductionQueries" + | "inductionLemmas" + | "movenew" + | "movelet" + | "stopTerm" + | "removeEventsForLemma" + | "simpEqAll" + | "eqInNames" + | "preciseLetExpand" + | "expandSimplifyIfCst" + | "featureFuns" + | "featureNames" + | "featurePredicates" + | "featureEvents" + | "featureTables" + | "featureDepth" + | "featureWidth" + | "simplifyDerivation" + | "abbreviateDerivation" + | "explainDerivation" + | "unifyDerivation" + | "reconstructDerivation" + | "displayDerivation" + | "traceBacktracking" + | "interactiveSwapping" + | "color" + | "verboseLemmas" + | "abbreviateClauses" + | "removeUselessClausesBeforeDisplay" + | "verboseEq" + | "verboseDestructors" + | "verboseTerm" + | "verboseStatistics" + | "verboseRules" + | "verboseBase" + | "verboseRedundant" + | "verboseCompleted" + | "verboseGoalReachable" + + _decl_pair{name, value}: "set" name "=" value "." + + set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL} + + ignore_types_values: BOOL | "all" | "none" | "attacker" + simplify_process_values: BOOL | "interactive" + precise_actions_values: BOOL | "trueWithoutArgsInNames" + redundant_hyp_elim_values: BOOL | "beginOnly" + reconstruct_trace_values: BOOL | "n" + attacker_values: "active" | "passive" + key_compromise_values: "none" | "approx" | "strict" + predicates_implementable: "check" | "nocheck" + application_values: "instantiate" | "full" | "none" | "discard" + max_values: "none" | "n" + sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset" + redundancy_elim_values: "best" | "simple" | "no" + nounif_ignore_a_few_times_values: "none" | "auto" | "all" + nounif_ignore_ntimes_values: "n" + trace_display_values: "short" | "long" | "none" + verbose_clauses_values: "none" | "explained" | "short" + set_settings_decl: set_settings_boolean_decl + | _decl_pair{"ignoreTypes", ignore_types_values} + | _decl_pair{"simplifyProcess", simplify_process_values} + | _decl_pair{"preciseActions", precise_actions_values} + | _decl_pair{"redundantHypElim", redundant_hyp_elim_values} + | _decl_pair{"reconstructTrace", reconstruct_trace_values} + | _decl_pair{"attacker", attacker_values} + | _decl_pair{"keyCompromise", key_compromise_values} + | _decl_pair{"predicatesImplementable", predicates_implementable} + | _decl_pair{"saturationApplication", application_values} + | _decl_pair{"verificationApplication", application_values} + | _decl_pair{"maxDepth", max_values} + | _decl_pair{"maxHyp", max_values} + | _decl_pair{"selFun", sel_fun_values} + | _decl_pair{"redundancyElim", redundancy_elim_values} + | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values} + | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values} + | _decl_pair{"traceDisplay", trace_display_values} + | _decl_pair{"verboseClauses", verbose_clauses_values} + | set_strategy + | set_symb_order + + _swap_strategy_seq{x}: x ("->" x)* + set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "." + _symb_ord_seq{x}: x (">" x)* + set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "." + + event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "." + query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "." + + axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "." + restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "." + lemma_decl: "lemma" [ typedecl ";"] lemma options{OPTIONS_LEMMA} "." + + noninterf_decl: [ typedecl ";"] _maybe_empty_seq{nidecl} "." + weaksecret_decl: "weaksecret" IDENT "." + not_decl: "not" [ typedecl ";"] gterm "." + + INT: NAT | "-" NAT + select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." + noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." + nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "." + + elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "." + clauses_decl: "clauses" clauses "." + + module_decl: "@module" " " IDENT + + # TODO: finish defining these (comes from Cryptoverif) + #param_decl: "param" _non_empty_seq{IDENT} options "." + #proba_decl: "proba" IDENT ["(...)"] options "." + #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "." + #proof_decl: "proof" "{" proof "}" + #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}" + #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "." + + nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ] + equality: term "=" term + | "let" IDENT "=" term "in" equality + mayfailequality: IDENT mayfailterm_seq "=" mayfailterm + eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ] + clause: term + | term "->" term + | term "<->" term + | term "<=>" term + clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ] + mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ] + NAT: DIGIT+ + phase: "phase" NAT [";" process] + TAG: IDENT + sync: "sync" NAT ["[" TAG "]"] [";" process] + COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/ + %import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library + %ignore WS // Disregard spaces in text + %ignore COMMENT diff --git a/marzipan/src/parser.py b/marzipan/src/parser.py index 8c21a65..fc3271d 100644 --- a/marzipan/src/parser.py +++ b/marzipan/src/parser.py @@ -1,4 +1,6 @@ +import re import sys +from pathlib import Path from lark import Lark, Token, Transformer, exceptions, tree @@ -73,377 +75,79 @@ ident_regex = ( "/^" + "".join(f"(?!{w}$)" for w in reserved_words) + "[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/" ) -proverif_grammar = Lark( - grammar=""" - PROCESS: "process" - EQUIVALENCE: "equivalence" - start: decl* PROCESS process | decl* EQUIVALENCE process process - YIELD: "yield" - channel: CHANNEL - CHANNEL: "channel" +""" +It might be desirable to move the grammar files around such that we only need +to import _main.lark and import all other .larks files in _main.lark. +This does not work at the moment due to cyclic dependencies between the grammar files, +so we are loading all files as strings and simply concatenating for now... +""" +# gawk_file = "./src/grammars/marzipan_awk.lark" +# gproverif = "./src/grammars/proverif.lark" +# gmain = "./src/grammar/main.lark" +# gfile = gmain +# with open(gfile) as f: +# parser = Lark( +# f +# #grammar=grammar, +# # debug=True, +# # lexer_callbacks={"COMMENT": comments.append}, +# ) +# files = sorted(Path("./src/grammar").glob("*.lark")) +# gfiles = [f for f in files if not f.name.startswith("_")] +# gfiles = ["common.lark", "decl.lark", "process.lark", "query.lark", "term.lark"] +# grammar = "\n".join(parent_dir + f.read_text() for f in gfiles) + +parent_dir = "./src/grammar/" + +common_rules = Path(parent_dir + "common.lark").read_text() +decl_rules = Path(parent_dir + "decl.lark").read_text() +process_rules = Path(parent_dir + "process.lark").read_text() +query_rules = Path(parent_dir + "query.lark").read_text() +lemma_rules = Path(parent_dir + "lemma.lark").read_text() +term_rules = Path(parent_dir + "term.lark").read_text() + +# marzipan additives +common_rules = ( """ - + "IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/" - + """ - ZERO: "0" - INFIX: "||" - | "&&" - | "=" - | "<>" - | "<=" - | ">=" - | "<" - | ">" - typeid: channel - | IDENT - _non_empty_seq{x}: x ("," x)* - _maybe_empty_seq{x}: [ _non_empty_seq{x} ] - - OPTIONS_FUN_CONST: "data" | "private" | "typeConverter" - OPTIONS_FUN: OPTIONS_FUN_CONST - OPTIONS_CONST: OPTIONS_FUN_CONST - OPTIONS_FREE_REDUC: "private" - OPTIONS_PRED: "memberOptim" | "block" - OPTIONS_PROCESS: "precise" - OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif" - OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM - OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction" - OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "maxSubset" - OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll" - OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | "/cv_[a-zA-Z0-9À-ÿ'_]*/" - OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml - OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml - OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual - options{idents}: [ "[" _non_empty_seq{idents} "]" ] - process: ZERO - | YIELD - | IDENT [ "(" _maybe_empty_seq{pterm} ")" ] - | bracketed_process - | piped_process - | replicated_process - | replicated_process_bounds - | sample_process - | if_process - | in_process - | out_process - | let_process - | insert_process - | get_process - | event_process - | phase - | sync - bracketed_process: "(" process ")" - piped_process: process "|" process - replicated_process: "!" process - replicated_process_bounds: "!" IDENT "<=" IDENT process - | "foreach" IDENT "<=" IDENT "do" process - sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process] - | IDENT "<-R" typeid [";" process] - let_process: "let" pattern "=" pterm ["in" process [ "else" process ]] - | IDENT [":" typeid] "<-" pterm [";" process] - | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ] - if_process: "if" pterm "then" process [ "else" process ] - in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ] - get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ] - out_process: "out" "(" pterm "," pterm ")" [ ";" process ] - insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ] - event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ] - term: IDENT - | NAT - | "(" _maybe_empty_seq{term} ")" - | IDENT "(" _maybe_empty_seq{term} ")" - | term ( "+" | "-" ) NAT - | NAT "+" term - | term INFIX term - | "not" "(" term ")" - - query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query] - | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query] - | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces. - | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query] - lemma: gterm [";" lemma] - | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma] - | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma] - gterm: ident_gterm - | fun_gterm - | choice_gterm - | infix_gterm - | arith_gterm - | arith2_gterm - | event_gterm - | injevent_gterm - | implies_gterm - | paren_gterm - | sample_gterm - | let_gterm - ident_gterm: IDENT - fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT] - choice_gterm: "choice" "[" gterm "," gterm "]" - infix_gterm: gterm INFIX gterm - arith_gterm: gterm ( "+" | "-" ) NAT - arith2_gterm: NAT "+" gterm - event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] - injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] - implies_gterm: gterm "==>" gterm - paren_gterm: "(" _maybe_empty_seq{gterm} ")" - sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ] - let_gterm: "let" IDENT "=" gterm "in" gterm - - gbinding: "!" NAT "=" gterm [";" gbinding] - | IDENT "=" gterm [";" gbinding] - - nounifdecl: "let" IDENT "=" gformat "in" nounifdecl - | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]] - gformat: IDENT - | "*" IDENT - | IDENT "(" _maybe_empty_seq{gformat} ")" - | "choice" "[" gformat "," gformat "]" - | "not" "(" _maybe_empty_seq{gformat} ")" - | "new" IDENT [ "[" [ fbinding ] "]" ] - | "let" IDENT "=" gformat "in" gformat - fbinding: "!" NAT "=" gformat [";" fbinding] - | IDENT "=" gformat [";" fbinding] - nounifoption: "hypothesis" - | "conclusion" - | "ignoreAFewTimes" - | "inductionOn" "=" IDENT - | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}" - - pterm: IDENT - | NAT - | "(" _maybe_empty_seq{pterm} ")" - | IDENT "(" _maybe_empty_seq{pterm} ")" - | choice_pterm - | pterm ("+" | "-") NAT - | NAT "+" pterm - | pterm INFIX pterm - | not_pterm - | sample_pterm - | if_pterm - | let_pterm - | insert_pterm - | get_pterm - | event_pterm - choice_pterm: "choice[" pterm "," pterm "]" - if_pterm: "if" pterm "then" pterm [ "else" pterm ] - not_pterm: "not" "(" pterm ")" - let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ] - | IDENT [":" typeid] "<-" pterm ";" pterm - | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ] - sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm] - | IDENT "<-R" typeid [";" pterm] - insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm - event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm - get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ] - pattern: IDENT [":" typeid] - | "_" [ ":" typeid ] - | NAT - | pattern "+" NAT - | NAT "+" pattern - | "(" _maybe_empty_seq{pattern} ")" - | IDENT "(" _maybe_empty_seq{pattern} ")" - | "=" pterm - mayfailterm: term - | "fail" - mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")" - typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ] - failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ] - - decl: type_decl - | channel_decl - | free_decl - | const_decl - | fun_decl - | letfun_decl - | reduc_decl - | fun_reduc_decl - | equation_decl - | pred_decl - | table_decl - | let_decl - | set_settings_decl - | event_decl - | query_decl - | axiom_decl - | restriction_decl - | lemma_decl - | noninterf_decl - | weaksecret_decl - | not_decl - | select_decl - | noselect_decl - | nounif_decl - | elimtrue_decl - | clauses_decl - | module_decl - #| param_decl - #| proba_decl - #| letproba_decl - #| proof_decl - #| def_decl - #| expand_decl - - type_decl: "type" IDENT options{OPTIONS_TYPE} "." - channel_decl: "channel" _non_empty_seq{IDENT} "." - free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "." - const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "." - fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "." - letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "." - reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "." - fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "." - equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "." - pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "." - table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "." - let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "." - - BOOL : "true" | "false" - NONE: "none" - FULL: "full" - ALL: "all" - FUNC: IDENT - ignoretype_options: BOOL | ALL | NONE | "attacker" - boolean_settings_names: "privateCommOnPublicTerms" - | "rejectChoiceTrueFalse" - | "rejectNoSimplif" - | "allowDiffPatterns" - | "inductionQueries" - | "inductionLemmas" - | "movenew" - | "movelet" - | "stopTerm" - | "removeEventsForLemma" - | "simpEqAll" - | "eqInNames" - | "preciseLetExpand" - | "expandSimplifyIfCst" - | "featureFuns" - | "featureNames" - | "featurePredicates" - | "featureEvents" - | "featureTables" - | "featureDepth" - | "featureWidth" - | "simplifyDerivation" - | "abbreviateDerivation" - | "explainDerivation" - | "unifyDerivation" - | "reconstructDerivation" - | "displayDerivation" - | "traceBacktracking" - | "interactiveSwapping" - | "color" - | "verboseLemmas" - | "abbreviateClauses" - | "removeUselessClausesBeforeDisplay" - | "verboseEq" - | "verboseDestructors" - | "verboseTerm" - | "verboseStatistics" - | "verboseRules" - | "verboseBase" - | "verboseRedundant" - | "verboseCompleted" - | "verboseGoalReachable" - - _decl_pair{name, value}: "set" name "=" value "." - - set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL} - - ignore_types_values: BOOL | "all" | "none" | "attacker" - simplify_process_values: BOOL | "interactive" - precise_actions_values: BOOL | "trueWithoutArgsInNames" - redundant_hyp_elim_values: BOOL | "beginOnly" - reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL - attacker_values: "active" | "passive" - key_compromise_values: "none" | "approx" | "strict" - predicates_implementable: "check" | "nocheck" - application_values: "instantiate" | "full" | "none" | "discard" - max_values: "none" | "n" - sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset" - redundancy_elim_values: "best" | "simple" | "no" - nounif_ignore_a_few_times_values: "none" | "auto" | "all" - nounif_ignore_ntimes_values: "n" - trace_display_values: "short" | "long" | "none" - verbose_clauses_values: "none" | "explained" | "short" - set_settings_decl: set_settings_boolean_decl - | _decl_pair{"ignoreTypes", ignore_types_values} - | _decl_pair{"simplifyProcess", simplify_process_values} - | _decl_pair{"preciseActions", precise_actions_values} - | _decl_pair{"redundantHypElim", redundant_hyp_elim_values} - | _decl_pair{"reconstructTrace", reconstruct_trace_values} - | _decl_pair{"attacker", attacker_values} - | _decl_pair{"keyCompromise", key_compromise_values} - | _decl_pair{"predicatesImplementable", predicates_implementable} - | _decl_pair{"saturationApplication", application_values} - | _decl_pair{"verificationApplication", application_values} - | _decl_pair{"maxDepth", max_values} - | _decl_pair{"maxHyp", max_values} - | _decl_pair{"selFun", sel_fun_values} - | _decl_pair{"redundancyElim", redundancy_elim_values} - | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values} - | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values} - | _decl_pair{"traceDisplay", trace_display_values} - | _decl_pair{"verboseClauses", verbose_clauses_values} - | set_strategy - | set_symb_order - - _swap_strategy_seq{x}: x ("->" x)* - set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "." - _symb_ord_seq{x}: x (">" x)* - set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "." - - event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "." - query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "." - - axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "." - restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "." - lemma_decl: "lemma" [ typedecl ";"] lemma options{OPTIONS_LEMMA} "." - - noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "." - weaksecret_decl: "weaksecret" IDENT "." - not_decl: "not" [ typedecl ";"] gterm "." - - INT: NAT | "-" NAT - select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." - noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." - nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "." - - elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "." - clauses_decl: "clauses" clauses "." - - module_decl: "@module" " " IDENT - - # TODO: finish defining these (comes from Cryptoverif) - #param_decl: "param" _non_empty_seq{IDENT} options "." - #proba_decl: "proba" IDENT ["(...)"] options "." - #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "." - #proof_decl: "proof" "{" proof "}" - #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}" - #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "." - - nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ] - equality: term "=" term - | "let" IDENT "=" term "in" equality - mayfailequality: IDENT mayfailterm_seq "=" mayfailterm - eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ] - clause: term - | term "->" term - | term "<->" term - | term "<=>" term - clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ] - mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ] - NAT: DIGIT+ - phase: "phase" NAT [";" process] - TAG: IDENT - sync: "sync" NAT ["[" TAG "]"] [";" process] - COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/ - %import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library - %ignore WS // Disregard spaces in text - %ignore COMMENT -""", - debug=True, - # lexer_callbacks={"COMMENT": comments.append}, +QUERY: "@query" +REACHABLE: "@reachable" +LEMMA: "@lemma" +""" + + common_rules ) + +# add @query and @reachable to query_decl, @lemma to lemma_decl +def modify_decl_rule(rules: str, decl_rule: str, new_rule: str) -> str: + old_decl_renamed = f"{decl_rule}_core" + rename_target = f"{decl_rule}\s*:" + # rename *_decl -> *_decl_core + rules, count = re.subn(rename_target, f"{old_decl_renamed}:", rules, count=1) + if count == 0: + raise RuntimeError("*_decl not found!") + wrapper = f"{decl_rule}: {new_rule} {old_decl_renamed}" + old_decl_target = f"{old_decl_renamed}\s*:" + # get index of *_decl_core rule + match = re.search(old_decl_target, rules, flags=re.MULTILINE) + if not match: + raise RuntimeError("*_decl_core: rule not found after rename") + insert_pos = match.start() + rules = rules[:insert_pos] + wrapper + "\n" + rules[insert_pos:] + return rules + + +query_rules = modify_decl_rule( + query_rules, "query_decl", "[(REACHABLE|QUERY) ESCAPED_STRING]" +) + +lemma_rules = modify_decl_rule(lemma_rules, "lemma_decl", "[LEMMA ESCAPED_STRING]") + +grammar = ( + common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules +) + +parser = Lark(grammar) + # COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/ # COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)" # comment: /\(\*(?:(?!\(\*|\*\)).|(?R))*\*\)/ @@ -456,7 +160,7 @@ proverif_grammar = Lark( def parsertest(input): - parsetree = proverif_grammar.parse(input) + parsetree = parser.parse(input) # tree.pydot__tree_to_png(parsetree, name + ".png") return parsetree diff --git a/marzipan/src/splitter.py b/marzipan/src/splitter.py new file mode 100644 index 0000000..7d37fe6 --- /dev/null +++ b/marzipan/src/splitter.py @@ -0,0 +1,168 @@ +import os +from pathlib import Path + +""" +ChatGPT generated code for initial proverif grammar split +""" + +# ---------- CONFIG ---------- +OUT = Path("grammar") +OUT.mkdir(exist_ok=True) + +# Raw grammar text (paste full grammar here) +GRAMMAR = read("grammars/proverif.lark") + +# Rules grouped by prefix – expandable later if needed +GROUPS = { + "common.lark": [ + "PROCESS", + "YIELD", + "CHANNEL", + "IDENT", + "ZERO", + "INFIX", + "typeid", + "_non_empty_seq", + "_maybe_empty_seq", + "OPTIONS_", # all options-related rules + "BOOL", + "NONE", + "FULL", + "ALL", + "FUNC", + "ignoretype_options", + "boolean_settings_names", + "INT", + "COMMENT", + ], + "process.lark": [ + "start", + "process", + "bracketed_process", + "piped_process", + "replicated_process", + "replicated_process_bounds", + "sample_process", + "let_process", + "if_process", + "in_process", + "out_process", + "insert_process", + "event_process", + "term", + "phase", + "sync", + ], + "term.lark": [ + "gterm", + "ident_gterm", + "fun_gterm", + "choice_gterm", + "infix_gterm", + "arith_gterm", + "arith2_gterm", + "event_gterm", + "injevent_gterm", + "implies_gterm", + "paren_gterm", + "sample_gterm", + "let_gterm", + "gbinding", + "pterm", + "choice_pterm", + "if_pterm", + "not_pterm", + "let_pterm", + "sample_pterm", + "insert_pterm", + "event_pterm", + "get_pterm", + "pattern", + "mayfailterm", + "mayfailterm_seq", + "typedecl", + "failtypedecl", + ], + "decl.lark": [ + "decl", + "type_decl", + "channel_decl", + "free_decl", + "const_decl", + "fun_decl", + "letfun_decl", + "reduc_decl", + "fun_reduc_decl", + "equation_decl", + "pred_decl", + "table_decl", + "let_decl", + "set_settings_decl", + "event_decl", + "select_decl", + "noselect_decl", + "nounif_decl", + "elimtrue_decl", + "clauses_decl", + "module_decl", + "nidecl", + "equality", + "mayfailequality", + "eqlist", + "clause", + "clauses", + "mayfailreduc", + ], + "query.lark": [ + "query", + "lemma", + "nounifdecl", + "gformat", + "fbinding", + "nounifoption", + "TAG", + ], +} + +# ---------- SPLITTING LOGIC ---------- +rules = GRAMMAR.strip().splitlines() +buckets = {k: [] for k in GROUPS} + + +def match(rule, prefixes): + return any(rule.startswith(p) for p in prefixes) + + +current_target = None + +for line in rules: + striped = line.strip() + if ":" in striped and not striped.startswith("%"): + rule_name = striped.split(":")[0].strip() + for module, prefixes in GROUPS.items(): + if match(rule_name, prefixes): + current_target = module + break + if current_target: + buckets[current_target].append(line) + +# main.lark will import everything else +main_lark = """start: decl* PROCESS process +%import .common.* +%import .process.* +%import .term.* +%import .decl.* +%import .query.* +%ignore WS +%ignore COMMENT +""" + +# ---------- OUTPUT ---------- +for filename, lines in buckets.items(): + path = OUT / filename + with open(path, "w") as f: + f.write("\n".join(lines) + "\n") + +(OUT / "main.lark").write_text(main_lark) + +print("Grammar split completed into:", OUT)