feat: module declaration, WIP: missing ProVerif option terms

Co-authored-by: Anja Rabich <a.rabich@uni-luebeck.de>
feat: add parser for multi-line comments, without nesting
2025-12-12 07:40:30 -08:00 · 2025-10-14 18:01:13 +02:00 · 2025-10-14 17:31:31 +02:00
3 changed files with 77 additions and 39 deletions
--- a/marzipan/TODO.md
+++ b/marzipan/TODO.md
@@ -33,11 +33,12 @@
 * ~~exception handling in analyze() and in run_proverif()~~
 * ~~refactor filtering in run_proverif (see karo's comment)~~
 * ~configurable target directory~
+* ~lark parser: multiline comments, how???~

 ## Next Steps

 * integrate marzipan.awk into Python, somehow
-  * lark parser: multiline comments, how???
+  * options term special cases (c.f. manual page 133, starting with "fun" term)
 * rewrite marzipan.awk into Python/LARK
  * define a LARK grammar for marzipan.awk rules
  * write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)
@@ -54,6 +55,6 @@
    * and rewrite the AST within Python
    * reconstruct ProVerif input file for ProVerif
  * rewrite our CPP usages into Python/…?
-
+* low priority: nested comments in ProVerif code

 “it replaces the Bash script and is idiomatic Python code”
--- a/marzipan/src/init.py
+++ b/marzipan/src/init.py
@@ -1,5 +1,7 @@
 from .util import pkgs, setup_exports, export, rename
-#from rich.console import Console
+from .parser import *
+
+# from rich.console import Console
 import click

 target_subdir = "target/proverif"
@@ -52,7 +54,13 @@ def run_proverif(file, extra_args=[]):
    params = ["proverif", "-test", *extra_args, file]
    logger.debug(params)

-    process = exc_piped(params, stderr=pkgs.subprocess.PIPE, stdout=pkgs.subprocess.PIPE, text=True, bufsize=1)
+    process = exc_piped(
+        params,
+        stderr=pkgs.subprocess.PIPE,
+        stdout=pkgs.subprocess.PIPE,
+        text=True,
+        bufsize=1,
+    )
    try:
        prev_line = None
        for line in process.stdout:
@@ -72,7 +80,9 @@ def run_proverif(file, extra_args=[]):
        return_code = process.wait()

        if return_code != 0:
-            logger.error(f"Proverif exited with a non-zero error code {params}: {return_code}")
+            logger.error(
+                f"Proverif exited with a non-zero error code {params}: {return_code}"
+            )
            exit(return_code)


@@ -88,8 +98,13 @@ def cpp(file, cpp_prep):


 def awk(repo_path, cpp_prep, awk_prep):
-    params = ["awk", "-f", str(pkgs.os.path.join(repo_path, "marzipan/marzipan.awk")), cpp_prep]
-    with open(awk_prep, 'w') as file:
+    params = [
+        "awk",
+        "-f",
+        str(pkgs.os.path.join(repo_path, "marzipan/marzipan.awk")),
+        cpp_prep,
+    ]
+    with open(awk_prep, "w") as file:
        exc(params, stderr=pkgs.sys.stderr, stdout=file)
        file.write("\nprocess main")

@@ -104,11 +119,11 @@ def pretty_output_init(file_path):
    expected = []
    descs = []

-    with open(file_path, 'r') as file:
+    with open(file_path, "r") as file:
        content = file.read()

        # Process lemmas first
-        result = pkgs.re.findall(r'@(lemma)(?=\s+\"([^\"]*)\")', content)
+        result = pkgs.re.findall(r"@(lemma)(?=\s+\"([^\"]*)\")", content)
        if result:
            # The regex only returns lemmas. For lemmas, we always expect the result 'true' from ProVerif.
            expected.extend([True for _ in range(len(result))])
@@ -118,8 +133,10 @@ def pretty_output_init(file_path):
        result = pkgs.re.findall(r'@(query|reachable)(?=\s+"[^\"]*")', content)
        if result:
            # For queries, we expect 'true' from ProVerif, for reachable, we expect 'false'.
-            expected.extend([e == '@query' for e in result])
-            reachable_result = pkgs.re.findall(r'@(query|reachable)\s+"([^\"]*)"', content)
+            expected.extend([e == "@query" for e in result])
+            reachable_result = pkgs.re.findall(
+                r'@(query|reachable)\s+"([^\"]*)"', content
+            )
            descs.extend([e[1] for e in reachable_result])

    ta = pkgs.time.time()
@@ -133,9 +150,9 @@ def pretty_output_step(file_path, line, expected, descs, res, ctr, ta):

    # Output from ProVerif contains a trailing newline, which we do not have in the expected output. Remove it for meaningful matching.
    outp_clean_raw = line.rstrip()
-    if outp_clean_raw == 'true':
+    if outp_clean_raw == "true":
        outp_clean = True
-    elif outp_clean_raw == 'false':
+    elif outp_clean_raw == "false":
        outp_clean = False
    else:
        outp_clean = outp_clean_raw
@@ -155,7 +172,9 @@ def pretty_output_step(file_path, line, expected, descs, res, ctr, ta):
 def pretty_output(file_path):
    (ta, res, ctr, expected, descs) = pretty_output_init(file_path)
    for line in pkgs.sys.stdin:
-        (res, ctr, ta) = pretty_output_step(file_path, line, expected, descs, res, ctr, ta)
+        (res, ctr, ta) = pretty_output_step(
+            file_path, line, expected, descs, res, ctr, ta
+        )


 def get_target_dir(path, output):
@@ -166,18 +185,21 @@ def get_target_dir(path, output):


@main.command()
-@click.option('--output', 'output', required=False)
+@click.option("--output", "output", required=False)
@click.argument("repo_path")
 def analyze(repo_path, output):
    target_dir = get_target_dir(repo_path, output)
    pkgs.os.makedirs(target_dir, exist_ok=True)

    entries = []
-    analysis_dir = pkgs.os.path.join(repo_path, 'analysis')
-    entries.extend(sorted(pkgs.glob.glob(str(analysis_dir) + '/*.entry.mpv')))
+    analysis_dir = pkgs.os.path.join(repo_path, "analysis")
+    entries.extend(sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv")))

    with pkgs.concurrent.futures.ProcessPoolExecutor() as executor:
-        futures = {executor.submit(metaverif, repo_path, target_dir, entry): entry for entry in entries}
+        futures = {
+            executor.submit(metaverif, repo_path, target_dir, entry): entry
+            for entry in entries
+        }
        for future in pkgs.concurrent.futures.as_completed(futures):
            cmd = futures[future]
            logger.info(f"Metaverif {cmd} finished.")
@@ -186,7 +208,7 @@ def analyze(repo_path, output):


@main.command()
-@click.option('--output', 'output', required=False)
+@click.option("--output", "output", required=False)
@click.argument("repo_path")
 def clean(repo_path, output):
    cleans_failed = 0
@@ -194,7 +216,9 @@ def clean(repo_path, output):
    if pkgs.os.path.isdir(target_dir):
        for filename in pkgs.os.listdir(target_dir):
            file_path = pkgs.os.path.join(target_dir, filename)
-            if pkgs.os.path.isfile(file_path) and pkgs.os.path.splitext(file_path)[1] in [".pv", ".log"]:
+            if pkgs.os.path.isfile(file_path) and pkgs.os.path.splitext(file_path)[
+                1
+            ] in [".pv", ".log"]:
                try:
                    pkgs.os.remove(file_path)
                except Exception as e:
@@ -206,11 +230,10 @@ def clean(repo_path, output):
        exit(1)


-
 def metaverif(repo_path, tmpdir, file):
    print(f"Start metaverif on {file}")
    # Extract the name using regex
-    name_match = pkgs.re.search(r'([^/]*)(?=\.mpv)', file)
+    name_match = pkgs.re.search(r"([^/]*)(?=\.mpv)", file)
    if name_match:
        name = name_match.group(0)  # Get the matched name

@@ -229,20 +252,30 @@ def metaverif(repo_path, tmpdir, file):
        log_file = pkgs.os.path.join(tmpdir, f"{name}.log")

        ta, res, ctr, expected, descs = pretty_output_init(cpp_prep)
-        with open(log_file, 'a') as log:
+        with open(log_file, "a") as log:
            generator = run_proverif(awk_prep)
            for line in generator:
                log.write(line)
                # parse-result-line:
-                match = pkgs.re.search(r'^RESULT .* \b(true|false)\b\.$', line)
+                match = pkgs.re.search(r"^RESULT .* \b(true|false)\b\.$", line)
                if match:
                    result = match.group(1)
                    # pretty-output:
-                    res, ctr, ta = pretty_output_step(cpp_prep, result, expected, descs, res, ctr, ta)
+                    res, ctr, ta = pretty_output_step(
+                        cpp_prep, result, expected, descs, res, ctr, ta
+                    )
    else:
-        logger.error(f"No match found for the filename {file}: extension should be .mpv")
+        logger.error(
+            f"No match found for the filename {file}: extension should be .mpv"
+        )
        exit(1)


+@main.command()
+@click.argument("file_path")
+def parse(file_path):
+    parse_main(file_path)
+
+
 if __name__ == "__main__":
    main()
--- a/marzipan/src/parser.py
+++ b/marzipan/src/parser.py
@@ -33,7 +33,13 @@ proverif_grammar = Lark(
    _non_empty_seq{x}: x ("," x)*
    _maybe_empty_seq{x}: [ _non_empty_seq{x} ]

+    IDENT_FUN_CONST: "data" | "private" | "typeConverter"
+    IDENT_FREE_REDUC: "private"
+    IDENT_PRED: "memberOptim" | "block"
+    IDENT_PROCESS: "precise"
+    IDENT_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
    options: [ "[" _non_empty_seq{IDENT} "]" ]
+    options{idents}: [ "[" _non_empty_seq{idents} "]" ]
    process: ZERO
           | YIELD
           | IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
@@ -193,6 +199,7 @@ proverif_grammar = Lark(
        | nounif_decl
        | elimtrue_decl
        | clauses_decl
+        | module_decl
        #| param_decl
        #| proba_decl
        #| letproba_decl
@@ -204,7 +211,7 @@ proverif_grammar = Lark(
    channel_decl: "channel" _non_empty_seq{IDENT} "."
    free_decl: "free" _non_empty_seq{IDENT} ":" typeid options "."
    const_decl: "const" _non_empty_seq{IDENT} ":" typeid options "."
-    fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options "."
+    fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{IDENT_FUN_CONST} "."
    letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
    reduc_decl: "reduc" eqlist options "."
    fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options "."
@@ -328,6 +335,8 @@ proverif_grammar = Lark(
    elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
    clauses_decl: "clauses" clauses "."

+    module_decl: "@module" " " IDENT
+
    # TODO: finish defining these (comes from Cryptoverif)
    #param_decl: "param" _non_empty_seq{IDENT} options "."
    #proba_decl: "proba" IDENT ["(...)"] options "."
@@ -351,7 +360,7 @@ proverif_grammar = Lark(
    phase: "phase" NAT [";" process]
    TAG: IDENT
    sync: "sync" NAT ["[" TAG "]"] [";" process]
-    COMMENT:  "(*" /.*/
+    COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
    %import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library
    %ignore WS // Disregard spaces in text
    %ignore COMMENT
@@ -360,6 +369,10 @@ proverif_grammar = Lark(
    # lexer_callbacks={"COMMENT": comments.append},
 )

+# COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
+# COMMENT:  "(*" /(\*(?!\))|[^*])*/  "*)"
+# comment: /\(\*(?:(?!\(\*|\*\)).|(?R))*\*\)/
+
 # TODO Open ProVerif compatibility questions
 # TODO * does it allow leading zeros for NAT?
 # TODO * tag is not defined? is it ident?
@@ -373,17 +386,8 @@ def parsertest(input):
    return parsetree


-def main():
-    if len(sys.argv) != 2:
-        print(f"Usage: {sys.argv[0]} <filename>")
-        sys.exit(1)
-    filename = sys.argv[1]
-
-    with open(filename, "r") as f:
+def parse_main(file_path):
+    with open(file_path, "r") as f:
        content = f.read()
        # print(content)
        parsertest(content)
-
-
-if __name__ == "__main__":
-    main()
Author	SHA1	Message	Date
Benjamin Lipp	f20fd1acc3	feat: module declaration, WIP: missing ProVerif option terms Co-authored-by: Anja Rabich <a.rabich@uni-luebeck.de>	2025-10-14 18:01:13 +02:00
Benjamin Lipp	3ce0d262d9	feat: add parser for multi-line comments, without nesting Co-authored-by: Anja Rabich <a.rabich@uni-luebeck.de>	2025-10-14 17:31:31 +02:00