Compare commits

...

2 Commits

Author SHA1 Message Date
Benjamin Lipp
f20fd1acc3 feat: module declaration, WIP: missing ProVerif option terms
Co-authored-by: Anja Rabich <a.rabich@uni-luebeck.de>
2025-10-14 18:01:13 +02:00
Benjamin Lipp
3ce0d262d9 feat: add parser for multi-line comments, without nesting
Co-authored-by: Anja Rabich <a.rabich@uni-luebeck.de>
2025-10-14 17:31:31 +02:00
3 changed files with 77 additions and 39 deletions

View File

@@ -33,11 +33,12 @@
* ~~exception handling in analyze() and in run_proverif()~~
* ~~refactor filtering in run_proverif (see karo's comment)~~
* ~configurable target directory~
* ~lark parser: multiline comments, how???~
## Next Steps
* integrate marzipan.awk into Python, somehow
* lark parser: multiline comments, how???
* options term special cases (c.f. manual page 133, starting with "fun" term)
* rewrite marzipan.awk into Python/LARK
* define a LARK grammar for marzipan.awk rules
* write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)
@@ -54,6 +55,6 @@
* and rewrite the AST within Python
* reconstruct ProVerif input file for ProVerif
* rewrite our CPP usages into Python/…?
* low priority: nested comments in ProVerif code
“it replaces the Bash script and is idiomatic Python code”

View File

@@ -1,5 +1,7 @@
from .util import pkgs, setup_exports, export, rename
#from rich.console import Console
from .parser import *
# from rich.console import Console
import click
target_subdir = "target/proverif"
@@ -52,7 +54,13 @@ def run_proverif(file, extra_args=[]):
params = ["proverif", "-test", *extra_args, file]
logger.debug(params)
process = exc_piped(params, stderr=pkgs.subprocess.PIPE, stdout=pkgs.subprocess.PIPE, text=True, bufsize=1)
process = exc_piped(
params,
stderr=pkgs.subprocess.PIPE,
stdout=pkgs.subprocess.PIPE,
text=True,
bufsize=1,
)
try:
prev_line = None
for line in process.stdout:
@@ -72,7 +80,9 @@ def run_proverif(file, extra_args=[]):
return_code = process.wait()
if return_code != 0:
logger.error(f"Proverif exited with a non-zero error code {params}: {return_code}")
logger.error(
f"Proverif exited with a non-zero error code {params}: {return_code}"
)
exit(return_code)
@@ -88,8 +98,13 @@ def cpp(file, cpp_prep):
def awk(repo_path, cpp_prep, awk_prep):
params = ["awk", "-f", str(pkgs.os.path.join(repo_path, "marzipan/marzipan.awk")), cpp_prep]
with open(awk_prep, 'w') as file:
params = [
"awk",
"-f",
str(pkgs.os.path.join(repo_path, "marzipan/marzipan.awk")),
cpp_prep,
]
with open(awk_prep, "w") as file:
exc(params, stderr=pkgs.sys.stderr, stdout=file)
file.write("\nprocess main")
@@ -104,11 +119,11 @@ def pretty_output_init(file_path):
expected = []
descs = []
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
content = file.read()
# Process lemmas first
result = pkgs.re.findall(r'@(lemma)(?=\s+\"([^\"]*)\")', content)
result = pkgs.re.findall(r"@(lemma)(?=\s+\"([^\"]*)\")", content)
if result:
# The regex only returns lemmas. For lemmas, we always expect the result 'true' from ProVerif.
expected.extend([True for _ in range(len(result))])
@@ -118,8 +133,10 @@ def pretty_output_init(file_path):
result = pkgs.re.findall(r'@(query|reachable)(?=\s+"[^\"]*")', content)
if result:
# For queries, we expect 'true' from ProVerif, for reachable, we expect 'false'.
expected.extend([e == '@query' for e in result])
reachable_result = pkgs.re.findall(r'@(query|reachable)\s+"([^\"]*)"', content)
expected.extend([e == "@query" for e in result])
reachable_result = pkgs.re.findall(
r'@(query|reachable)\s+"([^\"]*)"', content
)
descs.extend([e[1] for e in reachable_result])
ta = pkgs.time.time()
@@ -133,9 +150,9 @@ def pretty_output_step(file_path, line, expected, descs, res, ctr, ta):
# Output from ProVerif contains a trailing newline, which we do not have in the expected output. Remove it for meaningful matching.
outp_clean_raw = line.rstrip()
if outp_clean_raw == 'true':
if outp_clean_raw == "true":
outp_clean = True
elif outp_clean_raw == 'false':
elif outp_clean_raw == "false":
outp_clean = False
else:
outp_clean = outp_clean_raw
@@ -155,7 +172,9 @@ def pretty_output_step(file_path, line, expected, descs, res, ctr, ta):
def pretty_output(file_path):
(ta, res, ctr, expected, descs) = pretty_output_init(file_path)
for line in pkgs.sys.stdin:
(res, ctr, ta) = pretty_output_step(file_path, line, expected, descs, res, ctr, ta)
(res, ctr, ta) = pretty_output_step(
file_path, line, expected, descs, res, ctr, ta
)
def get_target_dir(path, output):
@@ -166,18 +185,21 @@ def get_target_dir(path, output):
@main.command()
@click.option('--output', 'output', required=False)
@click.option("--output", "output", required=False)
@click.argument("repo_path")
def analyze(repo_path, output):
target_dir = get_target_dir(repo_path, output)
pkgs.os.makedirs(target_dir, exist_ok=True)
entries = []
analysis_dir = pkgs.os.path.join(repo_path, 'analysis')
entries.extend(sorted(pkgs.glob.glob(str(analysis_dir) + '/*.entry.mpv')))
analysis_dir = pkgs.os.path.join(repo_path, "analysis")
entries.extend(sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv")))
with pkgs.concurrent.futures.ProcessPoolExecutor() as executor:
futures = {executor.submit(metaverif, repo_path, target_dir, entry): entry for entry in entries}
futures = {
executor.submit(metaverif, repo_path, target_dir, entry): entry
for entry in entries
}
for future in pkgs.concurrent.futures.as_completed(futures):
cmd = futures[future]
logger.info(f"Metaverif {cmd} finished.")
@@ -186,7 +208,7 @@ def analyze(repo_path, output):
@main.command()
@click.option('--output', 'output', required=False)
@click.option("--output", "output", required=False)
@click.argument("repo_path")
def clean(repo_path, output):
cleans_failed = 0
@@ -194,7 +216,9 @@ def clean(repo_path, output):
if pkgs.os.path.isdir(target_dir):
for filename in pkgs.os.listdir(target_dir):
file_path = pkgs.os.path.join(target_dir, filename)
if pkgs.os.path.isfile(file_path) and pkgs.os.path.splitext(file_path)[1] in [".pv", ".log"]:
if pkgs.os.path.isfile(file_path) and pkgs.os.path.splitext(file_path)[
1
] in [".pv", ".log"]:
try:
pkgs.os.remove(file_path)
except Exception as e:
@@ -206,11 +230,10 @@ def clean(repo_path, output):
exit(1)
def metaverif(repo_path, tmpdir, file):
print(f"Start metaverif on {file}")
# Extract the name using regex
name_match = pkgs.re.search(r'([^/]*)(?=\.mpv)', file)
name_match = pkgs.re.search(r"([^/]*)(?=\.mpv)", file)
if name_match:
name = name_match.group(0) # Get the matched name
@@ -229,20 +252,30 @@ def metaverif(repo_path, tmpdir, file):
log_file = pkgs.os.path.join(tmpdir, f"{name}.log")
ta, res, ctr, expected, descs = pretty_output_init(cpp_prep)
with open(log_file, 'a') as log:
with open(log_file, "a") as log:
generator = run_proverif(awk_prep)
for line in generator:
log.write(line)
# parse-result-line:
match = pkgs.re.search(r'^RESULT .* \b(true|false)\b\.$', line)
match = pkgs.re.search(r"^RESULT .* \b(true|false)\b\.$", line)
if match:
result = match.group(1)
# pretty-output:
res, ctr, ta = pretty_output_step(cpp_prep, result, expected, descs, res, ctr, ta)
res, ctr, ta = pretty_output_step(
cpp_prep, result, expected, descs, res, ctr, ta
)
else:
logger.error(f"No match found for the filename {file}: extension should be .mpv")
logger.error(
f"No match found for the filename {file}: extension should be .mpv"
)
exit(1)
@main.command()
@click.argument("file_path")
def parse(file_path):
parse_main(file_path)
if __name__ == "__main__":
main()

View File

@@ -33,7 +33,13 @@ proverif_grammar = Lark(
_non_empty_seq{x}: x ("," x)*
_maybe_empty_seq{x}: [ _non_empty_seq{x} ]
IDENT_FUN_CONST: "data" | "private" | "typeConverter"
IDENT_FREE_REDUC: "private"
IDENT_PRED: "memberOptim" | "block"
IDENT_PROCESS: "precise"
IDENT_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
options: [ "[" _non_empty_seq{IDENT} "]" ]
options{idents}: [ "[" _non_empty_seq{idents} "]" ]
process: ZERO
| YIELD
| IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
@@ -193,6 +199,7 @@ proverif_grammar = Lark(
| nounif_decl
| elimtrue_decl
| clauses_decl
| module_decl
#| param_decl
#| proba_decl
#| letproba_decl
@@ -204,7 +211,7 @@ proverif_grammar = Lark(
channel_decl: "channel" _non_empty_seq{IDENT} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid options "."
const_decl: "const" _non_empty_seq{IDENT} ":" typeid options "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{IDENT_FUN_CONST} "."
letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
reduc_decl: "reduc" eqlist options "."
fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options "."
@@ -328,6 +335,8 @@ proverif_grammar = Lark(
elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
clauses_decl: "clauses" clauses "."
module_decl: "@module" " " IDENT
# TODO: finish defining these (comes from Cryptoverif)
#param_decl: "param" _non_empty_seq{IDENT} options "."
#proba_decl: "proba" IDENT ["(...)"] options "."
@@ -351,7 +360,7 @@ proverif_grammar = Lark(
phase: "phase" NAT [";" process]
TAG: IDENT
sync: "sync" NAT ["[" TAG "]"] [";" process]
COMMENT: "(*" /.*/
COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
%import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library
%ignore WS // Disregard spaces in text
%ignore COMMENT
@@ -360,6 +369,10 @@ proverif_grammar = Lark(
# lexer_callbacks={"COMMENT": comments.append},
)
# COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
# COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)"
# comment: /\(\*(?:(?!\(\*|\*\)).|(?R))*\*\)/
# TODO Open ProVerif compatibility questions
# TODO * does it allow leading zeros for NAT?
# TODO * tag is not defined? is it ident?
@@ -373,17 +386,8 @@ def parsertest(input):
return parsetree
def main():
if len(sys.argv) != 2:
print(f"Usage: {sys.argv[0]} <filename>")
sys.exit(1)
filename = sys.argv[1]
with open(filename, "r") as f:
def parse_main(file_path):
with open(file_path, "r") as f:
content = f.read()
# print(content)
parsertest(content)
if __name__ == "__main__":
main()