From 56c590438c71c12413e42e446e818cdf63461e32 Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Thu, 4 Jun 2026 11:58:19 +0200 Subject: [PATCH] [spectec] Equivalence tracking in IL validator --- specification/wasm-1.0/A-binary.spectec | 2 +- specification/wasm-2.0/A-binary.spectec | 2 +- .../wasm-3.0/5.1-binary.values.spectec | 1 + .../wasm-latest/5.1-binary.values.spectec | 1 + spectec/src/frontend/elab.ml | 1 + spectec/src/il/env.ml | 9 +++++ spectec/src/il/eval.ml | 6 ++-- spectec/src/il/valid.ml | 14 ++++++-- spectec/test-frontend/TEST.md | 11 ++++--- spectec/test-latex/TEST.md | 2 +- spectec/test-middlend/TEST.md | 33 ++++++++++--------- 11 files changed, 55 insertions(+), 27 deletions(-) diff --git a/specification/wasm-1.0/A-binary.spectec b/specification/wasm-1.0/A-binary.spectec index ce3ede9c73..db5de53d22 100644 --- a/specification/wasm-1.0/A-binary.spectec +++ b/specification/wasm-1.0/A-binary.spectec @@ -53,7 +53,7 @@ def $utf8(ch) = b -- if ch < U+0080 /\ ch = b def $utf8(ch) = b_1 b_2 -- if U+0080 <= ch < U+0800 /\ ch = $(2^6*(b_1 - 0xC0) + (b_2 - 0x80)) def $utf8(ch) = b_1 b_2 b_3 -- if (U+0800 <= ch < U+D800 \/ U+E000 <= ch < U+10000) /\ ch = $(2^12*(b_1 - 0xE0) + 2^6*(b_2 - 0x80) + (b_3 - 0x80)) def $utf8(ch) = b_1 b_2 b_3 b_4 -- if (U+10000 <= ch < U+11000) /\ ch = $(2^18*(b_1 - 0xF0) + 2^12*(b_2 - 0x80) + 2^6*(b_3 - 0x80) + (b_4 - 0x80)) -def $utf8(ch*) = $concat_(byte, $utf8(ch)*) +def $utf8(ch*) = $concat_(byte, $utf8(ch)*) -- if |ch*| > 1 grammar Bname : name = | b*:Blist(Bbyte) => name -- if $utf8(name) = b* diff --git a/specification/wasm-2.0/A-binary.spectec b/specification/wasm-2.0/A-binary.spectec index 8570e0b2e7..9f364f38b5 100644 --- a/specification/wasm-2.0/A-binary.spectec +++ b/specification/wasm-2.0/A-binary.spectec @@ -55,7 +55,7 @@ def $utf8(ch) = b -- if ch < U+0080 /\ ch = b def $utf8(ch) = b_1 b_2 -- if U+0080 <= ch < U+0800 /\ ch = $(2^6*(b_1 - 0xC0) + (b_2 - 0x80)) def $utf8(ch) = b_1 b_2 b_3 -- if (U+0800 <= ch < U+D800 \/ U+E000 <= ch < U+10000) /\ ch = $(2^12*(b_1 - 0xE0) + 2^6*(b_2 - 0x80) + (b_3 - 0x80)) def $utf8(ch) = b_1 b_2 b_3 b_4 -- if (U+10000 <= ch < U+11000) /\ ch = $(2^18*(b_1 - 0xF0) + 2^12*(b_2 - 0x80) + 2^6*(b_3 - 0x80) + (b_4 - 0x80)) -def $utf8(ch*) = $concat_(byte, $utf8(ch)*) +def $utf8(ch*) = $concat_(byte, $utf8(ch)*) -- if |ch*| > 1 grammar Bname : name = | b*:Blist(Bbyte) => name -- if $utf8(name) = b* diff --git a/specification/wasm-3.0/5.1-binary.values.spectec b/specification/wasm-3.0/5.1-binary.values.spectec index 6bb898de24..8740df8c69 100644 --- a/specification/wasm-3.0/5.1-binary.values.spectec +++ b/specification/wasm-3.0/5.1-binary.values.spectec @@ -50,6 +50,7 @@ def $cont(b) = $(b - 0x80) -- if (0x80 < b < 0xC0) ;; def $utf8(char*) : byte* def $utf8(ch*) = $concat_(byte, $utf8(ch)*) + -- if |ch*| > 1 def $utf8(ch) = b -- if ch < U+0080 -- if ch = b diff --git a/specification/wasm-latest/5.1-binary.values.spectec b/specification/wasm-latest/5.1-binary.values.spectec index 6bb898de24..8740df8c69 100644 --- a/specification/wasm-latest/5.1-binary.values.spectec +++ b/specification/wasm-latest/5.1-binary.values.spectec @@ -50,6 +50,7 @@ def $cont(b) = $(b - 0x80) -- if (0x80 < b < 0xC0) ;; def $utf8(char*) : byte* def $utf8(ch*) = $concat_(byte, $utf8(ch)*) + -- if |ch*| > 1 def $utf8(ch) = b -- if ch < U+0080 -- if ch = b diff --git a/spectec/src/frontend/elab.ml b/spectec/src/frontend/elab.ml index 686281dc3a..e4d79bd0d3 100644 --- a/spectec/src/frontend/elab.ml +++ b/spectec/src/frontend/elab.ml @@ -225,6 +225,7 @@ let to_il_env env = defs; rels = Map.empty; grams; + eqs = []; } diff --git a/spectec/src/il/env.ml b/spectec/src/il/env.ml index 044f5393ac..ce3d22776d 100644 --- a/spectec/src/il/env.ml +++ b/spectec/src/il/env.ml @@ -26,6 +26,7 @@ type t = defs : def_def Map.t; rels : rel_def Map.t; grams : gram_def Map.t; + eqs : (exp * exp) list; } @@ -37,6 +38,7 @@ let empty = defs = Map.empty; rels = Map.empty; grams = Map.empty; + eqs = []; } let mem map id = Map.mem id.it map @@ -93,6 +95,13 @@ let rebind_def env id rhs = {env with defs = rebind "definition" env.defs id rhs let rebind_rel env id rhs = {env with rels = rebind "relation" env.rels id rhs} let rebind_gram env id rhs = {env with grams = rebind "grammar" env.grams id rhs} +let record_eq env e1 e2 = {env with eqs = (e1, e2)::env.eqs} +let recall_eq env e1 e2 = + List.exists (fun (x, y) -> + Eq.eq_exp x e1 && Eq.eq_exp y e2 || + Eq.eq_exp x e2 && Eq.eq_exp y e1 + ) env.eqs + (* Extraction *) diff --git a/spectec/src/il/eval.ml b/spectec/src/il/eval.ml index b6b06512db..1b52956ba2 100644 --- a/spectec/src/il/eval.ml +++ b/spectec/src/il/eval.ml @@ -1192,8 +1192,10 @@ and equiv_exp static env e1 e2 = ) @@ fun _ -> (* TODO(3, rossberg): this does not reduce inner type arguments *) match reduce_exp static env e1, reduce_exp static env e2 with - | Ok e1', Ok e2' -> Eq.eq_exp e1' e2' - | (Ok e1' | Error e1'), (Ok e2' | Error e2') when static -> Eq.eq_exp e1' e2' + | Ok e1', Ok e2' -> + Eq.eq_exp e1' e2' || Env.recall_eq env e1' e2' + | (Ok e1' | Error e1'), (Ok e2' | Error e2') when static -> + Eq.eq_exp e1' e2' || Env.recall_eq env e1' e2' | Error _, _ -> Error.error e1.at "validation" "expression failed to evaluate during pattern-matching" diff --git a/spectec/src/il/valid.ml b/spectec/src/il/valid.ml index 0075836c21..a2767ff2b3 100644 --- a/spectec/src/il/valid.ml +++ b/spectec/src/il/valid.ml @@ -592,7 +592,10 @@ and valid_sym env g : typ = (* Premises *) and valid_prem env prem = - Debug.(log_in_at "il.valid_prem" prem.at (fun _ -> il_prem prem)); + Debug.(log_at "il.valid_prem" prem.at + (fun _ -> il_prem prem) + (fun _ -> "") + ) @@ fun _ -> match prem.it with | RulePr (x, as_, mixop, e) -> let ps, mixop', t, _rules = Env.find_rel env x in @@ -600,6 +603,11 @@ and valid_prem env prem = let s = valid_args env as_ ps Subst.empty prem.at in valid_expmix env mixop e (mixop, Subst.subst_typ s t) e.at; env + | IfPr ({it = CmpE (`EqOp, _t, e1, e2); _} as e) -> + valid_exp env e (BoolT $ e.at); + let Ok e1' | Error e1' = Eval.reduce_exp env e1 in + let Ok e2' | Error e2' = Eval.reduce_exp env e2 in + Env.record_eq env e1' e2' | IfPr e -> valid_exp env e (BoolT $ e.at); env @@ -616,7 +624,9 @@ and valid_prem env prem = List.map (fun x -> "`" ^ x ^ "`") |> String.concat ", " ) ^ " do not occur in left-hand side expression"); - env' + let Ok e1' | Error e1' = Eval.reduce_exp env e1 in + let Ok e2' | Error e2' = Eval.reduce_exp env e2 in + Env.record_eq env' e1' e2' | ElsePr -> env | IterPr (prem', ite) -> diff --git a/spectec/test-frontend/TEST.md b/spectec/test-frontend/TEST.md index 15c60d0f10..8172a6aeaf 100644 --- a/spectec/test-frontend/TEST.md +++ b/spectec/test-frontend/TEST.md @@ -785,21 +785,22 @@ rec { ;; ../../../../specification/wasm-latest/1.1-syntax.values.spectec:91.1-91.25 def $utf8(char*) : byte* - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-52.44 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-53.18 def $utf8{`ch*` : char*}(ch*{ch <- `ch*`}) = $concat_(syntax byte, $utf8([ch])*{ch <- `ch*`}) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:53.1-55.15 + -- if (|ch*{ch <- `ch*`}| > 1) + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:54.1-56.15 def $utf8{ch : char, b : byte}([ch]) = [b] -- if (ch!`%`_char.0 < 128) -- if (`%`_byte(ch!`%`_char.0) = b) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:56.1-58.46 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:57.1-59.46 def $utf8{ch : char, b_1 : byte, b_2 : byte}([ch]) = [b_1 b_2] -- if ((128 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 2048)) -- if (ch!`%`_char.0 = (((2 ^ 6) * (((b_1!`%`_byte.0 : nat <:> int) - (192 : nat <:> int)) : int <:> nat)) + $cont(b_2))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:59.1-61.64 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:60.1-62.64 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte}([ch]) = [b_1 b_2 b_3] -- if (((2048 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 55296)) \/ ((57344 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 65536))) -- if (ch!`%`_char.0 = ((((2 ^ 12) * (((b_1!`%`_byte.0 : nat <:> int) - (224 : nat <:> int)) : int <:> nat)) + ((2 ^ 6) * $cont(b_2))) + $cont(b_3))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:62.1-64.82 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:63.1-65.82 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte, b_4 : byte}([ch]) = [b_1 b_2 b_3 b_4] -- if ((65536 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 69632)) -- if (ch!`%`_char.0 = (((((2 ^ 18) * (((b_1!`%`_byte.0 : nat <:> int) - (240 : nat <:> int)) : int <:> nat)) + ((2 ^ 12) * $cont(b_2))) + ((2 ^ 6) * $cont(b_3))) + $cont(b_4))) diff --git a/spectec/test-latex/TEST.md b/spectec/test-latex/TEST.md index 1253a23f3c..9ca1bfedb4 100644 --- a/spectec/test-latex/TEST.md +++ b/spectec/test-latex/TEST.md @@ -11394,7 +11394,7 @@ $$ $$ \begin{array}[t]{@{}lcl@{}l@{}} -{\mathrm{utf{\kern-0.1em\scriptstyle 8}}}({{\mathit{ch}}^\ast}) & = & {\bigoplus}\, {{\mathrm{utf{\kern-0.1em\scriptstyle 8}}}({\mathit{ch}})^\ast} \\ +{\mathrm{utf{\kern-0.1em\scriptstyle 8}}}({{\mathit{ch}}^\ast}) & = & {\bigoplus}\, {{\mathrm{utf{\kern-0.1em\scriptstyle 8}}}({\mathit{ch}})^\ast} & \quad \mbox{if}~ {|{{\mathit{ch}}^\ast}|} > 1 \\ {\mathrm{utf{\kern-0.1em\scriptstyle 8}}}({\mathit{ch}}) & = & b & \quad \begin{array}[t]{@{}l@{}} \mbox{if}~ {\mathit{ch}} < \mathrm{U{+}80} \\ diff --git a/spectec/test-middlend/TEST.md b/spectec/test-middlend/TEST.md index 526d41eb0b..2c3c6f95f2 100644 --- a/spectec/test-middlend/TEST.md +++ b/spectec/test-middlend/TEST.md @@ -308,21 +308,22 @@ rec { ;; ../../../../specification/wasm-latest/1.1-syntax.values.spectec:91.1-91.25 def $utf8(char*) : byte* - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-52.44 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-53.18 def $utf8{`ch*` : char*}(ch*{ch <- `ch*`}) = $concat_(syntax byte, $utf8([ch])*{ch <- `ch*`}) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:53.1-55.15 + -- if (|ch*{ch <- `ch*`}| > 1) + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:54.1-56.15 def $utf8{ch : char, b : byte}([ch]) = [b] -- if (ch!`%`_char.0 < 128) -- if (`%`_byte(ch!`%`_char.0) = b) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:56.1-58.46 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:57.1-59.46 def $utf8{ch : char, b_1 : byte, b_2 : byte}([ch]) = [b_1 b_2] -- if ((128 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 2048)) -- if (ch!`%`_char.0 = (((2 ^ 6) * (((b_1!`%`_byte.0 : nat <:> int) - (192 : nat <:> int)) : int <:> nat)) + $cont(b_2))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:59.1-61.64 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:60.1-62.64 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte}([ch]) = [b_1 b_2 b_3] -- if (((2048 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 55296)) \/ ((57344 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 65536))) -- if (ch!`%`_char.0 = ((((2 ^ 12) * (((b_1!`%`_byte.0 : nat <:> int) - (224 : nat <:> int)) : int <:> nat)) + ((2 ^ 6) * $cont(b_2))) + $cont(b_3))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:62.1-64.82 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:63.1-65.82 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte, b_4 : byte}([ch]) = [b_1 b_2 b_3 b_4] -- if ((65536 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 69632)) -- if (ch!`%`_char.0 = (((((2 ^ 18) * (((b_1!`%`_byte.0 : nat <:> int) - (240 : nat <:> int)) : int <:> nat)) + ((2 ^ 12) * $cont(b_2))) + ((2 ^ 6) * $cont(b_3))) + $cont(b_4))) @@ -12151,21 +12152,22 @@ rec { ;; ../../../../specification/wasm-latest/1.1-syntax.values.spectec:91.1-91.25 def $utf8(char*) : byte* - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-52.44 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-53.18 def $utf8{`ch*` : char*}(ch*{ch <- `ch*`}) = $concat_(syntax byte, $utf8([ch])*{ch <- `ch*`}) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:53.1-55.15 + -- if (|ch*{ch <- `ch*`}| > 1) + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:54.1-56.15 def $utf8{ch : char, b : byte}([ch]) = [b] -- if (ch!`%`_char.0 < 128) -- if (`%`_byte(ch!`%`_char.0) = b) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:56.1-58.46 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:57.1-59.46 def $utf8{ch : char, b_1 : byte, b_2 : byte}([ch]) = [b_1 b_2] -- if ((128 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 2048)) -- if (ch!`%`_char.0 = (((2 ^ 6) * (((b_1!`%`_byte.0 : nat <:> int) - (192 : nat <:> int)) : int <:> nat)) + $cont(b_2))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:59.1-61.64 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:60.1-62.64 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte}([ch]) = [b_1 b_2 b_3] -- if (((2048 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 55296)) \/ ((57344 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 65536))) -- if (ch!`%`_char.0 = ((((2 ^ 12) * (((b_1!`%`_byte.0 : nat <:> int) - (224 : nat <:> int)) : int <:> nat)) + ((2 ^ 6) * $cont(b_2))) + $cont(b_3))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:62.1-64.82 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:63.1-65.82 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte, b_4 : byte}([ch]) = [b_1 b_2 b_3 b_4] -- if ((65536 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 69632)) -- if (ch!`%`_char.0 = (((((2 ^ 18) * (((b_1!`%`_byte.0 : nat <:> int) - (240 : nat <:> int)) : int <:> nat)) + ((2 ^ 12) * $cont(b_2))) + ((2 ^ 6) * $cont(b_3))) + $cont(b_4))) @@ -24003,21 +24005,22 @@ rec { ;; ../../../../specification/wasm-latest/1.1-syntax.values.spectec:91.1-91.25 def $utf8(char*) : byte* - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-52.44 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:52.1-53.18 def $utf8{`ch*` : char*}(ch*{ch <- `ch*`}) = $concat_(syntax byte, $utf8([ch])*{ch <- `ch*`}) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:53.1-55.15 + -- if (|ch*{ch <- `ch*`}| > 1) + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:54.1-56.15 def $utf8{ch : char, b : byte}([ch]) = [b] -- if (ch!`%`_char.0 < 128) -- if (`%`_byte(ch!`%`_char.0) = b) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:56.1-58.46 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:57.1-59.46 def $utf8{ch : char, b_1 : byte, b_2 : byte}([ch]) = [b_1 b_2] -- if ((128 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 2048)) -- if (ch!`%`_char.0 = (((2 ^ 6) * (((b_1!`%`_byte.0 : nat <:> int) - (192 : nat <:> int)) : int <:> nat)) + $cont(b_2))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:59.1-61.64 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:60.1-62.64 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte}([ch]) = [b_1 b_2 b_3] -- if (((2048 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 55296)) \/ ((57344 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 65536))) -- if (ch!`%`_char.0 = ((((2 ^ 12) * (((b_1!`%`_byte.0 : nat <:> int) - (224 : nat <:> int)) : int <:> nat)) + ((2 ^ 6) * $cont(b_2))) + $cont(b_3))) - ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:62.1-64.82 + ;; ../../../../specification/wasm-latest/5.1-binary.values.spectec:63.1-65.82 def $utf8{ch : char, b_1 : byte, b_2 : byte, b_3 : byte, b_4 : byte}([ch]) = [b_1 b_2 b_3 b_4] -- if ((65536 <= ch!`%`_char.0) /\ (ch!`%`_char.0 < 69632)) -- if (ch!`%`_char.0 = (((((2 ^ 18) * (((b_1!`%`_byte.0 : nat <:> int) - (240 : nat <:> int)) : int <:> nat)) + ((2 ^ 12) * $cont(b_2))) + ((2 ^ 6) * $cont(b_3))) + $cont(b_4)))