From d0a41f9d7feca305aac126c2bbc4effdeb121958 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Wed, 4 Mar 2026 12:04:17 -0500 Subject: [PATCH] [multicast] Bitmap-based multicast replication for sidecar-lite This adds multicast replication to sidecar-lite, emulating DPD's sidecar.p4. We manage two bitmaps per multicast destination (external and underlay); replication is driven via the `Replicate` extern (from p4rs #240). The ingress control block suppresses one, or neither, of these bitmaps based on the geneve multicast option tag, and then merges both into a single replication bitmap. This tag is read-only, matching DPD's sidecar.p4. OPTE is expected to stamp it before the packet reaches the switch. Egress decap is gated on the reserved underlay subnet (ff04::/64, per RFC 7346 admin-local scope + Omicron) and tag=2 (Both), matching DPD's egress mcast_tag_check. Ports in the decap table get geneve stripped; others keep encapsulation for OPTE to handle on the receiving sled. Multicast dst MAC derivation (RFC 1112 for IPv4, RFC 2464 for IPv6) and per-port source MAC rewrite run on every replicated copy. Egress validates the outer IP multicast range before any mcast processing. ### References This work depends on the P4(rs) work in [#240](https://github.com/oxidecomputer/p4/pull/240). 
--- Cargo.lock | 45 +- Cargo.toml | 6 +- p4/headers.p4 | 4 +- p4/parser.p4 | 37 +- p4/sidecar-lite.p4 | 303 ++++- p4/softnpu.p4 | 16 +- scadm/src/main.rs | 96 +- softnpu/src/test.rs | 2813 +++++++++++++++++++++++++++++++++++++++++-- 8 files changed, 3152 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23703cf..4c5692f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -370,7 +370,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -764,7 +764,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -1424,7 +1424,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi 0.5.2", "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -1962,7 +1962,7 @@ dependencies = [ [[package]] name = "p4" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/p4?branch=main#0466bce9ba2d6f1b96477d1a30395a66f597e8a8" +source = "git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast#3e15edd5d6ce910562d44bea55b969fe92ed36ba" dependencies = [ "colored", "regex", @@ -1971,7 +1971,7 @@ dependencies = [ [[package]] name = "p4-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/p4?branch=main#0466bce9ba2d6f1b96477d1a30395a66f597e8a8" +source = "git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast#3e15edd5d6ce910562d44bea55b969fe92ed36ba" dependencies = [ "p4", "p4-rust", @@ -1984,7 +1984,7 @@ dependencies = [ [[package]] name = "p4-rust" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/p4?branch=main#0466bce9ba2d6f1b96477d1a30395a66f597e8a8" +source = 
"git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast#3e15edd5d6ce910562d44bea55b969fe92ed36ba" dependencies = [ "p4", "prettyplease", @@ -1997,7 +1997,18 @@ dependencies = [ [[package]] name = "p4rs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/p4?branch=main#0466bce9ba2d6f1b96477d1a30395a66f597e8a8" +source = "git+https://github.com/oxidecomputer/p4?branch=main#dbf23f78878c308bce432fd776d26c6785a89d69" +dependencies = [ + "bitvec", + "num", + "serde", + "usdt 0.5.0", +] + +[[package]] +name = "p4rs" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast#3e15edd5d6ce910562d44bea55b969fe92ed36ba" dependencies = [ "bitvec", "num", @@ -2605,7 +2616,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -2696,7 +2707,7 @@ dependencies = [ "libc", "macaddr", "num", - "p4rs", + "p4rs 0.1.0 (git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast)", "serde", "serde_json", "softnpu", @@ -2952,7 +2963,7 @@ dependencies = [ "colored", "num", "p4-macro", - "p4rs", + "p4rs 0.1.0 (git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast)", "pnet", "pnet_macros", "pnet_macros_support", @@ -3111,7 +3122,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.117", @@ -3132,7 +3143,7 @@ name = "softnpu" version = "0.2.0" source = "git+https://github.com/oxidecomputer/softnpu?branch=main#a7329c50acc4605f16c73fdc4c66add9c04258c5" dependencies = [ - "p4rs", + "p4rs 0.1.0 (git+https://github.com/oxidecomputer/p4?branch=main)", "serde", "serde_json", "tokio", @@ -3289,7 +3300,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -3298,7 +3309,7 @@ version = "1.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d8c27177b12a6399ffc08b98f76f7c9a1f4fe9fc967c784c5a071fa8d93cf7e1" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3313,14 +3324,14 @@ dependencies = [ [[package]] name = "tests" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/p4?branch=main#0466bce9ba2d6f1b96477d1a30395a66f597e8a8" +source = "git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast#3e15edd5d6ce910562d44bea55b969fe92ed36ba" dependencies = [ "anyhow", "bitvec", "colored", "num", "p4-macro", - "p4rs", + "p4rs 0.1.0 (git+https://github.com/oxidecomputer/p4?branch=zl%2Fmulticast)", "pnet", "rand 0.9.2", "usdt 0.5.0", @@ -3994,7 +4005,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 6ec1c95..2f254ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,9 +6,9 @@ members = [ resolver = "2" [workspace.dependencies] -p4rs = { git = "https://github.com/oxidecomputer/p4", branch = "main" } -p4-macro = { git = "https://github.com/oxidecomputer/p4", branch = "main" } -p4-test = { package = "tests", git = "https://github.com/oxidecomputer/p4", branch = "main" } +p4rs = { git = "https://github.com/oxidecomputer/p4", branch = "zl/multicast" } +p4-macro = { git = "https://github.com/oxidecomputer/p4", branch = "zl/multicast" } +p4-test = { package = "tests", git = "https://github.com/oxidecomputer/p4", branch = "zl/multicast" } usdt = { git = "https://github.com/oxidecomputer/usdt" } base64 = { version = "0.22" } diff --git a/p4/headers.p4 b/p4/headers.p4 index a2b3866..95a95ec 100644 --- a/p4/headers.p4 +++ b/p4/headers.p4 @@ -1,4 +1,4 @@ -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company header sidecar_h { bit<8> sc_code; @@ 
-96,7 +96,7 @@ header geneve_opt_h { } header oxg_opt_multicast_h { - bit<2> replication; + bit<2> mcast_tag; bit<30> reserved; } diff --git a/p4/parser.p4 b/p4/parser.p4 index d3e0692..b3dde50 100644 --- a/p4/parser.p4 +++ b/p4/parser.p4 @@ -1,4 +1,4 @@ -// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company parser parse( packet_in pkt, @@ -46,6 +46,29 @@ parser parse( state ipv6 { pkt.extract(hdr.ipv6); + if (hdr.ipv6.dst[127:120] == 8w0xff) { transition ipv6_mcast; } + transition ipv6_proto; + } + + state ipv6_mcast { + // Interface-local (ff01) must not be forwarded. + if (hdr.ipv6.dst[127:112] == 16w0xff01) { transition reject; } + + // Link-local (ff02) is forwarded to scrimlet. + // Allow hop_limit == 1 (link-local scope) but still reject expired + // packets. + if (hdr.ipv6.dst[127:112] == 16w0xff02) { + if (hdr.ipv6.hop_limit == 8w0) { transition reject; } + transition ipv6_proto; + } + + // Non-link-local multicast requires hop_limit > 1. + if (hdr.ipv6.hop_limit == 8w0) { transition reject; } + if (hdr.ipv6.hop_limit == 8w1) { transition reject; } + transition ipv6_proto; + } + + state ipv6_proto { if (hdr.ipv6.next_hdr == 8w0xdd) { transition ddm; } if (hdr.ipv6.next_hdr == 8w58) { transition icmp; } if (hdr.ipv6.next_hdr == 8w17) { transition udp; } @@ -82,6 +105,18 @@ parser parse( state ipv4 { pkt.extract(hdr.ipv4); + if (hdr.ipv4.dst[31:28] == 4w0xe) { transition ipv4_mcast; } + transition ipv4_proto; + } + + state ipv4_mcast { + // Multicast with TTL <= 1 must not be forwarded (RFC 1112). 
+ if (hdr.ipv4.ttl == 8w0) { transition reject; } + if (hdr.ipv4.ttl == 8w1) { transition reject; } + transition ipv4_proto; + } + + state ipv4_proto { if (hdr.ipv4.protocol == 8w17) { transition udp; } if (hdr.ipv4.protocol == 8w6) { transition tcp; } if (hdr.ipv4.protocol == 8w1) { transition icmp; } diff --git a/p4/sidecar-lite.p4 b/p4/sidecar-lite.p4 index 20c6a78..91f97c1 100644 --- a/p4/sidecar-lite.p4 +++ b/p4/sidecar-lite.p4 @@ -1,4 +1,4 @@ -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company #include #include @@ -16,13 +16,15 @@ control ingress( inout ingress_metadata_t ingress, inout egress_metadata_t egress, ) { - attached() attached; - local() local; - router() router; - nat_ingress() nat; - resolver() resolver; - mac_rewrite() mac; - proxy_arp() pxarp; + attached() attached; + local() local; + router() router; + nat_ingress() nat; + resolver() resolver; + mac_rewrite() mac; + proxy_arp() pxarp; + mcast_ingress() mcast; + Replicate() mcast_rep; apply { // @@ -61,9 +63,11 @@ control ingress( // // After local and NAT processing, basic packet forwarding happens. // - router.apply(hdr, ingress, egress); // router table lookups - resolver.apply(hdr, egress); // resolve the nexthop - mac.apply(hdr, egress); // source mac rewrite + router.apply(hdr, ingress, egress); + mcast.apply(hdr, ingress, egress); + mcast_rep.replicate(egress.port_bitmap); + resolver.apply(hdr, egress); + mac.apply(hdr, egress); // Prevent reflection. 
if (ingress.port == egress.port) { egress.drop = true; } @@ -377,8 +381,11 @@ control router_v4_route( inout egress_metadata_t egress, ) { table rtr { - key = { ingress.path_idx: exact; } - actions = { forward; forward_v6; forward_vlan; forward_vlan_v6; } + key = { + ingress.path_idx: exact; + ingress.route_ttl_is_1: exact; + } + actions = { forward; forward_v6; forward_vlan; forward_vlan_v6; ttl_exceeded; } // should never happen, but the compiler requires a default default_action = drop; } @@ -386,6 +393,7 @@ control router_v4_route( apply { rtr.apply(); } action drop() { egress.drop = true; } + action ttl_exceeded() { egress.drop = true; } action forward(bit<16> port, bit<32> nexthop) { egress.port = port; @@ -451,8 +459,11 @@ control router_v6_route( inout egress_metadata_t egress, ) { table rtr { - key = { ingress.path_idx: exact; } - actions = { forward; forward_vlan; } + key = { + ingress.path_idx: exact; + ingress.route_ttl_is_1: exact; + } + actions = { forward; forward_vlan; ttl_exceeded; } // should never happen, but the compiler requires a default default_action = drop; } @@ -460,6 +471,7 @@ control router_v6_route( apply { rtr.apply(); } action drop() { egress.drop = true; } + action ttl_exceeded() { egress.drop = true; } action forward(bit<16> port, bit<128> nexthop) { egress.port = port; @@ -520,11 +532,13 @@ control router( if (hdr.ipv4.isValid()) { v4_idx.apply(hdr.ipv4.dst, hdr.ipv4.src, ingress, egress); if (egress.drop == true) { return; } + if (hdr.ipv4.ttl == 8w1) { ingress.route_ttl_is_1 = 1w1; } v4_route.apply(ingress, egress); } if (hdr.ipv6.isValid()) { v6_idx.apply(hdr.ipv6.dst, hdr.ipv6.src, ingress, egress); if (egress.drop == true) { return; } + if (hdr.ipv6.hop_limit == 8w1) { ingress.route_ttl_is_1 = 1w1; } v6_route.apply(ingress, egress); } outport = egress.port; @@ -583,9 +597,266 @@ control proxy_arp( } } +control mcast_ingress( + inout headers_t hdr, + inout ingress_metadata_t ingress, + inout egress_metadata_t egress, +) { + 
table mcast_replication_v6 { + key = { hdr.ipv6.dst: exact; } + actions = { set_port_bitmap; } + default_action = NoAction; + } + + table mcast_replication_v4 { + key = { hdr.ipv4.dst: exact; } + actions = { set_port_bitmap; } + default_action = NoAction; + } + + table mcast_source_filter_v4 { + key = { + hdr.inner_ipv4.src: lpm; + hdr.inner_ipv4.dst: exact; + } + actions = { allow_source; } + default_action = NoAction; + } + + table mcast_source_filter_v6 { + key = { + hdr.inner_ipv6.src: lpm; + hdr.inner_ipv6.dst: exact; + } + actions = { allow_source; } + default_action = NoAction; + } + + apply { + // Source filtering for geneve-encapsulated multicast traffic. + // + // Check inner destination is a multicast address before applying + // the source filter table. + if (hdr.geneve.isValid()) { + if (hdr.inner_ipv4.isValid()) { + // 224.0.0.0/4 + if (hdr.inner_ipv4.dst[31:28] == 4w0xe) { + mcast_source_filter_v4.apply(); + } else { + ingress.allow_source_mcast = true; + } + } else if (hdr.inner_ipv6.isValid()) { + // ff00::/8 + if (hdr.inner_ipv6.dst[127:120] == 8w0xff) { + mcast_source_filter_v6.apply(); + } else { + ingress.allow_source_mcast = true; + } + } + } else { + // Non-encapsulated traffic skips source filtering. + ingress.allow_source_mcast = true; + } + + // Replication only proceeds if source filtering passed. + if (ingress.allow_source_mcast) { + if (hdr.ipv6.isValid()) { mcast_replication_v6.apply(); } + if (hdr.ipv4.isValid()) { mcast_replication_v4.apply(); } + } + + // Per-packet tag suppression. 
If the packet carries a geneve + // multicast option, zero the bitmap for the group that has + // already been served: + // 0 (external) -> suppress underlay_bitmap + // 1 (underlay) -> suppress external_bitmap + // 2 (both) -> neither suppressed + if (hdr.oxg_mcast.isValid()) { + if (hdr.oxg_mcast.mcast_tag == 2w0) { + egress.underlay_bitmap = 128w0; + } + if (hdr.oxg_mcast.mcast_tag == 2w1) { + egress.external_bitmap = 128w0; + } + } + + // Merge both bitmaps into the final replication bitmap. + egress.port_bitmap = egress.external_bitmap | egress.underlay_bitmap; + } + + action set_port_bitmap(bit<128> external, bit<128> underlay) { + egress.external_bitmap = external; + egress.underlay_bitmap = underlay; + } + + action allow_source() { + ingress.allow_source_mcast = true; + } +} control egress( inout headers_t hdr, inout ingress_metadata_t ingress, inout egress_metadata_t egress, -) { } +) { + // Per-port decapsulation for multicast replicated copies. + // + // Ports in this table receive decapsulated (customer-facing) traffic. + // Ports not in the table keep encapsulation intact (sled-bound, + // OPTE handles decap). Equivalent to DPD's tbl_decap_ports. + table mcast_egress_decap { + key = { egress.port: exact; } + actions = { decap; decap_vlan; } + default_action = NoAction; + } + + // Source MAC rewrite per egress port. Runs on every replicated + // copy so both encapsulated and decapsulated packets leave with + // the correct source MAC for the egress port. + table mcast_src_mac { + key = { egress.port: exact; } + actions = { rewrite_src_mac; } + default_action = NoAction; + } + + action rewrite_src_mac(bit<48> mac) { hdr.ethernet.src = mac; } + + apply { + // Validate that the packet is actually multicast by checking + // the outer IP destination range before applying any multicast-specific + // egress processing. 
+ bool is_mcast_pkt = false; + + if (hdr.ipv6.isValid()) { + // ff00::/8 + if (hdr.ipv6.dst[127:120] == 8w0xff) { is_mcast_pkt = true; } + } + if (hdr.ipv4.isValid()) { + // 224.0.0.0/4 + if (hdr.ipv4.dst[31:28] == 4w0xe) { is_mcast_pkt = true; } + } + + if (is_mcast_pkt == false) { return; } + + // Per-port decap only for UNDERLAY_EXTERNAL (tag=2) replicas + // on the reserved underlay multicast subnet (ff04::/64), + // matching Dendrite's egress mcast_tag_check. Tag=0 and tag=1 + // copies pass through without decap consideration. + if (hdr.ipv6.isValid()) { + if (hdr.ipv6.dst[127:64] == 64w0xff04000000000000) { + if (hdr.geneve.isValid()) { + if (hdr.oxg_mcast.isValid()) { + if (hdr.oxg_mcast.mcast_tag == 2w2) { + mcast_egress_decap.apply(); + } + } + } + } + } + + // Derive multicast dst MAC from the IP destination + // (RFC 1112 section 6.4 for IPv4, RFC 2464 for IPv6). + // Encapsulated copies use the outer IP. Decapped copies + // use the inner IP (outer is stripped). + if (hdr.ipv6.isValid()) { + hdr.ethernet.dst[47:32] = 16w0x3333; + hdr.ethernet.dst[31:0] = hdr.ipv6.dst[31:0]; + } + if (hdr.ipv4.isValid()) { + hdr.ethernet.dst[47:24] = 24w0x01005e; + hdr.ethernet.dst[23:16] = hdr.ipv4.dst[23:16]; + hdr.ethernet.dst[15:0] = hdr.ipv4.dst[15:0]; + hdr.ethernet.dst[23:23] = 1w0; + } + if (hdr.geneve.isValid() == false) { + if (hdr.inner_ipv4.isValid()) { + hdr.ethernet.dst[47:24] = 24w0x01005e; + hdr.ethernet.dst[23:16] = hdr.inner_ipv4.dst[23:16]; + hdr.ethernet.dst[15:0] = hdr.inner_ipv4.dst[15:0]; + hdr.ethernet.dst[23:23] = 1w0; + } + if (hdr.inner_ipv6.isValid()) { + hdr.ethernet.dst[47:32] = 16w0x3333; + hdr.ethernet.dst[31:0] = hdr.inner_ipv6.dst[31:0]; + } + } + + // Rewrite source MAC for the egress port. 
+ mcast_src_mac.apply(); + } + + action decap() { + strip_decap(); + hdr.vlan.setInvalid(); + } + + action decap_vlan(bit<12> vlan_id) { + strip_decap(); + hdr.vlan.setValid(); + hdr.vlan.pcp = 3w0; + hdr.vlan.dei = 1w0; + hdr.vlan.vid = vlan_id; + // Inner ethertype moves into VLAN header. + hdr.vlan.ether_type = hdr.ethernet.ether_type; + hdr.ethernet.ether_type = 16w0x8100; + } + + // Shared decap: validate and decrement inner TTL, restore inner + // ethernet header, and strip outer headers. + // + // Sets egress.drop on expired TTL. Callers still run but the + // packet is dropped before emission. + action strip_decap() { + // Drop expired inner packets instead of wrapping TTL/hop_limit. + if (hdr.inner_ipv4.isValid()) { + if (hdr.inner_ipv4.ttl == 8w0) { egress.drop = true; } + if (hdr.inner_ipv4.ttl == 8w1) { egress.drop = true; } + if (egress.drop == false) { + hdr.inner_ipv4.ttl = hdr.inner_ipv4.ttl - 8w1; + // Incremental IPv4 header checksum update (RFC 1624). + // TTL occupies the high byte of a 16-bit word, so + // decrementing TTL by 1 adds 0x0100. Detect overflow + // and fold the carry for ones-complement correctness. + bit<16> old_csum = hdr.inner_ipv4.hdr_checksum; + bit<16> new_csum = old_csum + 16w0x0100; + if (new_csum < old_csum) { + new_csum = new_csum + 16w1; + } + hdr.inner_ipv4.hdr_checksum = new_csum; + } + } + + if (hdr.inner_ipv6.isValid()) { + if (hdr.inner_ipv6.hop_limit == 8w0) { egress.drop = true; } + if (hdr.inner_ipv6.hop_limit == 8w1) { egress.drop = true; } + if (egress.drop == false) { + hdr.inner_ipv6.hop_limit = hdr.inner_ipv6.hop_limit - 8w1; + } + } + + if (egress.drop == true) { return; } + + // Restore inner ethernet header, then strip encapsulation. + hdr.ethernet = hdr.inner_eth; + hdr.inner_eth.setInvalid(); + + // Set ethertype based on inner IP version. 
+ if (hdr.inner_ipv4.isValid()) { + hdr.ethernet.ether_type = 16w0x0800; + } + if (hdr.inner_ipv6.isValid()) { + hdr.ethernet.ether_type = 16w0x86dd; + } + + // Strip outer headers. + hdr.ipv6.setInvalid(); + hdr.ipv4.setInvalid(); + hdr.udp.setInvalid(); + hdr.tcp.setInvalid(); + hdr.geneve.setInvalid(); + hdr.oxg_external_tag.setInvalid(); + hdr.oxg_mcast_tag.setInvalid(); + hdr.oxg_mcast.setInvalid(); + hdr.oxg_mss_tag.setInvalid(); + hdr.oxg_mss.setInvalid(); + } +} diff --git a/p4/softnpu.p4 b/p4/softnpu.p4 index 6db8a0b..e04770c 100644 --- a/p4/softnpu.p4 +++ b/p4/softnpu.p4 @@ -1,4 +1,4 @@ -// Copyright 2022 Oxide Computer Company +// Copyright 2026 Oxide Computer Company #include @@ -11,6 +11,8 @@ struct ingress_metadata_t { bit<16> path_idx; bool forward_needed; bool lldp; + bit<1> route_ttl_is_1; + bool allow_source_mcast; // Used as mutable scratchpad shared between parser states. bit<6> geneve_chunks; @@ -24,8 +26,20 @@ struct egress_metadata_t { bit<12> vlan_id; bool drop; bool broadcast; + // Merged replication bitmap. + // + // We keep this as separate fields (rather than + // inlining external_bitmap | underlay_bitmap in the Replicate call) + // so the post-suppression state is inspectable. 
+ bit<128> port_bitmap; + bit<128> external_bitmap; + bit<128> underlay_bitmap; } extern Checksum { bit<16> run(in T data); } + +extern Replicate { + void replicate(in bit<128> bitmap); +} diff --git a/scadm/src/main.rs b/scadm/src/main.rs index 0d1e77b..8ef8b52 100644 --- a/scadm/src/main.rs +++ b/scadm/src/main.rs @@ -1,4 +1,4 @@ -// Copyright 2022 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use softnpu::p4rs::TableEntry; use std::collections::BTreeMap; @@ -317,7 +317,8 @@ async fn main() { let idx = table.find_available(); // Add idx->route to the route table - let keyset_data: Vec = idx.to_le_bytes().to_vec(); + let mut keyset_data: Vec = idx.to_le_bytes().to_vec(); + keyset_data.push(0); let mut parameter_data = port.to_le_bytes().to_vec(); let mut nexthop_data: Vec = nexthop.octets().into(); @@ -335,6 +336,21 @@ async fn main() { ) .await; + // Add ttl==1 entry to drop. + let mut ttl_keyset_data = idx.to_le_bytes().to_vec(); + ttl_keyset_data.push(1); + + send( + ManagementRequest::TableAdd(TableAdd { + table: ROUTER_V4_RT.into(), + action: "ttl_exceeded".into(), + keyset_data: ttl_keyset_data, + parameter_data: Vec::new(), + }), + &cli, + ) + .await; + // Now add cidr->idx to the index table let mut keyset_data: Vec = destination.octets().into(); keyset_data.push(mask); @@ -371,16 +387,18 @@ async fn main() { .await; // Remove the entry from the idx->route table table - let mut keyset_data: Vec = idx.to_le_bytes().to_vec(); - keyset_data.push(mask); - send( - ManagementRequest::TableRemove(TableRemove { - table: ROUTER_V4_RT.into(), - keyset_data, - }), - &cli, - ) - .await; + for ttl in [0u8, 1u8] { + let mut keyset_data: Vec = idx.to_le_bytes().to_vec(); + keyset_data.push(ttl); + send( + ManagementRequest::TableRemove(TableRemove { + table: ROUTER_V4_RT.into(), + keyset_data, + }), + &cli, + ) + .await; + } } } @@ -394,7 +412,8 @@ async fn main() { let idx = table.find_available(); // Add idx->route to the route table - let 
keyset_data: Vec = idx.to_le_bytes().to_vec(); + let mut keyset_data: Vec = idx.to_le_bytes().to_vec(); + keyset_data.push(0); let mut parameter_data = port.to_le_bytes().to_vec(); let mut nexthop_data: Vec = nexthop.octets().into(); @@ -412,6 +431,21 @@ async fn main() { ) .await; + // Add ttl==1 entry to drop. + let mut ttl_keyset_data = idx.to_le_bytes().to_vec(); + ttl_keyset_data.push(1); + + send( + ManagementRequest::TableAdd(TableAdd { + table: ROUTER_V6_RT.into(), + action: "ttl_exceeded".into(), + keyset_data: ttl_keyset_data, + parameter_data: Vec::new(), + }), + &cli, + ) + .await; + // Now add cidr->idx to the index table let mut keyset_data: Vec = destination.octets().into(); keyset_data.push(mask); @@ -448,16 +482,18 @@ async fn main() { .await; // Remove the entry from the idx->route table table - let mut keyset_data: Vec = idx.to_le_bytes().to_vec(); - keyset_data.push(mask); - send( - ManagementRequest::TableRemove(TableRemove { - table: ROUTER_V6_RT.into(), - keyset_data, - }), - &cli, - ) - .await; + for ttl in [0u8, 1u8] { + let mut keyset_data: Vec = idx.to_le_bytes().to_vec(); + keyset_data.push(ttl); + send( + ManagementRequest::TableRemove(TableRemove { + table: ROUTER_V6_RT.into(), + keyset_data, + }), + &cli, + ) + .await; + } } } @@ -1110,14 +1146,16 @@ fn get_addr(data: &[u8], rev: bool) -> Option { } } +/// Extract a u16 from the first two bytes of `data`. Ignores trailing +/// bytes so this works on compound keys (e.g. path_idx + route_ttl_is_1). 
fn get_u16(data: &[u8]) -> Option { - match data.len() { - 2 => Some(u16::from_le_bytes([data[0], data[1]])), - _ => { - println!("expected u16, found: {data:x?}"); + data.get(..2) + .and_then(|s| s.try_into().ok()) + .map(u16::from_le_bytes) + .or_else(|| { + println!("expected at least 2 bytes for u16, found: {data:x?}"); None - } - } + }) } fn get_mac(data: &[u8]) -> Option<[u8; 6]> { diff --git a/softnpu/src/test.rs b/softnpu/src/test.rs index 9210e25..2c58288 100644 --- a/softnpu/src/test.rs +++ b/softnpu/src/test.rs @@ -16,6 +16,88 @@ use pnet_macros_support::types::{u1, u16be, u2, u24be, u3, u5, u6, u7}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::println; +// Protocol constants. +const ETHERTYPE_IPV4: u16 = 0x0800; +const ETHERTYPE_IPV6: u16 = 0x86dd; +const ETHERTYPE_SIDECAR: u16 = 0x0901; +const GENEVE_UDP_PORT: u16 = 6081; +const GENEVE_PROTO_ETH: u16 = 0x6558; +const OXG_OPTION_CLASS: u16 = 0x0129; + +// Multicast tag values matching Dendrite's MULTICAST_TAG_* constants. +const MCAST_TAG_EXTERNAL: u8 = 0; +const MCAST_TAG_UNDERLAY: u8 = 1; +const MCAST_TAG_UNDERLAY_EXTERNAL: u8 = 2; + +// Reserved underlay multicast destination. Packets to this prefix +// with tag=UNDERLAY_EXTERNAL are candidates for per-port decap, +// matching Dendrite's ff04::/64 subnet gate. +const UNDERLAY_MCAST_DST: &str = "ff04::1"; + +// Header sizes in bytes. Standard protocol values; used to compute +// field offsets into constructed packet buffers. +const IPV6_HDR_LEN: usize = 40; +const UDP_HDR_LEN: usize = 8; +const GENEVE_HDR_LEN: usize = 8; +const GENEVE_OPT_HDR_LEN: usize = 4; +const ETH_HDR_LEN: usize = 14; +const IPV4_TTL_FIELD_OFFSET: usize = 8; + +// Byte offset of the inner IPv4 TTL field within the geneve-over-IPv6 +// packet buffer (starting from outer IPv6, not including ethernet). 
+const INNER_IPV4_TTL_OFFSET: usize = IPV6_HDR_LEN + + UDP_HDR_LEN + + GENEVE_HDR_LEN + + GENEVE_OPT_HDR_LEN + + ETH_HDR_LEN + + IPV4_TTL_FIELD_OFFSET; + +/// Poll a condition with bounded retries, panicking with `msg` on timeout. +/// Default: 50 retries at 10ms intervals (500ms total). +fn wait_for bool>(f: F, msg: &str) { + wait_for_retries(f, msg, 50); +} + +fn wait_for_retries bool>(f: F, msg: &str, retries: usize) { + for _ in 0..retries { + if f() { + return; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + panic!("timed out waiting for: {msg}"); +} + +/// Build a sentinel IPv4 packet that routes to port 1 via the default +/// 0.0.0.0/0 entry in `pipeline_init`. Send after the packet under test, +/// then `wait_for` its arrival to confirm the pipeline has drained. +fn sentinel_v4() -> Vec { + let mut buf = vec![0u8; 28]; + let mut ip = MutableIpv4Packet::new(&mut buf).unwrap(); + ip.set_version(4); + ip.set_header_length(5); + ip.set_total_length(28); + ip.set_source("10.0.0.1".parse().unwrap()); + ip.set_destination("8.8.8.8".parse().unwrap()); + ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + ip.set_ttl(64); + buf +} + +/// Build a sentinel IPv6 packet that routes to port 0 via the fd00:1::/64 +/// entry in `pipeline_init`. +fn sentinel_v6() -> Vec { + let mut buf = vec![0u8; 48]; + let mut ip = MutableIpv6Packet::new(&mut buf).unwrap(); + ip.set_version(6); + ip.set_source("fd00:1::99".parse().unwrap()); + ip.set_destination("fd00:1::1".parse().unwrap()); + ip.set_payload_length(8); + ip.set_next_header(IpNextHeaderProtocol::new(17)); + ip.set_hop_limit(64); + buf +} + // Geneve types used to verify encap and ingress NAT behaviour. #[allow(dead_code)] #[derive(PktDerive)] @@ -46,6 +128,19 @@ pub struct GeneveOpt { payload: Vec, } +/// Oxide geneve multicast option data (oxg_opt_multicast_h). +/// Follows the GeneveOpt tag when option_type == 0x01. 
+#[allow(dead_code)] +#[derive(PktDerive)] +pub struct OxgMcastOpt { + mcast_tag: u2, + _reserved: u6, + _reserved2: u24be, + + #[payload] + payload: Vec, +} + fn pipeline_init(pipeline: &mut main_pipeline) { // router entry upstream // Add a single path for 0.0.0.0/0 pointing at data in slot 2. @@ -54,7 +149,7 @@ fn pipeline_init(pipeline: &mut main_pipeline) { .add_ingress_router_v4_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); // At slot 2, add a forwarding entry gw=1.2.3.1, port=1 - let (key_buf, param_buf) = router_forward_entry(2, "1.2.3.1", 1); + let (key_buf, param_buf) = router_forward_entry(2, "1.2.3.1", 1, 0); pipeline.add_ingress_router_v4_route_rtr_entry( "forward", &key_buf, ¶m_buf, 0, ); @@ -66,7 +161,7 @@ fn pipeline_init(pipeline: &mut main_pipeline) { .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); // At slot 2, add a forwarding entry gw=fe80::1 port=0 - let (key_buf, param_buf) = router_forward_entry(2, "fe80::1", 0); + let (key_buf, param_buf) = router_forward_entry(2, "fe80::1", 0, 0); pipeline.add_ingress_router_v6_route_rtr_entry( "forward", &key_buf, ¶m_buf, 0, ); @@ -184,19 +279,20 @@ fn vlan_routing_egress() -> Result<(), anyhow::Error> { let mut eth = MutableEthernetPacket::new(&mut eth_data).unwrap(); eth.set_destination(MacAddr::new(0x11, 0x11, 0x11, 0x22, 0x22, 0x22)); eth.set_source(MacAddr::new(0x33, 0x33, 0x33, 0x44, 0x44, 0x44)); - eth.set_ethertype(EtherType(0x0800)); + eth.set_ethertype(EtherType(ETHERTYPE_IPV4)); eth.set_payload(&inner_ip_data); n += 8; + let proto = GENEVE_PROTO_ETH.to_be_bytes(); let mut geneve_data: Vec = - vec![0x00, 0x00, 0x65, 0x58, 0x11, 0x11, 0x11, 0x00]; + vec![0x00, 0x00, proto[0], proto[1], 0x11, 0x11, 0x11, 0x00]; geneve_data.extend_from_slice(ð_data); n += 8; let mut udp_data: Vec = vec![0; n]; let mut udp = MutableUdpPacket::new(&mut udp_data).unwrap(); udp.set_source(100); - udp.set_destination(6081); + udp.set_destination(GENEVE_UDP_PORT); udp.set_checksum(0x1701); 
udp.set_payload(&geneve_data); @@ -210,11 +306,10 @@ fn vlan_routing_egress() -> Result<(), anyhow::Error> { ip.set_payload(&udp_data); ip.set_next_header(IpNextHeaderProtocol::new(17)); - phy0.send(&[TxFrame::new(phy1.mac, 0x86dd, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; let fs = phy1.recv(); let f = &fs[0]; - let decapped_ip = Ipv4Packet::new(&f.payload).unwrap(); let decapped_udp = UdpPacket::new(decapped_ip.payload()).unwrap(); @@ -278,8 +373,8 @@ fn vlan_routing_ingress() -> Result<(), anyhow::Error> { // ---- CASE 1 ---- // This frame should get through // ---------------- - phy1.send(&[TxFrame::new(phy0.mac, 0x0800, &ip_data)])?; - std::thread::sleep(std::time::Duration::from_millis(250)); + phy1.send(&[TxFrame::new(phy0.mac, ETHERTYPE_IPV4, &ip_data)])?; + wait_for(|| phy0.recv_buffer_len() > 0, "NAT packet to arrive"); assert_eq!(phy0.recv_buffer_len(), 1); let fs = phy0.recv(); @@ -300,17 +395,17 @@ fn vlan_routing_ingress() -> Result<(), anyhow::Error> { assert_eq!(ip.get_payload_length(), recv_body_len as u16); assert_eq!(udp.get_length(), recv_body_len as u16); - assert_eq!(udp.get_source(), 6081); - assert_eq!(udp.get_destination(), 6081); + assert_eq!(udp.get_source(), GENEVE_UDP_PORT); + assert_eq!(udp.get_destination(), GENEVE_UDP_PORT); assert_eq!(geneve.get_version(), 0); assert_eq!(geneve.get_options_len(), 1); assert_eq!(geneve.get_control_packet(), 0); assert_eq!(geneve.get_has_critical_option(), 0); - assert_eq!(geneve.get_protocol_type(), 0x6558); + assert_eq!(geneve.get_protocol_type(), GENEVE_PROTO_ETH); assert_eq!(geneve.get_vni(), 7777); - assert_eq!(geneve_opt.get_option_class(), 0x0129); + assert_eq!(geneve_opt.get_option_class(), OXG_OPTION_CLASS); assert_eq!(geneve_opt.get_critical_option(), 0); assert_eq!(geneve_opt.get_option_type(), 0); assert_eq!(geneve_opt.get_option_len(), 0); @@ -330,7 +425,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { 
.add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); // At slot 4, add a forwarding entry gw=fe80::2 port=2 - let (key_buf, param_buf) = router_forward_entry(4, "fe80::2", 2); + let (key_buf, param_buf) = router_forward_entry(4, "fe80::2", 2, 0); pipeline.add_ingress_router_v6_route_rtr_entry( "forward", &key_buf, ¶m_buf, 0, ); @@ -384,7 +479,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { let mut eth = MutableEthernetPacket::new(&mut eth_data).unwrap(); eth.set_destination(MacAddr::new(0x11, 0x11, 0x11, 0x22, 0x22, 0x22)); eth.set_source(MacAddr::new(0x33, 0x33, 0x33, 0x44, 0x44, 0x44)); - eth.set_ethertype(EtherType(0x0800)); + eth.set_ethertype(EtherType(ETHERTYPE_IPV4)); eth.set_payload(&inner_ip_data); n += 16; @@ -392,10 +487,10 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { let mut gen = MutableGenevePacket::new(&mut geneve_data).unwrap(); gen.set_version(0); gen.set_options_len(2); - gen.set_protocol_type(0x6558); + gen.set_protocol_type(GENEVE_PROTO_ETH); gen.set_vni(7777); let mut genopt = MutableGeneveOptPacket::new(gen.payload_mut()).unwrap(); - genopt.set_option_class(0x0129); + genopt.set_option_class(OXG_OPTION_CLASS); genopt.set_option_type(0x02); genopt.set_option_len(1); genopt.payload_mut().copy_from_slice(&1448u32.to_be_bytes()); @@ -405,7 +500,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { let mut udp_data: Vec = vec![0; n]; let mut udp = MutableUdpPacket::new(&mut udp_data).unwrap(); udp.set_source(100); - udp.set_destination(6081); + udp.set_destination(GENEVE_UDP_PORT); udp.set_checksum(0x1701); udp.set_payload(&geneve_data); @@ -419,7 +514,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { ip.set_payload(&udp_data); ip.set_next_header(IpNextHeaderProtocol::new(17)); - phy0.send(&[TxFrame::new(phy2.mac, 0x86dd, &ip_data)])?; + phy0.send(&[TxFrame::new(phy2.mac, ETHERTYPE_IPV6, &ip_data)])?; let fs = phy2.recv(); 
let f = &fs[0]; @@ -434,10 +529,10 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { assert_eq!(geneve.get_options_len(), 2); assert_eq!(geneve.get_control_packet(), 0); assert_eq!(geneve.get_has_critical_option(), 0); - assert_eq!(geneve.get_protocol_type(), 0x6558); + assert_eq!(geneve.get_protocol_type(), GENEVE_PROTO_ETH); assert_eq!(geneve.get_vni(), 7777); - assert_eq!(geneve_opt.get_option_class(), 0x0129); + assert_eq!(geneve_opt.get_option_class(), OXG_OPTION_CLASS); assert_eq!(geneve_opt.get_critical_option(), 0); assert_eq!(geneve_opt.get_option_type(), 2); assert_eq!(geneve_opt.get_option_len(), 1); @@ -475,7 +570,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { let mut eth = MutableEthernetPacket::new(&mut eth_data).unwrap(); eth.set_destination(MacAddr::new(0x11, 0x11, 0x11, 0x22, 0x22, 0x22)); eth.set_source(MacAddr::new(0x33, 0x33, 0x33, 0x44, 0x44, 0x44)); - eth.set_ethertype(EtherType(0x0800)); + eth.set_ethertype(EtherType(ETHERTYPE_IPV4)); eth.set_payload(&inner_ip_data); n += 24; @@ -483,20 +578,20 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { let mut gen = MutableGenevePacket::new(&mut geneve_data).unwrap(); gen.set_version(0); gen.set_options_len(4); - gen.set_protocol_type(0x6558); + gen.set_protocol_type(GENEVE_PROTO_ETH); gen.set_vni(7777); let opt_space = gen.payload_mut(); let mut mcastopt = MutableGeneveOptPacket::new(&mut opt_space[8..]).unwrap(); - mcastopt.set_option_class(0x0129); + mcastopt.set_option_class(OXG_OPTION_CLASS); mcastopt.set_option_type(0x01); mcastopt.set_option_len(1); mcastopt .payload_mut() .copy_from_slice(&0x8000_0000u32.to_be_bytes()); let mut mssopt = MutableGeneveOptPacket::new(&mut opt_space[..8]).unwrap(); - mssopt.set_option_class(0x0129); + mssopt.set_option_class(OXG_OPTION_CLASS); mssopt.set_option_type(0x02); mssopt.set_option_len(1); mssopt.payload_mut().copy_from_slice(&1448u32.to_be_bytes()); @@ -506,7 +601,7 @@ fn 
geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { let mut udp_data: Vec = vec![0; n]; let mut udp = MutableUdpPacket::new(&mut udp_data).unwrap(); udp.set_source(100); - udp.set_destination(6081); + udp.set_destination(GENEVE_UDP_PORT); udp.set_checksum(0x1701); udp.set_payload(&geneve_data); @@ -520,7 +615,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { ip.set_payload(&udp_data); ip.set_next_header(IpNextHeaderProtocol::new(17)); - phy0.send(&[TxFrame::new(phy2.mac, 0x86dd, &ip_data)])?; + phy0.send(&[TxFrame::new(phy2.mac, ETHERTYPE_IPV6, &ip_data)])?; let fs = phy2.recv(); let f = &fs[0]; @@ -538,7 +633,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { assert_eq!(geneve.get_options_len(), 4); assert_eq!(geneve.get_control_packet(), 0); assert_eq!(geneve.get_has_critical_option(), 0); - assert_eq!(geneve.get_protocol_type(), 0x6558); + assert_eq!(geneve.get_protocol_type(), GENEVE_PROTO_ETH); assert_eq!(geneve.get_vni(), 7777); // NOTE: these are **not** in the same order as we put them in. @@ -546,7 +641,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { // extract semantics from e.g. multicast info), the switch places // each header in a dedcated slot. When deparsing, these are // returned in that internal order. 
- assert_eq!(geneve_opt_0.get_option_class(), 0x0129); + assert_eq!(geneve_opt_0.get_option_class(), OXG_OPTION_CLASS); assert_eq!(geneve_opt_0.get_critical_option(), 0); assert_eq!(geneve_opt_0.get_option_type(), 1); assert_eq!(geneve_opt_0.get_option_len(), 1); @@ -556,7 +651,7 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { &0x8000_0000u32.to_be_bytes() ); - assert_eq!(geneve_opt_1.get_option_class(), 0x0129); + assert_eq!(geneve_opt_1.get_option_class(), OXG_OPTION_CLASS); assert_eq!(geneve_opt_1.get_critical_option(), 0); assert_eq!(geneve_opt_1.get_option_type(), 2); assert_eq!(geneve_opt_1.get_option_len(), 1); @@ -569,97 +664,2617 @@ fn geneve_options_preserved_on_underlay() -> Result<(), anyhow::Error> { Ok(()) } -// Create an entry for the multipath cidr -> index table -fn router_idx_entry( - dst: &str, - prefix_len: u8, - idx: u16, - slots: u8, -) -> (Vec, Vec) { - let mut key_buf = match dst.parse().unwrap() { - IpAddr::V4(a) => a.octets().to_vec(), - IpAddr::V6(a) => a.octets().to_vec(), - }; - key_buf.push(prefix_len); +#[test] +fn ipv4_ttl1_dropped() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); - let mut param_buf = idx.to_le_bytes().to_vec(); - let slots_buf = slots.to_le_bytes().to_vec(); - param_buf.extend_from_slice(&slots_buf); + // Add ttl_exceeded entry for path_idx=2, route_ttl_is_1=1. + // The forward entry for (2, 0) is already in pipeline_init. + let (key_buf, param_buf) = router_ttl_exceeded_entry(2); + pipeline.add_ingress_router_v4_route_rtr_entry( + "ttl_exceeded", + &key_buf, + ¶m_buf, + 0, + ); - (key_buf, param_buf) + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + + npu.run(); + + // A plain IPv4 packet with TTL=1, dst=8.8.8.8 (matches 0.0.0.0/0 + // default route). Sent from phy0 so egress port=1 avoids reflection. 
+ let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 20 + payload.len()]; + + let mut ip = MutableIpv4Packet::new(&mut ip_data).unwrap(); + ip.set_version(4); + ip.set_header_length(5); + ip.set_source("10.0.0.1".parse().unwrap()); + ip.set_destination("8.8.8.8".parse().unwrap()); + ip.set_total_length(20 + payload.len() as u16); + ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + ip.set_ttl(1); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy1.recv_buffer_len(), + 1, + "TTL=1 packet should be dropped, only sentinel arrives" + ); + + Ok(()) } -// Create an entry for the multipath index -> forwarding data table -fn router_forward_entry(idx: u16, gw: &str, port: u16) -> (Vec, Vec) { - let key_buf = idx.to_le_bytes().to_vec(); +#[test] +fn ipv4_ttl2_forwarded() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); - let mut param_buf = port.to_le_bytes().to_vec(); + // Add ttl_exceeded entry so both key variants exist. + let (key_buf, param_buf) = router_ttl_exceeded_entry(2); + pipeline.add_ingress_router_v4_route_rtr_entry( + "ttl_exceeded", + &key_buf, + ¶m_buf, + 0, + ); - let mut nexthop_buf = match gw.parse().unwrap() { - IpAddr::V4(a) => a.octets().to_vec(), - IpAddr::V6(a) => a.octets().to_vec(), - }; - nexthop_buf.reverse(); - param_buf.extend_from_slice(&nexthop_buf); + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); - (key_buf, param_buf) + npu.run(); + + // Same packet as above but TTL=2. Should be forwarded. 
+ let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 20 + payload.len()]; + + let mut ip = MutableIpv4Packet::new(&mut ip_data).unwrap(); + ip.set_version(4); + ip.set_header_length(5); + ip.set_source("10.0.0.1".parse().unwrap()); + ip.set_destination("8.8.8.8".parse().unwrap()); + ip.set_total_length(20 + payload.len() as u16); + ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + ip.set_ttl(2); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &ip_data)])?; + + let fs = phy1.recv(); + assert!(!fs.is_empty(), "TTL=2 packet should be forwarded"); + + Ok(()) } -fn nat4_entry( - addr: &str, - begin: u16, - end: u16, - target: &str, - vni: u32, - mac: [u8; 6], -) -> (Vec, Vec) { - let addr: Ipv4Addr = addr.parse().unwrap(); - let target: Ipv6Addr = target.parse().unwrap(); +#[test] +fn ipv6_ttl1_dropped() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); - let mut key_buf = Vec::new(); - let mut buf = addr.octets().to_vec(); - buf.reverse(); - key_buf.extend_from_slice(&buf); - key_buf.extend_from_slice(&begin.to_le_bytes()); - key_buf.extend_from_slice(&end.to_le_bytes()); + // Add ttl_exceeded entry for v6 route table, path_idx=2. + let (key_buf, param_buf) = router_ttl_exceeded_entry(2); + pipeline.add_ingress_router_v6_route_rtr_entry( + "ttl_exceeded", + &key_buf, + ¶m_buf, + 0, + ); - let mut param_buf = Vec::new(); - let mut buf = target.octets().to_vec(); - buf.reverse(); - param_buf.extend_from_slice(&buf); - param_buf.extend_from_slice(&vni.to_le_bytes()[..3]); - param_buf.extend_from_slice(&mac); + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); - (key_buf, param_buf) + npu.run(); + + // An IPv6 packet with hop_limit=1, non-multicast dst that matches + // the fd00:1::/64 route (path_idx=2, port=0). Send from phy1 so we + // avoid reflection (egress port=0, ingress port=1). 
+ let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 40 + payload.len()]; + + let mut ip = MutableIpv6Packet::new(&mut ip_data).unwrap(); + ip.set_version(6); + ip.set_source("fd00:2::1".parse().unwrap()); + ip.set_destination("fd00:1::99".parse().unwrap()); + ip.set_payload_length(payload.len() as u16); + ip.set_next_header(IpNextHeaderProtocol::new(17)); + ip.set_hop_limit(1); + ip.set_payload(&payload); + + phy1.send(&[TxFrame::new(phy0.mac, ETHERTYPE_IPV6, &ip_data)])?; + phy1.send(&[TxFrame::new(phy0.mac, ETHERTYPE_IPV6, &sentinel_v6())])?; + wait_for(|| phy0.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy0.recv_buffer_len(), + 1, + "hop_limit=1 packet should be dropped, only sentinel arrives" + ); + + Ok(()) } -fn local6_entry(addr: &str) -> (Vec, Vec) { - let addr: Ipv6Addr = addr.parse().unwrap(); - let mut key_buf = addr.octets().to_vec(); - key_buf.reverse(); +#[test] +fn ipv4_mcast_ttl1_rejected() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); - (key_buf, Vec::new()) + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + + npu.run(); + + // IPv4 multicast dst (224.1.1.1) with TTL=1. + // Parser rejects before any table processing (RFC 1112). 
+ let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 20 + payload.len()]; + + let mut ip = MutableIpv4Packet::new(&mut ip_data).unwrap(); + ip.set_version(4); + ip.set_header_length(5); + ip.set_source("10.0.0.1".parse().unwrap()); + ip.set_destination("224.1.1.1".parse().unwrap()); + ip.set_total_length(20 + payload.len() as u16); + ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + ip.set_ttl(1); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy1.recv_buffer_len(), + 1, + "IPv4 mcast TTL=1 should be rejected by parser" + ); + assert_eq!( + phy0.recv_buffer_len(), + 0, + "IPv4 mcast TTL=1 should not reflect" + ); + + Ok(()) } -fn resolver4_entry(addr: &str, mac: [u8; 6]) -> (Vec, Vec) { - let addr: Ipv4Addr = addr.parse().unwrap(); - let mut key_buf = addr.octets().to_vec(); - key_buf.reverse(); +#[test] +fn ipv6_mcast_hop_limit1_rejected() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); - (key_buf, mac.to_vec()) + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + + npu.run(); + + // IPv6 multicast dst (ff0e::1, global scope) with hop_limit=1. + // Parser rejects for non-link-local multicast with hop_limit <= 1.
+ let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 40 + payload.len()]; + + let mut ip = MutableIpv6Packet::new(&mut ip_data).unwrap(); + ip.set_version(6); + ip.set_source("fd00:1::1".parse().unwrap()); + ip.set_destination("ff0e::1".parse().unwrap()); + ip.set_payload_length(payload.len() as u16); + ip.set_next_header(IpNextHeaderProtocol::new(17)); + ip.set_hop_limit(1); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy1.recv_buffer_len(), + 1, + "IPv6 mcast hop_limit=1 should be rejected" + ); + assert_eq!( + phy0.recv_buffer_len(), + 0, + "IPv6 mcast hop_limit=1 should not reflect" + ); + + Ok(()) } -fn resolver6_entry(addr: &str, mac: [u8; 6]) -> (Vec, Vec) { - let addr: Ipv6Addr = addr.parse().unwrap(); - let mut key_buf = addr.octets().to_vec(); - key_buf.reverse(); +#[test] +fn ipv6_mcast_ff01_rejected() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); - (key_buf, mac.to_vec()) + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + + npu.run(); + + // IPv6 interface-local multicast (ff01::1) is always rejected + // regardless of hop_limit. 
+ let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 40 + payload.len()]; + + let mut ip = MutableIpv6Packet::new(&mut ip_data).unwrap(); + ip.set_version(6); + ip.set_source("fd00:1::1".parse().unwrap()); + ip.set_destination("ff01::1".parse().unwrap()); + ip.set_payload_length(payload.len() as u16); + ip.set_next_header(IpNextHeaderProtocol::new(17)); + ip.set_hop_limit(64); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy1.recv_buffer_len(), + 1, + "ff01:: should be rejected regardless of hop_limit" + ); + assert_eq!(phy0.recv_buffer_len(), 0, "ff01:: should not reflect"); + + Ok(()) } -fn mac_rewrite_entry(port: u16, mac: [u8; 6]) -> (Vec, Vec) { - let key_buf = port.to_le_bytes().to_vec(); - let param_buf = mac.to_vec(); +#[test] +fn ipv4_mcast_ttl0_rejected() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); - (key_buf, param_buf) + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + + npu.run(); + + let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 20 + payload.len()]; + let mut ip = MutableIpv4Packet::new(&mut ip_data).unwrap(); + ip.set_version(4); + ip.set_header_length(5); + ip.set_source("10.0.0.1".parse().unwrap()); + ip.set_destination("238.1.1.1".parse().unwrap()); + ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + ip.set_total_length(20 + payload.len() as u16); + ip.set_ttl(0); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy1.recv_buffer_len(), + 1, + "IPv4 mcast TTL=0 should be rejected" + ); + + Ok(()) +} + +#[test] +fn 
ipv6_mcast_hop_limit0_rejected() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); + + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + + npu.run(); + + let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 40 + payload.len()]; + let mut ip = MutableIpv6Packet::new(&mut ip_data).unwrap(); + ip.set_version(6); + ip.set_source("fd00:1::1".parse().unwrap()); + ip.set_destination("ff0e::1".parse().unwrap()); + ip.set_payload_length(payload.len() as u16); + ip.set_next_header(IpNextHeaderProtocol::new(17)); + ip.set_hop_limit(0); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy1.recv_buffer_len(), + 1, + "IPv6 mcast hop_limit=0 should be rejected" + ); + + Ok(()) +} + +// ff02:: (link-local multicast) bypasses hop limit check and routes to +// scrimlet via fwd_to_scrimlet(). The sidecar header (ethertype 0x0901) +// wraps the original packet and sends to port 0. +#[test] +fn ipv6_mcast_ff02_to_scrimlet() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(2); + pipeline_init(&mut pipeline); + + let mut npu = SoftNpu::new(2, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + + npu.run(); + + // ff02::1 with hop_limit=1: would be rejected for non-link-local + // multicast, but ff02:: bypasses the check. 
+ let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 40 + payload.len()]; + let mut ip = MutableIpv6Packet::new(&mut ip_data).unwrap(); + ip.set_version(6); + ip.set_source("fd00:1::1".parse().unwrap()); + ip.set_destination("ff02::1".parse().unwrap()); + ip.set_payload_length(payload.len() as u16); + ip.set_next_header(IpNextHeaderProtocol::new(17)); + ip.set_hop_limit(1); + ip.set_payload(&payload); + + // Send from port 1 so scrimlet (port 0) can receive it. + phy1.send(&[TxFrame::new(phy0.mac, ETHERTYPE_IPV6, &ip_data)])?; + + wait_for( + || phy0.recv_buffer_len() > 0, + "scrimlet should receive ff02:: packet", + ); + let fs = phy0.recv(); + let f = &fs[0]; + + // fwd_to_scrimlet sets ethernet.ether_type = 0x0901 (sidecar header). + assert_eq!( + f.ethertype, ETHERTYPE_SIDECAR, + "ff02:: packet should arrive with sidecar header ethertype" + ); + + Ok(()) +} + +// Basic multicast replication via IPv4 multicast dst, no geneve. +// Non-encapsulated traffic bypasses source filtering. +#[test] +fn mcast_replication_basic() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + // Route for IPv4 multicast 224.0.0.0/4 → idx=6, slot=1. + let (key_buf, param_buf) = router_idx_entry("224.0.0.0", 4, 6, 1); + pipeline + .add_ingress_router_v4_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + let (key_buf, param_buf) = router_forward_entry(6, "1.2.3.1", 1, 0); + pipeline.add_ingress_router_v4_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + // Replication bitmap for 224.1.1.1. 
+ let (key_buf, param_buf) = + mcast_replication_v4_entry("224.1.1.1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v4_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + // Plain IPv4 multicast packet, no geneve. Source filtering is + // bypassed for non-encapsulated traffic (allow_source_mcast = true). + let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 20 + payload.len()]; + + let mut ip = MutableIpv4Packet::new(&mut ip_data).unwrap(); + ip.set_version(4); + ip.set_header_length(5); + ip.set_source("10.0.0.1".parse().unwrap()); + ip.set_destination("224.1.1.1".parse().unwrap()); + ip.set_total_length(20 + payload.len() as u16); + ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + ip.set_ttl(64); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &ip_data)])?; + + wait_for( + || phy1.recv_buffer_len() > 0, + "port 1 mcast copy (external)", + ); + wait_for( + || phy2.recv_buffer_len() > 0, + "port 2 mcast copy (underlay)", + ); + + Ok(()) +} + +// Multicast source filter allows matching (S,G) pairs to proceed +// to replication group lookup. +#[test] +fn mcast_source_filter_allows() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + // Route for outer multicast prefix ff0e::/16 → idx=6, slot=1. + let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1); + pipeline + .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + // Forward entry for idx=6 → port=1, gw=fe80::1. 
+ let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0); + pipeline.add_ingress_router_v6_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + let inner_mcast_dst = "238.1.1.1"; + + // Source filter: allow inner src 10.0.0.0/8 -> inner dst. + let (key_buf, param_buf) = + mcast_source_filter_v4_entry("10.0.0.0", 8, inner_mcast_dst); + pipeline.add_ingress_mcast_mcast_source_filter_v4_entry( + "allow_source", + &key_buf, + ¶m_buf, + 0, + ); + + // Replication bitmap for outer dst ff0e::1. + let (key_buf, param_buf) = + mcast_replication_v6_entry("ff0e::1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v6_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + // MAC rewrite for port 2. + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + let ip_data = geneve_mcast_v4_pkt( + "fd00:1::1", + "ff0e::1", + "10.0.0.1", + inner_mcast_dst, + ); + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + + // Source filter matches (10.0.0.1 in 10.0.0.0/8), replication proceeds. + wait_for( + || phy1.recv_buffer_len() > 0, + "port 1 mcast copy (external)", + ); + wait_for( + || phy2.recv_buffer_len() > 0, + "port 2 mcast copy (underlay)", + ); + + Ok(()) +} + +// Source filter denies traffic from outside the allowed prefix. +#[test] +fn mcast_source_filter_denies() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + // Route for outer multicast prefix. 
+ let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1); + pipeline + .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0); + pipeline.add_ingress_router_v6_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + let inner_mcast_dst = "238.1.1.1"; + + // Source filter only allows 10.0.0.0/8 -> inner_mcast_dst. + let (key_buf, param_buf) = + mcast_source_filter_v4_entry("10.0.0.0", 8, inner_mcast_dst); + pipeline.add_ingress_mcast_mcast_source_filter_v4_entry( + "allow_source", + &key_buf, + ¶m_buf, + 0, + ); + + // Replication bitmap for ff0e::1. + let (key_buf, param_buf) = + mcast_replication_v6_entry("ff0e::1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v6_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + // Inner src 192.168.0.1 is outside the allowed 10.0.0.0/8 prefix. + // Source filter should deny, no replication. + let ip_data = geneve_mcast_v4_pkt( + "fd00:1::1", + "ff0e::1", + "192.168.0.1", + inner_mcast_dst, + ); + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy2.recv_buffer_len(), + 0, + "port 2 should not receive mcast copy when source filter denies" + ); + + Ok(()) +} + +// When hdr.oxg_mcast.mcast_tag == 0 (External), the underlay bitmap is +// suppressed. Only external bitmap ports receive copies. 
+#[test] +fn mcast_replication_suppresses_underlay() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1); + pipeline + .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0); + pipeline.add_ingress_router_v6_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + let inner_mcast_dst = "238.1.1.1"; + + let (key_buf, param_buf) = + mcast_source_filter_v4_entry("10.0.0.0", 8, inner_mcast_dst); + pipeline.add_ingress_mcast_mcast_source_filter_v4_entry( + "allow_source", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = + mcast_replication_v6_entry("ff0e::1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v6_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + // hdr.oxg_mcast.mcast_tag == 0 (External) -> suppress underlay bitmap. + let ip_data = geneve_mcast_v4_pkt_repl( + "fd00:1::1", + "ff0e::1", + "10.0.0.1", + inner_mcast_dst, + Some(MCAST_TAG_EXTERNAL), + ); + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + + // External ports (port 1) should get a copy; underlay (port 2) suppressed. + wait_for( + || phy1.recv_buffer_len() > 0, + "port 1 mcast copy (external)", + ); + assert_eq!( + phy2.recv_buffer_len(), + 0, + "port 2 should not receive copy when underlay suppressed" + ); + + Ok(()) +} + +// When hdr.oxg_mcast.mcast_tag == 1 (Underlay), the external bitmap is +// suppressed. Only underlay bitmap ports receive copies. 
+#[test] +fn mcast_replication_suppresses_external() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1); + pipeline + .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0); + pipeline.add_ingress_router_v6_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + let inner_mcast_dst = "238.1.1.1"; + + let (key_buf, param_buf) = + mcast_source_filter_v4_entry("10.0.0.0", 8, inner_mcast_dst); + pipeline.add_ingress_mcast_mcast_source_filter_v4_entry( + "allow_source", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = + mcast_replication_v6_entry("ff0e::1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v6_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + // hdr.oxg_mcast.mcast_tag == 1 (Underlay) -> suppress external bitmap. + let ip_data = geneve_mcast_v4_pkt_repl( + "fd00:1::1", + "ff0e::1", + "10.0.0.1", + inner_mcast_dst, + Some(MCAST_TAG_UNDERLAY), + ); + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + + // Underlay ports (port 2) should get a copy; external (port 1) suppressed. + wait_for( + || phy2.recv_buffer_len() > 0, + "port 2 mcast copy (underlay)", + ); + assert_eq!( + phy1.recv_buffer_len(), + 0, + "port 1 should not receive copy when external suppressed" + ); + + Ok(()) +} + +// IPv6 inner multicast source filtering: allow path. +// Uses inner IPv6 dst ff0e::99 (full-byte slice [127:120] == 0xff, no +// x4c workaround needed). 
+#[test] +fn mcast_source_filter_v6_inner_allows() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1); + pipeline + .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0); + pipeline.add_ingress_router_v6_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + // Source filter: allow inner src fd00::/16, inner dst ff0e::99. + let (key_buf, param_buf) = + mcast_source_filter_v6_entry("fd00::", 16, "ff0e::99"); + pipeline.add_ingress_mcast_mcast_source_filter_v6_entry( + "allow_source", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = + mcast_replication_v6_entry("ff0e::1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v6_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + let ip_data = geneve_mcast_v6_inner_pkt( + "fd00:1::1", + "ff0e::1", + "fd00::1", + "ff0e::99", + ); + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + + wait_for( + || phy1.recv_buffer_len() > 0, + "port 1 mcast copy (external)", + ); + wait_for( + || phy2.recv_buffer_len() > 0, + "port 2 mcast copy (underlay)", + ); + + Ok(()) +} + +// IPv6 inner source filter deny path. 
+#[test] +fn mcast_source_filter_v6_inner_denies() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1); + pipeline + .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0); + pipeline.add_ingress_router_v6_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + let (key_buf, param_buf) = + mcast_source_filter_v6_entry("fd00::", 16, "ff0e::99"); + pipeline.add_ingress_mcast_mcast_source_filter_v6_entry( + "allow_source", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = + mcast_replication_v6_entry("ff0e::1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v6_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let _phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + // Inner src fe80::1 is outside the allowed fd00::/16. + let ip_data = geneve_mcast_v6_inner_pkt( + "fd00:1::1", + "ff0e::1", + "fe80::1", + "ff0e::99", + ); + phy0.send(&[TxFrame::new(_phy1.mac, ETHERTYPE_IPV6, &ip_data)])?; + phy0.send(&[TxFrame::new(_phy1.mac, ETHERTYPE_IPV4, &sentinel_v4())])?; + wait_for(|| _phy1.recv_buffer_len() > 0, "sentinel"); + + assert_eq!( + phy2.recv_buffer_len(), + 0, + "port 2 should not receive copy when v6 source filter denies" + ); + + Ok(()) +} + +// IPv4 outer multicast replication (non-encapsulated). +// +// This exercises the mcast_replication_v4 table. +#[test] +fn mcast_replication_v4_outer() -> Result<(), anyhow::Error> { + let mut pipeline = main_pipeline::new(3); + pipeline_init(&mut pipeline); + + // IPv4 route for 224.0.0.0/4 (covers 238.1.1.1) -> idx=6.
+ let (key_buf, param_buf) = router_idx_entry("224.0.0.0", 4, 6, 1); + pipeline + .add_ingress_router_v4_idx_rtr_entry("index", &key_buf, ¶m_buf, 0); + + let (key_buf, param_buf) = router_forward_entry(6, "1.2.3.1", 1, 0); + pipeline.add_ingress_router_v4_route_rtr_entry( + "forward", &key_buf, ¶m_buf, 0, + ); + + // Replication bitmap for 238.1.1.1. + let (key_buf, param_buf) = + mcast_replication_v4_entry("238.1.1.1", &[1], &[2]); + pipeline.add_ingress_mcast_mcast_replication_v4_entry( + "set_port_bitmap", + &key_buf, + ¶m_buf, + 0, + ); + + let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]); + pipeline + .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, ¶m_buf, 0); + + let mut npu = SoftNpu::new(3, pipeline, false); + let phy0 = npu.phy(0); + let phy1 = npu.phy(1); + let phy2 = npu.phy(2); + + npu.run(); + + let payload = [0u8; 8]; + let mut ip_data: Vec = vec![0; 20 + payload.len()]; + let mut ip = MutableIpv4Packet::new(&mut ip_data).unwrap(); + ip.set_version(4); + ip.set_header_length(5); + ip.set_source("10.0.0.1".parse().unwrap()); + ip.set_destination("238.1.1.1".parse().unwrap()); + ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + ip.set_total_length(20 + payload.len() as u16); + ip.set_ttl(64); + ip.set_payload(&payload); + + phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &ip_data)])?; + + wait_for( + || phy1.recv_buffer_len() > 0, + "port 1 mcast copy (external)", + ); + wait_for( + || phy2.recv_buffer_len() > 0, + "port 2 mcast copy (underlay)", + ); + + Ok(()) +} + +// Non-multicast inner destination in geneve bypasses source filtering. 
+#[test]
+fn mcast_non_mcast_inner_bypasses_source_filter() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    // No source filter entries. Non-mcast inner should bypass.
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry("ff0e::1", &[1], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Inner dst 10.0.0.2 is unicast, not multicast. Source filter bypassed.
+    let ip_data =
+        geneve_mcast_v4_pkt("fd00:1::1", "ff0e::1", "10.0.0.1", "10.0.0.2");
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(
+        || phy1.recv_buffer_len() > 0,
+        "port 1 mcast copy (external)",
+    );
+    wait_for(
+        || phy2.recv_buffer_len() > 0,
+        "port 2 mcast copy (underlay)",
+    );
+
+    Ok(())
+}
+
+// Untagged geneve multicast packet (no oxg_mcast option) passes through
+// with geneve options unchanged and skips decap even when a decap table
+// entry exists for the egress port. The decap gate requires
+// oxg_mcast.isValid() and mcast_tag == 2.
+#[test]
+fn mcast_egress_untagged_passthrough() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mcast_replication_v6_entry("ff0e::1", &[1], &[]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Decap entry for port 1, but the packet has no mcast tag.
+    let (key_buf, param_buf) = mcast_egress_decap_entry(1);
+    pipeline
+        .add_egress_mcast_egress_decap_entry("decap", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+
+    npu.run();
+
+    let ip_data =
+        geneve_mcast_v4_pkt("fd00:1::1", "ff0e::1", "10.0.0.1", "238.1.1.1");
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 copy");
+    let fs = phy1.recv();
+    let f = &fs[0];
+
+    // Encap preserved despite decap table entry -> no tag means no decap.
+    assert_eq!(f.ethertype, ETHERTYPE_IPV6, "outer IPv6 intact");
+    let ip = Ipv6Packet::new(&f.payload).unwrap();
+    let udp = UdpPacket::new(ip.payload()).unwrap();
+    assert_eq!(udp.get_destination(), GENEVE_UDP_PORT, "geneve intact");
+
+    let geneve = GenevePacket::new(udp.payload()).unwrap();
+    assert_eq!(
+        geneve.get_options_len(),
+        1,
+        "geneve opt_len unchanged (no egress stamping)"
+    );
+
+    // Dst MAC derived from outer IPv6 dst (ff0e::1).
+    // RFC 2464: 33:33 + lower 32 bits = 33:33:00:00:00:01.
+    assert_eq!(
+        f.dst,
+        [0x33, 0x33, 0x00, 0x00, 0x00, 0x01],
+        "encapsulated copy dst MAC from outer IPv6 (ff0e::1)"
+    );
+
+    Ok(())
+}
+
+// Egress preserves an existing mcast option. The option count and tag
+// value pass through unchanged (read-only egress model).
+#[test]
+fn mcast_egress_preserves_existing_mcast_tag() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mcast_replication_v6_entry("ff0e::1", &[1], &[]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+
+    npu.run();
+
+    // Existing mcast option with mcast_tag=0 (External, suppresses underlay).
+    // Tag preserved through pipeline (read-only egress model).
+    let ip_data = geneve_mcast_v4_pkt_repl(
+        "fd00:1::1",
+        "ff0e::1",
+        "10.0.0.1",
+        "238.1.1.1",
+        Some(MCAST_TAG_EXTERNAL),
+    );
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 preserved tag copy");
+    let fs = phy1.recv();
+    let f = &fs[0];
+
+    let ip = Ipv6Packet::new(&f.payload).unwrap();
+    let udp = UdpPacket::new(ip.payload()).unwrap();
+    let geneve = GenevePacket::new(udp.payload()).unwrap();
+
+    // opt_len should remain 3 (external + mcast tag + mcast data).
+    assert_eq!(
+        geneve.get_options_len(),
+        3,
+        "opt_len unchanged when mcast option already present"
+    );
+
+    // Deparse external_tag first, then mcast_tag, then oxg_mcast data.
+    let opt0 = GeneveOptPacket::new(geneve.payload()).unwrap();
+    assert_eq!(opt0.get_option_type(), 0, "external tag");
+
+    let opt1 = GeneveOptPacket::new(opt0.payload()).unwrap();
+    assert_eq!(opt1.get_option_type(), 1, "mcast tag");
+    assert_eq!(opt1.get_option_len(), 1);
+
+    let mcast = OxgMcastOptPacket::new(opt1.payload()).unwrap();
+    assert_eq!(mcast.get_mcast_tag(), 0, "tag preserved through pipeline");
+
+    // Lengths unchanged (no new option added).
+    let sent_ip = Ipv6Packet::new(&ip_data).unwrap();
+    let sent_udp = UdpPacket::new(sent_ip.payload()).unwrap();
+    assert_eq!(udp.get_length(), sent_udp.get_length());
+    assert_eq!(ip.get_payload_length(), sent_ip.get_payload_length());
+
+    Ok(())
+}
+
+// Egress decapsulates packets tagged Both(2) on ports with a decap entry.
+// The decapsulated replica exits geneve encapsulation and is forwarded to
+// a customer-facing port. Ports not in the decap table keep geneve intact.
+#[test]
+fn mcast_egress_decap() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff04::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry(UNDERLAY_MCAST_DST, &[1, 2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Decap entry for port 2.
+    let (key_buf, param_buf) = mcast_egress_decap_entry(2);
+    pipeline
+        .add_egress_mcast_egress_decap_entry("decap", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Send with mcast_tag=2 (Both) so egress decap gate is satisfied.
+    let ip_data = geneve_mcast_v4_pkt_repl(
+        "fd00:1::1",
+        UNDERLAY_MCAST_DST,
+        "10.0.0.1",
+        "238.1.1.1",
+        Some(MCAST_TAG_UNDERLAY_EXTERNAL),
+    );
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 encapped copy");
+    wait_for(|| phy2.recv_buffer_len() > 0, "port 2 decapped copy");
+
+    // Port 1: geneve encap intact, mcast option from sender preserved.
+    let fs1 = phy1.recv();
+    let f1 = &fs1[0];
+    assert_eq!(f1.ethertype, ETHERTYPE_IPV6, "port 1 keeps IPv6 encap");
+    // Encapsulated copy: dst MAC from outer IPv6 dst (ff04::1).
+    // RFC 2464: 33:33 + lower 32 bits of ff04::1 = 33:33:00:00:00:01.
+    assert_eq!(
+        f1.dst,
+        [0x33, 0x33, 0x00, 0x00, 0x00, 0x01],
+        "port 1 encapped dst MAC from outer IPv6"
+    );
+    let ip1 = Ipv6Packet::new(&f1.payload).unwrap();
+    let udp1 = UdpPacket::new(ip1.payload()).unwrap();
+    let geneve1 = GenevePacket::new(udp1.payload()).unwrap();
+    assert_eq!(
+        geneve1.get_options_len(),
+        3,
+        "port 1 keeps mcast option from sender"
+    );
+
+    // Port 2: decapped to inner IPv4.
+    let fs2 = phy2.recv();
+    let f2 = &fs2[0];
+    assert_eq!(
+        f2.ethertype, ETHERTYPE_IPV4,
+        "port 2 decapped to inner IPv4"
+    );
+
+    // Decap must restore inner ethernet MACs, not keep underlay MACs.
+    //
+    // Inner dst: 01:00:5e:01:01:01 (derived from 238.1.1.1).
+    // Inner src: aa:bb:cc:dd:ee:ff (set by geneve_mcast_v4_pkt).
+    assert_eq!(
+        f2.dst,
+        [0x01, 0x00, 0x5e, 0x01, 0x01, 0x01],
+        "decap restores inner eth dst (multicast MAC)"
+    );
+    assert_eq!(
+        f2.src,
+        [0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff],
+        "decap restores inner eth src"
+    );
+
+    let inner_ip = Ipv4Packet::new(&f2.payload).unwrap();
+    assert_eq!(
+        inner_ip.get_ttl(),
+        63,
+        "inner TTL decremented from 64 to 63 by decap"
+    );
+
+    // IPv4 header checksum must be valid after TTL decrement.
+    let csum = pnet::packet::ipv4::checksum(&inner_ip);
+    assert_eq!(
+        inner_ip.get_checksum(),
+        csum,
+        "IPv4 header checksum recomputed after TTL decrement"
+    );
+    assert_eq!(
+        inner_ip.get_destination(),
+        "238.1.1.1".parse::<std::net::Ipv4Addr>().unwrap(),
+        "inner dst preserved after decap",
+    );
+
+    Ok(())
+}
+
+// Egress decap must drop inner packets with TTL <= 1 instead of wrapping.
+//
+// An inner TTL of 1 decremented to 0 should be dropped, not forwarded.
+// An inner TTL of 0 must never wrap to 255.
+#[test]
+fn mcast_egress_decap_drops_inner_ttl1() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff04::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry(UNDERLAY_MCAST_DST, &[1, 2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Port 1 in external only (keeps encap, no decap entry).
+    // Port 2 in both external and underlay (decap entry below).
+    let (key_buf, param_buf) = mcast_egress_decap_entry(2);
+    pipeline
+        .add_egress_mcast_egress_decap_entry("decap", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Geneve mcast packet with mcast_tag=2 (Both) and inner TTL=1.
+    // The mcast option adds 8 bytes, shifting the inner TTL offset.
+    let mut ip_data = geneve_mcast_v4_pkt_repl(
+        "fd00:1::1",
+        UNDERLAY_MCAST_DST,
+        "10.0.0.1",
+        "238.1.1.1",
+        Some(MCAST_TAG_UNDERLAY_EXTERNAL),
+    );
+    ip_data[INNER_IPV4_TTL_OFFSET + 8] = 1;
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    // Port 1 (encapped) should still receive since inner TTL check only
+    // applies at decap. Use it as our drain marker.
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 encapped copy");
+
+    // Port 2 should not receive: inner TTL=1 decremented to 0 means drop.
+    assert_eq!(
+        phy2.recv_buffer_len(),
+        0,
+        "decap must drop inner packet with TTL=1 (would become 0)"
+    );
+
+    Ok(())
+}
+
+// Same as above but for inner TTL=0 (should never be forwarded).
+#[test]
+fn mcast_egress_decap_drops_inner_ttl0() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff04::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry(UNDERLAY_MCAST_DST, &[1, 2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mcast_egress_decap_entry(2);
+    pipeline
+        .add_egress_mcast_egress_decap_entry("decap", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Inner TTL=0 with mcast_tag=2 (Both) and must not wrap to 255.
+    //
+    // The mcast option adds 8 bytes, shifting the inner TTL offset.
+    let mut ip_data = geneve_mcast_v4_pkt_repl(
+        "fd00:1::1",
+        UNDERLAY_MCAST_DST,
+        "10.0.0.1",
+        "238.1.1.1",
+        Some(MCAST_TAG_UNDERLAY_EXTERNAL),
+    );
+    ip_data[INNER_IPV4_TTL_OFFSET + 8] = 0;
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 encapped copy");
+    assert_eq!(
+        phy2.recv_buffer_len(),
+        0,
+        "decap must drop inner packet with TTL=0 (would wrap to 255)"
+    );
+
+    Ok(())
+}
+
+// Egress decapsulates inner IPv6 payloads, decrementing hop_limit and
+// setting ethertype to 0x86dd.
+#[test]
+fn mcast_egress_decap_v6_inner() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff04::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v6_entry("fd00::", 16, "ff0e::99");
+    pipeline.add_ingress_mcast_mcast_source_filter_v6_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry(UNDERLAY_MCAST_DST, &[1, 2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Port 1 in external only.
+    // Port 2 in both external and underlay, triggers decap.
+
+    // Decap entry for port 2.
+    let (key_buf, param_buf) = mcast_egress_decap_entry(2);
+    pipeline
+        .add_egress_mcast_egress_decap_entry("decap", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Send with mcast_tag=2 (Both) so egress decap gate is satisfied.
+    let eth = inner_mcast_v6_frame("fd00::1", "ff0e::99");
+    let ip_data = wrap_geneve(
+        "fd00:1::1",
+        UNDERLAY_MCAST_DST,
+        &eth,
+        Some(MCAST_TAG_UNDERLAY_EXTERNAL),
+    );
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 encapped copy");
+    wait_for(|| phy2.recv_buffer_len() > 0, "port 2 decapped copy");
+
+    // Port 1: geneve encap intact.
+    let fs1 = phy1.recv();
+    let f1 = &fs1[0];
+    assert_eq!(f1.ethertype, ETHERTYPE_IPV6, "port 1 keeps IPv6 encap");
+
+    // Port 2: decapped to inner IPv6.
+    let fs2 = phy2.recv();
+    let f2 = &fs2[0];
+    assert_eq!(
+        f2.ethertype, ETHERTYPE_IPV6,
+        "port 2 decapped to inner IPv6"
+    );
+
+    let inner_ip = Ipv6Packet::new(&f2.payload).unwrap();
+    assert_eq!(
+        inner_ip.get_hop_limit(),
+        63,
+        "inner hop_limit decremented from 64 to 63 by decap"
+    );
+    assert_eq!(
+        inner_ip.get_destination(),
+        "ff0e::99".parse::<std::net::Ipv6Addr>().unwrap(),
+        "inner dst preserved after decap",
+    );
+    assert_eq!(
+        inner_ip.get_next_header().0,
+        58,
+        "inner next_header preserved (ICMPv6)"
+    );
+
+    Ok(())
+}
+
+// Both ports in the replication bitmap receive encapsulated copies
+// with geneve options unchanged (opt_len=1, external tag only).
+// No egress stamping occurs (read-only tag model).
+#[test]
+fn mcast_egress_multi_port_passthrough() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry("ff0e::1", &[1, 2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Port 1 in external only.
+    // Port 2 in both external and underlay.
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    let ip_data =
+        geneve_mcast_v4_pkt("fd00:1::1", "ff0e::1", "10.0.0.1", "238.1.1.1");
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 copy");
+    wait_for(|| phy2.recv_buffer_len() > 0, "port 2 copy");
+
+    // Both ports receive encapsulated copies with geneve options unchanged
+    // (no egress stamping). opt_len stays 1 (external tag only).
+    let fs1 = phy1.recv();
+    let f1 = &fs1[0];
+    let ip1 = Ipv6Packet::new(&f1.payload).unwrap();
+    let udp1 = UdpPacket::new(ip1.payload()).unwrap();
+    let geneve1 = GenevePacket::new(udp1.payload()).unwrap();
+    assert_eq!(
+        geneve1.get_options_len(),
+        1,
+        "port 1 geneve opt_len unchanged (no stamping)"
+    );
+
+    let fs2 = phy2.recv();
+    let f2 = &fs2[0];
+    let ip2 = Ipv6Packet::new(&f2.payload).unwrap();
+    let udp2 = UdpPacket::new(ip2.payload()).unwrap();
+    let geneve2 = GenevePacket::new(udp2.payload()).unwrap();
+    assert_eq!(
+        geneve2.get_options_len(),
+        1,
+        "port 2 geneve opt_len unchanged (no stamping)"
+    );
+
+    Ok(())
+}
+
+// Send with mcast_tag=1 (Underlay) as the port is in underlay only. Verify the
+// tag passes through unchanged (read-only egress).
+#[test]
+fn mcast_egress_tagged_passthrough() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // mcast_tag=1 (Underlay) suppresses external, so only underlay ports
+    // get copies. Port 1 is underlay only.
+    let (key_buf, param_buf) = mcast_replication_v6_entry("ff0e::1", &[], &[1]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+
+    npu.run();
+
+    // mcast_tag=1 (Underlay) suppresses the external bitmap (empty
+    // here anyway). Only underlay port 1 gets a copy.
+    // Tag=1 passes through (read-only egress).
+    let ip_data = geneve_mcast_v4_pkt_repl(
+        "fd00:1::1",
+        "ff0e::1",
+        "10.0.0.1",
+        "238.1.1.1",
+        Some(MCAST_TAG_UNDERLAY),
+    );
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 passthrough tag copy");
+    let fs = phy1.recv();
+    let f = &fs[0];
+
+    let ip = Ipv6Packet::new(&f.payload).unwrap();
+    let udp = UdpPacket::new(ip.payload()).unwrap();
+    let geneve = GenevePacket::new(udp.payload()).unwrap();
+
+    // Sent with mcast option, meaning opt_len=3.
+    assert_eq!(geneve.get_options_len(), 3, "mcast option preserved");
+
+    let opt0 = GeneveOptPacket::new(geneve.payload()).unwrap();
+    let opt1 = GeneveOptPacket::new(opt0.payload()).unwrap();
+    let mcast = OxgMcastOptPacket::new(opt1.payload()).unwrap();
+    assert_eq!(
+        mcast.get_mcast_tag(),
+        1,
+        "mcast_tag=1 passes through (read-only egress)"
+    );
+
+    Ok(())
+}
+
+// Inner UDP port numbers and length are preserved after egress decap
+// strips the outer encapsulation.
+#[test]
+fn mcast_egress_decap_preserves_inner_udp() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff04::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry(UNDERLAY_MCAST_DST, &[2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Port 2 in both external and underlay, triggers decap.
+    let (key_buf, param_buf) = mcast_egress_decap_entry(2);
+    pipeline
+        .add_egress_mcast_egress_decap_entry("decap", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Send with mcast_tag=2 (Both) so egress decap gate is satisfied.
+    let eth = inner_mcast_v4_udp_frame("10.0.0.1", "238.1.1.1", 12345, 80);
+    let ip_data = wrap_geneve(
+        "fd00:1::1",
+        UNDERLAY_MCAST_DST,
+        &eth,
+        Some(MCAST_TAG_UNDERLAY_EXTERNAL),
+    );
+    phy0.send(&[TxFrame::new(phy2.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy2.recv_buffer_len() > 0, "port 2 decapped UDP copy");
+    let fs = phy2.recv();
+    let f = &fs[0];
+
+    assert_eq!(f.ethertype, ETHERTYPE_IPV4, "decapped to inner IPv4");
+    let inner_ip = Ipv4Packet::new(&f.payload).unwrap();
+    assert_eq!(
+        inner_ip.get_next_level_protocol().0,
+        17,
+        "inner proto is UDP"
+    );
+
+    let inner_udp = UdpPacket::new(inner_ip.payload()).unwrap();
+    assert_eq!(
+        inner_udp.get_source(),
+        12345,
+        "inner UDP src port preserved"
+    );
+    assert_eq!(
+        inner_udp.get_destination(),
+        80,
+        "inner UDP dst port preserved"
+    );
+    assert_eq!(inner_udp.get_length(), 12, "inner UDP length preserved");
+
+    Ok(())
+}
+
+// Decap with VLAN: the decap_vlan action strips geneve and inserts a VLAN
+// tag with the configured VLAN ID (vid). The inner ethertype moves into the
+// VLAN header and the outer ethertype becomes 0x8100.
+#[test]
+fn mcast_egress_decap_vlan() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff04::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry(UNDERLAY_MCAST_DST, &[1, 2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Port 1 in external only.
+    // Port 2 in both external and underlay.
+
+    // VLAN decap entry for port 2 with vid=100.
+    let (key_buf, param_buf) = mcast_egress_decap_vlan_entry(2, 100);
+    pipeline.add_egress_mcast_egress_decap_entry(
+        "decap_vlan",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Send with mcast_tag=2 (Both) so egress decap gate is satisfied.
+    let ip_data = geneve_mcast_v4_pkt_repl(
+        "fd00:1::1",
+        UNDERLAY_MCAST_DST,
+        "10.0.0.1",
+        "238.1.1.1",
+        Some(MCAST_TAG_UNDERLAY_EXTERNAL),
+    );
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy2.recv_buffer_len() > 0, "port 2 vlan decapped copy");
+    let fs = phy2.recv();
+    let f = &fs[0];
+
+    // The frame receiver strips the VLAN tag into f.vid and sets
+    // f.ethertype to the inner ethertype.
+    assert_eq!(f.vid, Some(100), "VLAN tag present with vid=100");
+    assert_eq!(f.ethertype, ETHERTYPE_IPV4, "inner ethertype is IPv4");
+
+    let inner_ip = Ipv4Packet::new(&f.payload).unwrap();
+    assert_eq!(inner_ip.get_ttl(), 63, "inner TTL decremented");
+
+    // Multicast dst MAC derived from inner IPv4 (238.1.1.1).
+    assert_eq!(
+        f.dst,
+        [0x01, 0x00, 0x5e, 0x01, 0x01, 0x01],
+        "decap_vlan derives mcast dst MAC from inner IPv4"
+    );
+
+    Ok(())
+}
+
+// Port in both external and underlay bitmaps, sent with tag=Both(2) but has
+// no mcast_egress_decap entry. Geneve encapsulation stays intact.
+#[test]
+fn mcast_both_tag_without_decap_keeps_encap() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    let (key_buf, param_buf) = router_idx_entry("ff04::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
+    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
+        "allow_source",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry(UNDERLAY_MCAST_DST, &[2], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    // Port 2 in both external and underlay, no decap entry.
+
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Send with mcast_tag=2 (Both) to exercise the no-decap path.
+    let ip_data = geneve_mcast_v4_pkt_repl(
+        "fd00:1::1",
+        UNDERLAY_MCAST_DST,
+        "10.0.0.1",
+        "238.1.1.1",
+        Some(MCAST_TAG_UNDERLAY_EXTERNAL),
+    );
+    phy0.send(&[TxFrame::new(phy2.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(|| phy2.recv_buffer_len() > 0, "port 2 copy with tag=Both");
+    let fs = phy2.recv();
+    let f = &fs[0];
+
+    // Encap preserved: outer IPv6 with geneve.
+    assert_eq!(f.ethertype, ETHERTYPE_IPV6, "outer IPv6 encap intact");
+    let ip = Ipv6Packet::new(&f.payload).unwrap();
+    let udp = UdpPacket::new(ip.payload()).unwrap();
+    assert_eq!(
+        udp.get_destination(),
+        GENEVE_UDP_PORT,
+        "geneve UDP port intact"
+    );
+
+    let geneve = GenevePacket::new(udp.payload()).unwrap();
+    assert_eq!(
+        geneve.get_options_len(),
+        3,
+        "mcast option preserved from sender"
+    );
+
+    // Verify tag = Both = 2 preserved through pipeline.
+    let opt0 = GeneveOptPacket::new(geneve.payload()).unwrap();
+    let opt1 = GeneveOptPacket::new(opt0.payload()).unwrap();
+    let mcast = OxgMcastOptPacket::new(opt1.payload()).unwrap();
+    assert_eq!(
+        mcast.get_mcast_tag(),
+        2,
+        "tag = Both = 2, no decap entry: encap preserved"
+    );
+
+    Ok(())
+}
+
+// Non-encapsulated IPv6 multicast replication.
+//
+// No geneve header means allow_source_mcast = true, and
+// mcast_replication_v6 matches on hdr.ipv6.dst directly.
+#[test]
+fn mcast_replication_v6_outer() -> Result<(), anyhow::Error> {
+    let mut pipeline = main_pipeline::new(3);
+    pipeline_init(&mut pipeline);
+
+    // IPv6 route for ff0e::/16 -> idx=6.
+    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
+    pipeline
+        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);
+
+    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
+    pipeline.add_ingress_router_v6_route_rtr_entry(
+        "forward", &key_buf, &param_buf, 0,
+    );
+
+    // Replication bitmap for ff0e::1.
+    let (key_buf, param_buf) =
+        mcast_replication_v6_entry("ff0e::1", &[1], &[2]);
+    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
+        "set_port_bitmap",
+        &key_buf,
+        &param_buf,
+        0,
+    );
+
+    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+    let (key_buf, param_buf) = mac_rewrite_entry(2, [1, 2, 3, 4, 5, 8]);
+    pipeline
+        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);
+
+    let mut npu = SoftNpu::new(3, pipeline, false);
+    let phy0 = npu.phy(0);
+    let phy1 = npu.phy(1);
+    let phy2 = npu.phy(2);
+
+    npu.run();
+
+    // Raw IPv6 multicast, no geneve.
+    let payload = [0u8; 8];
+    let mut ip_data = vec![0u8; 40 + payload.len()];
+    let mut ip = MutableIpv6Packet::new(&mut ip_data).unwrap();
+    ip.set_version(6);
+    ip.set_source("fd00:1::1".parse().unwrap());
+    ip.set_destination("ff0e::1".parse().unwrap());
+    ip.set_payload_length(payload.len() as u16);
+    ip.set_next_header(IpNextHeaderProtocol::new(17));
+    ip.set_hop_limit(64);
+    ip.set_payload(&payload);
+
+    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;
+
+    wait_for(
+        || phy1.recv_buffer_len() > 0,
+        "port 1 mcast copy (external)",
+    );
+    wait_for(
+        || phy2.recv_buffer_len() > 0,
+        "port 2 mcast copy (underlay)",
+    );
+
+    Ok(())
+}
+
+// PRE filters out the ingress port from multicast replication.
#[test]
fn mcast_ingress_port_suppressed() -> Result<(), anyhow::Error> {
    let mut pipeline = main_pipeline::new(3);
    pipeline_init(&mut pipeline);

    // Route the outer multicast prefix so the packet gets through the
    // router tables: ff0e::/16 -> idx=6 -> port 1.
    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
    pipeline
        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);

    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
    pipeline.add_ingress_router_v6_route_rtr_entry(
        "forward", &key_buf, &param_buf, 0,
    );

    // Allow the inner source 10.0.0.0/8 for inner group 238.1.1.1.
    let (key_buf, param_buf) =
        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
        "allow_source",
        &key_buf,
        &param_buf,
        0,
    );

    // External bitmap includes port 0 (ingress) and port 1.
    let (key_buf, param_buf) =
        mcast_replication_v6_entry("ff0e::1", &[0, 1], &[]);
    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
        "set_port_bitmap",
        &key_buf,
        &param_buf,
        0,
    );

    let (key_buf, param_buf) = mac_rewrite_entry(1, [1, 2, 3, 4, 5, 7]);
    pipeline
        .add_ingress_mac_mac_rewrite_entry("rewrite", &key_buf, &param_buf, 0);

    let mut npu = SoftNpu::new(3, pipeline, false);
    let phy0 = npu.phy(0);
    let phy1 = npu.phy(1);

    npu.run();

    // Geneve-encapsulated inner IPv4 multicast without an mcast tag.
    let ip_data =
        geneve_mcast_v4_pkt("fd00:1::1", "ff0e::1", "10.0.0.1", "238.1.1.1");

    // Send from port 0.
    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;

    // Port 1 should receive, port 0 (ingress) should not. Once port 1
    // has its copy the pipeline has finished processing this packet.
    wait_for(|| phy1.recv_buffer_len() > 0, "port 1 receives copy");
    assert_eq!(
        phy0.recv_buffer_len(),
        0,
        "ingress port 0 should not receive a copy"
    );

    Ok(())
}

// When the only active bitmap after suppression has no ports, no copies
// are produced.
#[test]
fn mcast_suppression_drops_when_only_group_zeroed() -> Result<(), anyhow::Error>
{
    let mut pipeline = main_pipeline::new(3);
    pipeline_init(&mut pipeline);

    let (key_buf, param_buf) = router_idx_entry("ff0e::", 16, 6, 1);
    pipeline
        .add_ingress_router_v6_idx_rtr_entry("index", &key_buf, &param_buf, 0);

    let (key_buf, param_buf) = router_forward_entry(6, "fe80::1", 1, 0);
    pipeline.add_ingress_router_v6_route_rtr_entry(
        "forward", &key_buf, &param_buf, 0,
    );

    let (key_buf, param_buf) =
        mcast_source_filter_v4_entry("10.0.0.0", 8, "238.1.1.1");
    pipeline.add_ingress_mcast_mcast_source_filter_v4_entry(
        "allow_source",
        &key_buf,
        &param_buf,
        0,
    );

    // External bitmap has no ports. Underlay bitmap has port 2.
    // mcast_tag=0 (External) suppresses underlay, leaving external empty.
    let (key_buf, param_buf) = mcast_replication_v6_entry("ff0e::1", &[], &[2]);
    pipeline.add_ingress_mcast_mcast_replication_v6_entry(
        "set_port_bitmap",
        &key_buf,
        &param_buf,
        0,
    );

    let mut npu = SoftNpu::new(3, pipeline, false);
    let phy0 = npu.phy(0);
    let phy1 = npu.phy(1);
    let phy2 = npu.phy(2);

    npu.run();

    // mcast_tag=0 (External): suppresses underlay. External is empty. No copies.
    let ip_data = geneve_mcast_v4_pkt_repl(
        "fd00:1::1",
        "ff0e::1",
        "10.0.0.1",
        "238.1.1.1",
        Some(MCAST_TAG_EXTERNAL),
    );
    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV6, &ip_data)])?;

    // Sentinel: send a plain unicast packet that routes to port 1 via the
    // default 0.0.0.0/0 entry in pipeline_init. Once it arrives we know
    // the pipeline has drained all earlier packets.
    let mut sentinel = vec![0u8; 28];
    let mut sip = MutableIpv4Packet::new(&mut sentinel).unwrap();
    sip.set_version(4);
    sip.set_header_length(5);
    sip.set_source("10.0.0.1".parse().unwrap());
    sip.set_destination("8.8.8.8".parse().unwrap());
    sip.set_total_length(28);
    sip.set_next_level_protocol(IpNextHeaderProtocol::new(17));
    sip.set_ttl(64);
    phy0.send(&[TxFrame::new(phy1.mac, ETHERTYPE_IPV4, &sentinel)])?;

    // Wait for the sentinel to arrive at port 1, then check mcast ports.
    // Port 1 receives 2 frames: the mcast packet unicast-routed via the
    // router table (bitmap merged to 0, no replication) plus the sentinel.
    // Waiting for both frames (rather than "any frame") matters: the first
    // frame alone does not prove the sentinel has been processed, and the
    // exact-count assertion below would otherwise race with its delivery.
    // The key assertion is that port 2 (underlay) receives nothing.
    wait_for(|| phy1.recv_buffer_len() >= 2, "sentinel on port 1");
    assert_eq!(
        phy1.recv_buffer_len(),
        2,
        "unicast-routed mcast packet + sentinel on port 1"
    );
    assert_eq!(
        phy2.recv_buffer_len(),
        0,
        "port 2 should not receive (underlay suppressed)"
    );

    Ok(())
}

// Create an entry for the multipath cidr -> index table.
//
// Key: address octets in network order followed by the prefix length.
// Params: `idx` (little-endian u16) followed by `slots`.
fn router_idx_entry(
    dst: &str,
    prefix_len: u8,
    idx: u16,
    slots: u8,
) -> (Vec<u8>, Vec<u8>) {
    let mut key_buf = match dst.parse().unwrap() {
        IpAddr::V4(a) => a.octets().to_vec(),
        IpAddr::V6(a) => a.octets().to_vec(),
    };
    key_buf.push(prefix_len);

    let mut param_buf = idx.to_le_bytes().to_vec();
    param_buf.push(slots);

    (key_buf, param_buf)
}

// Create an entry for the multipath index -> forwarding data table.
//
// Key: `idx` (little-endian u16) followed by the `route_ttl_is_1` flag.
// Params: `port` (little-endian u16) followed by the gateway address with
// its octets reversed.
fn router_forward_entry(
    idx: u16,
    gw: &str,
    port: u16,
    route_ttl_is_1: u8,
) -> (Vec<u8>, Vec<u8>) {
    let mut key_buf = idx.to_le_bytes().to_vec();
    key_buf.push(route_ttl_is_1);

    let mut nexthop_buf = match gw.parse().unwrap() {
        IpAddr::V4(a) => a.octets().to_vec(),
        IpAddr::V6(a) => a.octets().to_vec(),
    };
    nexthop_buf.reverse();

    let mut param_buf = port.to_le_bytes().to_vec();
    param_buf.extend_from_slice(&nexthop_buf);

    (key_buf, param_buf)
}

// Create a route entry that drops packets with TTL==1.
// The key matches on (path_idx, route_ttl_is_1=1) with the ttl_exceeded
// action which has no parameters.
fn router_ttl_exceeded_entry(idx: u16) -> (Vec<u8>, Vec<u8>) {
    let mut key_buf = idx.to_le_bytes().to_vec();
    key_buf.push(1);
    (key_buf, Vec::new())
}

// Create a NAT entry for an IPv4 address and port range.
//
// Key: `addr` octets reversed, then `begin` and `end` as little-endian u16.
// Params: `target` octets reversed, the low three little-endian bytes of
// `vni`, then `mac` as given.
fn nat4_entry(
    addr: &str,
    begin: u16,
    end: u16,
    target: &str,
    vni: u32,
    mac: [u8; 6],
) -> (Vec<u8>, Vec<u8>) {
    let addr: Ipv4Addr = addr.parse().unwrap();
    let target: Ipv6Addr = target.parse().unwrap();

    let mut key_buf = addr.octets().to_vec();
    key_buf.reverse();
    key_buf.extend_from_slice(&begin.to_le_bytes());
    key_buf.extend_from_slice(&end.to_le_bytes());

    let mut param_buf = target.octets().to_vec();
    param_buf.reverse();
    // Only 24 bits of the VNI are encoded.
    param_buf.extend_from_slice(&vni.to_le_bytes()[..3]);
    param_buf.extend_from_slice(&mac);

    (key_buf, param_buf)
}

// Key-only entry for an IPv6 address: octets reversed, no parameters.
fn local6_entry(addr: &str) -> (Vec<u8>, Vec<u8>) {
    let addr: Ipv6Addr = addr.parse().unwrap();
    let mut key_buf = addr.octets().to_vec();
    key_buf.reverse();

    (key_buf, Vec::new())
}

// IPv4 address (octets reversed) -> MAC address entry.
fn resolver4_entry(addr: &str, mac: [u8; 6]) -> (Vec<u8>, Vec<u8>) {
    let addr: Ipv4Addr = addr.parse().unwrap();
    let mut key_buf = addr.octets().to_vec();
    key_buf.reverse();

    (key_buf, mac.to_vec())
}

// IPv6 address (octets reversed) -> MAC address entry.
fn resolver6_entry(addr: &str, mac: [u8; 6]) -> (Vec<u8>, Vec<u8>) {
    let addr: Ipv6Addr = addr.parse().unwrap();
    let mut key_buf = addr.octets().to_vec();
    key_buf.reverse();

    (key_buf, mac.to_vec())
}

// Port (little-endian u16) -> MAC address entry.
fn mac_rewrite_entry(port: u16, mac: [u8; 6]) -> (Vec<u8>, Vec<u8>) {
    let key_buf = port.to_le_bytes().to_vec();
    let param_buf = mac.to_vec();

    (key_buf, param_buf)
}

// Build a port bitmap for use as action parameter_data.
// With Msb0 ordering, bit index N in the BitVec corresponds to port N.
// The bitmap is 128 bits (16 bytes) to match the P4 `bit<128>` field.
//
// Port 0 is the most significant bit of byte 0, port 7 the least
// significant bit of byte 0, port 8 the most significant bit of byte 1,
// and so on. Duplicate ports are harmless.
//
// Panics if any port is >= 128.
fn port_bitmap(ports: &[u16]) -> Vec<u8> {
    const BITMAP_BYTES: usize = 16;

    let mut bm = vec![0u8; BITMAP_BYTES];
    for &p in ports {
        let byte_idx = usize::from(p / 8);
        assert!(byte_idx < BITMAP_BYTES, "port {p} exceeds bitmap width");
        // Msb0 within the byte: shift the top bit right by the bit offset.
        bm[byte_idx] |= 0x80u8 >> (p % 8);
    }
    bm
}

// Multicast replication bitmap lookup for IPv6 outer destination.
//
// Key matches hdr.ipv6.dst. The 16 address octets are byte-reversed, the
// same layout the other exact-match address keys in this file use.
// Action has two parameters: external bitmap and underlay bitmap, each 16
// bytes, concatenated in that order.
fn mcast_replication_v6_entry(
    dst: &str,
    external_ports: &[u16],
    underlay_ports: &[u16],
) -> (Vec<u8>, Vec<u8>) {
    let addr: Ipv6Addr = dst.parse().unwrap();
    let mut key_buf = addr.octets().to_vec();
    key_buf.reverse();

    let mut param_buf = port_bitmap(external_ports);
    param_buf.extend_from_slice(&port_bitmap(underlay_ports));

    (key_buf, param_buf)
}

// Multicast replication bitmap lookup for IPv4 outer destination.
//
// Same layout as the IPv6 variant, with a 4-byte byte-reversed key.
fn mcast_replication_v4_entry(
    dst: &str,
    external_ports: &[u16],
    underlay_ports: &[u16],
) -> (Vec<u8>, Vec<u8>) {
    let addr: Ipv4Addr = dst.parse().unwrap();
    let mut key_buf = addr.octets().to_vec();
    key_buf.reverse();

    let mut param_buf = port_bitmap(external_ports);
    param_buf.extend_from_slice(&port_bitmap(underlay_ports));

    (key_buf, param_buf)
}

// Source filter entry for inner IPv4 multicast.
// Key: inner src (LPM, network byte order) followed by the prefix length,
// then inner dst (exact, byte-reversed).
// The action takes no parameters.
+fn mcast_source_filter_v4_entry( + src: &str, + src_prefix_len: u8, + dst: &str, +) -> (Vec, Vec) { + let src_addr: Ipv4Addr = src.parse().unwrap(); + let dst_addr: Ipv4Addr = dst.parse().unwrap(); + + // LPM key: network order (IpAddr::from expects BE) + let mut key_buf = src_addr.octets().to_vec(); + key_buf.push(src_prefix_len); + // Exact key: reversed to match confused-endian header storage + let mut dst_bytes = dst_addr.octets().to_vec(); + dst_bytes.reverse(); + key_buf.extend_from_slice(&dst_bytes); + + (key_buf, Vec::new()) +} + +// Source filter entry for inner IPv6 multicast. +fn mcast_source_filter_v6_entry( + src: &str, + src_prefix_len: u8, + dst: &str, +) -> (Vec, Vec) { + let src_addr: Ipv6Addr = src.parse().unwrap(); + let dst_addr: Ipv6Addr = dst.parse().unwrap(); + + // LPM key: network order (IpAddr::from expects BE) + let mut key_buf = src_addr.octets().to_vec(); + key_buf.push(src_prefix_len); + // Exact key: reversed to match confused-endian header storage + let mut dst_bytes = dst_addr.octets().to_vec(); + dst_bytes.reverse(); + key_buf.extend_from_slice(&dst_bytes); + + (key_buf, Vec::new()) +} + +// Egress decap entry for multicast replicated copies. +fn mcast_egress_decap_entry(port: u16) -> (Vec, Vec) { + let key_buf = port.to_le_bytes().to_vec(); + (key_buf, Vec::new()) +} + +fn mcast_egress_decap_vlan_entry( + port: u16, + vlan_id: u16, +) -> (Vec, Vec) { + let key_buf = port.to_le_bytes().to_vec(); + let param_buf = vlan_id.to_le_bytes().to_vec(); + (key_buf, param_buf) +} + +/// Wrap an inner ethernet frame in geneve-over-IPv6 with an Oxide +/// external tag. When `mcast_tag` is `Some(v)`, also includes the +/// oxg_mcast option (0 = External, 1 = Underlay, 2 = Both). +fn wrap_geneve( + outer_src: &str, + outer_dst: &str, + inner_eth: &[u8], + mcast_tag: Option, +) -> Vec { + let opt_chunks: u8 = if mcast_tag.is_some() { 3 } else { 1 }; + // Geneve byte 0: version(2 bits) = 0, opt_len(6 bits) = opt_chunks. 
+ let geneve_byte0 = opt_chunks & 0x3f; + let proto = GENEVE_PROTO_ETH.to_be_bytes(); + let oxg = OXG_OPTION_CLASS.to_be_bytes(); + + let mut geneve_data: Vec = vec![ + geneve_byte0, + 0x00, + proto[0], + proto[1], + 0x00, + 0x00, + 0x01, + 0x00, + ]; + geneve_data.extend_from_slice(&[oxg[0], oxg[1], 0x00, 0x00]); + + if let Some(tag) = mcast_tag { + geneve_data.extend_from_slice(&[oxg[0], oxg[1], 0x01, 0x01]); + let mcast_byte0 = tag << 6; + geneve_data.extend_from_slice(&[mcast_byte0, 0x00, 0x00, 0x00]); + } + + geneve_data.extend_from_slice(inner_eth); + + let mut udp_data = vec![0u8; 8 + geneve_data.len()]; + let mut udp = MutableUdpPacket::new(&mut udp_data).unwrap(); + udp.set_source(100); + udp.set_destination(GENEVE_UDP_PORT); + udp.set_checksum(0); + udp.set_payload(&geneve_data); + + let mut ip_data = vec![0u8; 40 + udp_data.len()]; + let mut ip = MutableIpv6Packet::new(&mut ip_data).unwrap(); + ip.set_version(6); + ip.set_source(outer_src.parse().unwrap()); + ip.set_destination(outer_dst.parse().unwrap()); + ip.set_payload_length(udp_data.len() as u16); + ip.set_payload(&udp_data); + ip.set_next_header(IpNextHeaderProtocol::new(17)); + ip.set_hop_limit(64); + + ip_data +} + +/// Inner IPv4/ICMP multicast ethernet frame. 
+fn inner_mcast_v4_frame(src: &str, dst: &str) -> Vec { + let mut icmp_data = vec![0u8; 8]; + let mut icmp = MutableIcmpPacket::new(&mut icmp_data).unwrap(); + icmp.set_payload([0x04, 0x17, 0x00, 0x00].as_slice()); + + let mut inner_ip_data = vec![0u8; 28]; + let mut inner_ip = MutableIpv4Packet::new(&mut inner_ip_data).unwrap(); + inner_ip.set_version(4); + inner_ip.set_header_length(5); + inner_ip.set_source(src.parse().unwrap()); + inner_ip.set_destination(dst.parse().unwrap()); + inner_ip.set_next_level_protocol(IpNextHeaderProtocol::new(1)); + inner_ip.set_total_length(28); + inner_ip.set_ttl(64); + inner_ip.set_payload(&icmp_data); + let csum = pnet::packet::ipv4::checksum(&inner_ip.to_immutable()); + inner_ip.set_checksum(csum); + + let d: Ipv4Addr = dst.parse().unwrap(); + let o = d.octets(); + let mut eth_data = vec![0u8; 14 + inner_ip_data.len()]; + let mut eth = MutableEthernetPacket::new(&mut eth_data).unwrap(); + eth.set_destination(MacAddr::new( + 0x01, + 0x00, + 0x5e, + o[1] & 0x7f, + o[2], + o[3], + )); + eth.set_source(MacAddr::new(0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff)); + eth.set_ethertype(EtherType(ETHERTYPE_IPV4)); + eth.set_payload(&inner_ip_data); + + eth_data +} + +/// Inner IPv6/ICMPv6 multicast ethernet frame. 
+fn inner_mcast_v6_frame(src: &str, dst: &str) -> Vec { + let icmpv6_data = [0x80u8, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01]; + + let mut inner_ip_data = vec![0u8; 40 + icmpv6_data.len()]; + let mut inner_ip = MutableIpv6Packet::new(&mut inner_ip_data).unwrap(); + inner_ip.set_version(6); + inner_ip.set_source(src.parse().unwrap()); + inner_ip.set_destination(dst.parse().unwrap()); + inner_ip.set_payload_length(icmpv6_data.len() as u16); + inner_ip.set_next_header(IpNextHeaderProtocol::new(58)); + inner_ip.set_hop_limit(64); + inner_ip.set_payload(&icmpv6_data); + + let d: Ipv6Addr = dst.parse().unwrap(); + let o = d.octets(); + let mut eth_data = vec![0u8; 14 + inner_ip_data.len()]; + let mut eth = MutableEthernetPacket::new(&mut eth_data).unwrap(); + eth.set_destination(MacAddr::new(0x33, 0x33, o[12], o[13], o[14], o[15])); + eth.set_source(MacAddr::new(0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff)); + eth.set_ethertype(EtherType(ETHERTYPE_IPV6)); + eth.set_payload(&inner_ip_data); + + eth_data +} + +/// Inner IPv4/UDP multicast ethernet frame. 
+fn inner_mcast_v4_udp_frame( + src: &str, + dst: &str, + udp_src: u16, + udp_dst: u16, +) -> Vec { + let mut inner_udp_data = vec![0u8; 12]; + let mut iudp = MutableUdpPacket::new(&mut inner_udp_data).unwrap(); + iudp.set_source(udp_src); + iudp.set_destination(udp_dst); + iudp.set_length(12); + iudp.set_payload(&[0xde, 0xad, 0xbe, 0xef]); + + let inner_ip_len = 20 + inner_udp_data.len(); + let mut inner_ip_data = vec![0u8; inner_ip_len]; + let mut inner_ip = MutableIpv4Packet::new(&mut inner_ip_data).unwrap(); + inner_ip.set_version(4); + inner_ip.set_header_length(5); + inner_ip.set_source(src.parse().unwrap()); + inner_ip.set_destination(dst.parse().unwrap()); + inner_ip.set_next_level_protocol(IpNextHeaderProtocol::new(17)); + inner_ip.set_total_length(inner_ip_len as u16); + inner_ip.set_ttl(64); + inner_ip.set_payload(&inner_udp_data); + + let d: Ipv4Addr = dst.parse().unwrap(); + let o = d.octets(); + let mut eth_data = vec![0u8; 14 + inner_ip_data.len()]; + let mut eth = MutableEthernetPacket::new(&mut eth_data).unwrap(); + eth.set_destination(MacAddr::new( + 0x01, + 0x00, + 0x5e, + o[1] & 0x7f, + o[2], + o[3], + )); + eth.set_source(MacAddr::new(0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff)); + eth.set_ethertype(EtherType(ETHERTYPE_IPV4)); + eth.set_payload(&inner_ip_data); + + eth_data +} + +/// Geneve-over-IPv6 with inner IPv4/ICMP, no mcast tag. +fn geneve_mcast_v4_pkt( + outer_src: &str, + outer_dst: &str, + inner_src: &str, + inner_dst: &str, +) -> Vec { + geneve_mcast_v4_pkt_repl(outer_src, outer_dst, inner_src, inner_dst, None) +} + +/// Geneve-over-IPv6 with inner IPv4/ICMP and optional mcast tag. +fn geneve_mcast_v4_pkt_repl( + outer_src: &str, + outer_dst: &str, + inner_src: &str, + inner_dst: &str, + mcast_tag: Option, +) -> Vec { + let eth = inner_mcast_v4_frame(inner_src, inner_dst); + wrap_geneve(outer_src, outer_dst, ð, mcast_tag) +} + +/// Geneve-over-IPv6 with inner IPv6/ICMPv6. 
+fn geneve_mcast_v6_inner_pkt( + outer_src: &str, + outer_dst: &str, + inner_src: &str, + inner_dst: &str, +) -> Vec { + let eth = inner_mcast_v6_frame(inner_src, inner_dst); + wrap_geneve(outer_src, outer_dst, ð, None) }