跳转至

L3 转发

一台交换机上的两台主机,数据平面通过启动时由 Python 编程下发的 ipv4_lpm 表来转发。

你将看到什么

pingall 成功,因为控制器在进入 Shell 之前已为两台主机各装了一条 /32 路由。

拓扑

examples/l3_forwarding/topology.py

"""Two hosts on a /24, ipv4_lpm forwarding programmed via P4Runtime.

The P4 program (`ipv4_lpm.p4`) defines a single LPM table that matches the
destination IPv4 address and sets an egress port. Forwarding entries are
installed at runtime by `setup(net)`, which also pre-seeds static ARP so
ICMP unicast does not have to resolve neighbours at test time.

Run as root:

    sudo python examples/l3_forwarding/topology.py
    sudo p4net examples/l3_forwarding/topology.py
"""

from __future__ import annotations

from pathlib import Path

from p4net import Network
from p4net.topo import Topology

# Directory that contains this file; the P4 source below is resolved
# relative to it rather than to the current working directory.
HERE = Path(__file__).resolve().parent

# Module-level topology description: two hosts in 10.0.0.0/24 attached to
# a single P4 switch.
topology = Topology()
h1 = topology.add_host("h1", ip="10.0.0.1/24", mac="00:00:00:00:00:01")
h2 = topology.add_host("h2", ip="10.0.0.2/24", mac="00:00:00:00:00:02")
# s1 is built from the ipv4_lpm.p4 program that sits next to this file.
s1 = topology.add_switch("s1", p4_src=HERE / "ipv4_lpm.p4")
# NOTE(review): port_b presumably selects the port number on the second
# endpoint (s1), i.e. h1 -> port 1 and h2 -> port 2 -- confirm against
# Topology.add_link. setup() installs entries that use these same numbers.
topology.add_link(h1, s1, port_b=1)
topology.add_link(h2, s1, port_b=2)


def setup(net: Network) -> None:
    """Pre-seed static ARP and install ipv4_lpm forwarding entries."""
    h1 = net.host("h1")
    h2 = net.host("h2")
    h1.exec(
        [
            "ip",
            "neigh",
            "replace",
            "10.0.0.2",
            "lladdr",
            "00:00:00:00:00:02",
            "dev",
            "h1-eth0",
            "nud",
            "permanent",
        ]
    )
    h2.exec(
        [
            "ip",
            "neigh",
            "replace",
            "10.0.0.1",
            "lladdr",
            "00:00:00:00:00:01",
            "dev",
            "h2-eth0",
            "nud",
            "permanent",
        ]
    )

    s1 = net.switch("s1")
    s1.client.insert_table_entry(
        table="MyIngress.ipv4_lpm",
        match={"hdr.ipv4.dstAddr": "10.0.0.1/32"},
        action="MyIngress.set_egress_port",
        params={"port": 1},
    )
    s1.client.insert_table_entry(
        table="MyIngress.ipv4_lpm",
        match={"hdr.ipv4.dstAddr": "10.0.0.2/32"},
        action="MyIngress.set_egress_port",
        params={"port": 2},
    )


if __name__ == "__main__":
    # Imported lazily so that merely importing this topology module does not
    # pull in the CLI.
    from p4net.cli.main import main as run_cli

    # Hand this file to the p4net CLI and exit with whatever it returns.
    exit_code = run_cli([__file__])
    raise SystemExit(exit_code)

setup(net) 调用了两次 client.insert_table_entry(...)——每台主机一次——表名使用 P4Info 中的全限定名。

P4 程序

examples/l3_forwarding/ipv4_lpm.p4

#include <core.p4>
#include <v1model.p4>

// Ethernet frame header: 48 + 48 + 16 bits = 14 bytes.
header ethernet_t {
    bit<48> dstAddr;    // destination MAC address
    bit<48> srcAddr;    // source MAC address
    bit<16> etherType;  // payload type; MyParser branches on this field
}

// Fixed-size IPv4 header: the fields below add up to 160 bits (20 bytes).
// Options are not modelled; MyParser extracts exactly these 20 bytes
// whatever value `ihl` carries.
header ipv4_t {
    bit<4>  version;
    bit<4>  ihl;
    bit<8>  diffserv;
    bit<16> totalLen;
    bit<16> identification;
    bit<3>  flags;
    bit<13> fragOffset;
    bit<8>  ttl;
    bit<8>  protocol;
    bit<16> hdrChecksum;
    bit<32> srcAddr;
    bit<32> dstAddr;  // LPM key of MyIngress.ipv4_lpm
}

// EtherType value on which MyParser transitions to parse_ipv4.
const bit<16> ETHERTYPE_IPV4 = 0x0800;

// Headers the parser may extract and the deparser emits, in wire order.
struct headers {
    ethernet_t ethernet;
    ipv4_t     ipv4;
}

// This program keeps no user-defined per-packet metadata.
struct metadata {}

// Parser: always extracts Ethernet, and additionally extracts IPv4 when the
// EtherType matches ETHERTYPE_IPV4. Every other EtherType is accepted with
// only the Ethernet header valid.
parser MyParser(packet_in pkt, out headers hdr, inout metadata meta,
                inout standard_metadata_t std) {
    state start {
        pkt.extract(hdr.ethernet);
        transition select(hdr.ethernet.etherType) {
            ETHERTYPE_IPV4: parse_ipv4;
            default: accept;  // non-IPv4 frames: hdr.ipv4 stays invalid
        }
    }
    state parse_ipv4 {
        pkt.extract(hdr.ipv4);
        transition accept;
    }
}

// Checksum verification stage, left empty: hdr.ipv4.hdrChecksum is never checked.
control MyVerifyChecksum(inout headers hdr, inout metadata meta) { apply {} }

// Ingress pipeline: forwards IPv4 packets by longest-prefix match on the
// destination address. Packets without a valid IPv4 header bypass the table
// entirely, so no action in this control touches them.
control MyIngress(inout headers hdr, inout metadata meta,
                  inout standard_metadata_t std) {
    // 256 packet counters. Despite the name, the cell that is incremented is
    // selected by the chosen *egress* port (see set_egress_port).
    counter(256, CounterType.packets) ingress_pkts;

    // Marks the packet to be dropped.
    action drop() {
        mark_to_drop(std);
    }

    // Sends the packet out of `port` and bumps that port's counter cell.
    // `port` is supplied by the control plane as action data.
    action set_egress_port(bit<9> port) {
        std.egress_spec = port;
        // NOTE(review): port is 9 bits wide (0..511) but the counter has only
        // 256 cells. v1model documents an out-of-range index as "no counter
        // updated" -- confirm for the target, or keep port numbers below 256.
        ingress_pkts.count((bit<32>) port);
    }

    // LPM table keyed on the IPv4 destination address. It ships with no
    // entries; the control plane installs them at runtime.
    table ipv4_lpm {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            drop;
            set_egress_port;
            NoAction;
        }
        // On a miss nothing happens: egress_spec is left as it was and the
        // packet is not explicitly dropped.
        default_action = NoAction();
        size = 1024;
    }

    apply {
        // Only IPv4 packets consult the table; ARP and other non-IPv4
        // traffic skips it.
        if (hdr.ipv4.isValid()) {
            ipv4_lpm.apply();
        }
    }
}

// Egress pipeline, left empty: packets pass through unchanged.
control MyEgress(inout headers hdr, inout metadata meta,
                 inout standard_metadata_t std) { apply {} }

// Checksum update stage, left empty. No action in this program rewrites a
// header field, so the checksum received is the checksum sent.
control MyComputeChecksum(inout headers hdr, inout metadata meta) { apply {} }

// Deparser: writes the headers back in wire order. Per the P4-16 semantics
// of emit, an invalid header (e.g. hdr.ipv4 on a non-IPv4 frame) produces
// no output.
control MyDeparser(packet_out pkt, in headers hdr) {
    apply {
        pkt.emit(hdr.ethernet);
        pkt.emit(hdr.ipv4);
    }
}

// Instantiate the v1model pipeline. Argument order is fixed by V1Switch:
// parser, verify checksum, ingress, egress, compute checksum, deparser.
V1Switch(MyParser(), MyVerifyChecksum(), MyIngress(), MyEgress(),
         MyComputeChecksum(), MyDeparser()) main;

入口控制只在存在 IPv4 报头时应用 ipv4_lpm——非 IPv4 流量(例如 ARP)根本不会查表,也没有任何动作为其设置出端口,因此无处可去。主机间的地址解析之所以仍能工作,是因为 setup(net) 在两台主机上注入了静态 ARP 条目。

运行

sudo p4net examples/l3_forwarding/topology.py

Shell 中:

p4net> s1 table dump MyIngress.ipv4_lpm
#0
  table:    MyIngress.ipv4_lpm
  match:    {'hdr.ipv4.dstAddr': '10.0.0.1/32'}
  action:   MyIngress.set_egress_port
  params:   {'port': '1'}
#1
  table:    MyIngress.ipv4_lpm
  match:    {'hdr.ipv4.dstAddr': '10.0.0.2/32'}
  action:   MyIngress.set_egress_port
  params:   {'port': '2'}

p4net> pingall
H \ H   h1   h2
   h1    -    1
   h2    1    -
2/2 succeeded

p4net> s1 counter MyIngress.ingress_pkts 1
pkts=1 bytes=98

match 值渲染为 10.0.0.1/32——这是 decode_match 把 P4Runtime 规范字节还原为人类可读的 IPv4 字符串。

关键设计点

  • 同一份数据平面可以承载 5 主机、100 主机或完全不同的 L3 设计——改变的只是表项编程,不动 P4。
  • s1.client.insert_table_entry(...) 接受普通 Python 类型(字符串、字典、整数);P4InfoIndex 根据已加载的 P4Info 把它们翻译成 P4Runtime 的 FieldMatch 与 Action proto。

可尝试的变体

  • 加一台 10.0.0.3/24 的主机,再装一条 LPM 表项。无需修改 P4。
  • 把两条 /32 替换为同一条覆盖整个子网、走相同出口的 /24,观察 pingall 是否仍然成功(提示:h1 与 h2 分别接在端口 1 和端口 2 上,而一条表项只能指向一个出口)。
  • 增加一台位于 10.1.0.0/24 的主机,使用 MyIngress.drop 动作丢弃其流量——观察 LPM 从最长前缀向最短前缀解析的过程。