L3 转发¶
一台交换机上的两台主机,数据平面通过启动时由 Python 编程下发的
ipv4_lpm 表来转发。
你将看到什么¶
pingall 成功,因为控制器在 Shell 进入之前为两台主机各装了一条
/32 路由。
拓扑¶
examples/l3_forwarding/topology.py:
"""Two hosts on a /24, ipv4_lpm forwarding programmed via P4Runtime.
The P4 program (`ipv4_lpm.p4`) defines a single LPM table that matches the
destination IPv4 address and sets an egress port. Forwarding entries are
installed at runtime by `setup(net)`, which also pre-seeds static ARP so
ICMP unicast does not have to resolve neighbours at test time.
Run as root:
sudo python examples/l3_forwarding/topology.py
sudo p4net examples/l3_forwarding/topology.py
"""
from __future__ import annotations
from pathlib import Path
from p4net import Network
from p4net.topo import Topology
# Directory holding this file; the P4 source is looked up next to it.
HERE = Path(__file__).resolve().parent
# Module-level topology object: two hosts on 10.0.0.0/24 and one switch.
topology = Topology()
h1 = topology.add_host("h1", ip="10.0.0.1/24", mac="00:00:00:00:00:01")
h2 = topology.add_host("h2", ip="10.0.0.2/24", mac="00:00:00:00:00:02")
s1 = topology.add_switch("s1", p4_src=HERE / "ipv4_lpm.p4")
# NOTE(review): port_b presumably selects the port on the second endpoint
# (s1) -- confirm. The values 1 and 2 match the egress ports that setup()
# installs for 10.0.0.1/32 and 10.0.0.2/32.
topology.add_link(h1, s1, port_b=1)
topology.add_link(h2, s1, port_b=2)
def setup(net: Network) -> None:
    """Seed permanent neighbour entries on both hosts, then program ipv4_lpm.

    Each host receives a static neighbour entry for its peer so that pings
    need no neighbour resolution, and switch ``s1`` receives one /32 route
    per host pointing at that host's port.
    """
    hosts = {name: net.host(name) for name in ("h1", "h2")}

    # (host, peer IP, peer MAC, local interface)
    neighbours = (
        ("h1", "10.0.0.2", "00:00:00:00:00:02", "h1-eth0"),
        ("h2", "10.0.0.1", "00:00:00:00:00:01", "h2-eth0"),
    )
    for host_name, peer_ip, peer_mac, iface in neighbours:
        argv = [
            "ip", "neigh", "replace", peer_ip,
            "lladdr", peer_mac,
            "dev", iface,
            "nud", "permanent",
        ]
        hosts[host_name].exec(argv)

    # (destination prefix, egress port)
    routes = (
        ("10.0.0.1/32", 1),
        ("10.0.0.2/32", 2),
    )
    switch = net.switch("s1")
    for prefix, egress_port in routes:
        switch.client.insert_table_entry(
            table="MyIngress.ipv4_lpm",
            match={"hdr.ipv4.dstAddr": prefix},
            action="MyIngress.set_egress_port",
            params={"port": egress_port},
        )
if __name__ == "__main__":
    # Run directly: delegate to the p4net CLI with this file as its argument.
    from p4net.cli.main import main

    exit_status = main([__file__])
    raise SystemExit(exit_status)
setup(net) 调用了两次 client.insert_table_entry(...)——每台
主机一次——表名使用 P4Info 中的全限定名。
P4 程序¶
examples/l3_forwarding/ipv4_lpm.p4:
#include <core.p4>
#include <v1model.p4>
// Ethernet header: destination MAC, source MAC, EtherType (112 bits total).
header ethernet_t {
bit<48> dstAddr;
bit<48> srcAddr;
bit<16> etherType;
}
// Fixed-size IPv4 header: the fields below total 160 bits (20 bytes).
// No options field is declared, so the parser always extracts exactly
// 20 bytes regardless of the ihl value.
header ipv4_t {
bit<4> version;
bit<4> ihl;
bit<8> diffserv;
bit<16> totalLen;
bit<16> identification;
bit<3> flags;
bit<13> fragOffset;
bit<8> ttl;
bit<8> protocol;
bit<16> hdrChecksum;
bit<32> srcAddr;
// Key of the ipv4_lpm table in MyIngress.
bit<32> dstAddr;
}
// EtherType value the parser selects on to recognise IPv4.
const bit<16> ETHERTYPE_IPV4 = 0x0800;
// Headers the parser may extract; the deparser emits them in this order.
struct headers {
ethernet_t ethernet;
ipv4_t ipv4;
}
// No user-defined metadata is carried through the pipeline.
struct metadata {}
// Parser: always extracts Ethernet, then IPv4 only when the EtherType
// equals ETHERTYPE_IPV4.
parser MyParser(packet_in pkt, out headers hdr, inout metadata meta,
inout standard_metadata_t std) {
state start {
pkt.extract(hdr.ethernet);
transition select(hdr.ethernet.etherType) {
ETHERTYPE_IPV4: parse_ipv4;
// Any other EtherType is accepted without extracting hdr.ipv4.
default: accept;
}
}
state parse_ipv4 {
pkt.extract(hdr.ipv4);
transition accept;
}
}
// Empty apply block: incoming checksums are not verified.
control MyVerifyChecksum(inout headers hdr, inout metadata meta) { apply {} }
// Ingress: looks up the IPv4 destination in ipv4_lpm and, on a hit with
// set_egress_port, selects the output port and counts the packet.
control MyIngress(inout headers hdr, inout metadata meta,
inout standard_metadata_t std) {
// 256 packet counters; set_egress_port indexes them by egress port number.
counter(256, CounterType.packets) ingress_pkts;
action drop() {
mark_to_drop(std);
}
// `port` is supplied by the control plane as the action parameter.
action set_egress_port(bit<9> port) {
std.egress_spec = port;
// NOTE(review): port is bit<9> (0..511) but the counter has 256 entries;
// confirm the target's behaviour for an index >= 256.
ingress_pkts.count((bit<32>) port);
}
table ipv4_lpm {
key = {
hdr.ipv4.dstAddr: lpm;
}
actions = {
drop;
set_egress_port;
NoAction;
}
// A table miss runs NoAction, so egress_spec is left unchanged.
default_action = NoAction();
size = 1024;
}
apply {
// Non-IPv4 packets (hdr.ipv4 invalid) skip the table entirely.
if (hdr.ipv4.isValid()) {
ipv4_lpm.apply();
}
}
}
// Empty apply block: no egress processing.
control MyEgress(inout headers hdr, inout metadata meta,
inout standard_metadata_t std) { apply {} }
// Empty apply block: no checksum is recomputed. Ingress only writes
// std.egress_spec and does not modify any IPv4 header field.
control MyComputeChecksum(inout headers hdr, inout metadata meta) { apply {} }
// Deparser: writes Ethernet then IPv4 back onto the packet. Per the P4_16
// spec, emit() on an invalid header is a no-op, so frames for which the
// parser did not extract hdr.ipv4 get only the Ethernet header emitted.
control MyDeparser(packet_out pkt, in headers hdr) {
apply {
pkt.emit(hdr.ethernet);
pkt.emit(hdr.ipv4);
}
}
// Instantiate the v1model pipeline from the six blocks defined above.
V1Switch(MyParser(), MyVerifyChecksum(), MyIngress(), MyEgress(),
MyComputeChecksum(), MyDeparser()) main;
入口控制只在存在 IPv4 报头时应用 ipv4_lpm——非 IPv4 流量
(例如 ARP)根本不会查表,egress_spec 也不会被设置,因此不会按路由转发。
主机之间无需 ARP 解析仍能互通,是因为 setup(net) 预先写入了静态邻居条目。
运行¶
Shell 中:
p4net> s1 table dump MyIngress.ipv4_lpm
#0
table: MyIngress.ipv4_lpm
match: {'hdr.ipv4.dstAddr': '10.0.0.1/32'}
action: MyIngress.set_egress_port
params: {'port': '1'}
#1
table: MyIngress.ipv4_lpm
match: {'hdr.ipv4.dstAddr': '10.0.0.2/32'}
action: MyIngress.set_egress_port
params: {'port': '2'}
p4net> pingall
H \ H h1 h2
h1 - 1
h2 1 -
2/2 succeeded
p4net> s1 counter MyIngress.ingress_pkts 1
pkts=1 bytes=98
match 值渲染为 10.0.0.1/32——这是 decode_match 把 P4Runtime
规范字节还原为人类可读的 IPv4 字符串。
关键设计点¶
- 同一份数据平面可以承载 5 主机、100 主机或完全不同的 L3 设计——改变的只是表项编程,不动 P4。
- s1.client.insert_table_entry(...) 接受普通 Python 类型(字符串、字典、整数);P4InfoIndex 根据已加载的 P4Info 把它们翻译成 P4Runtime 的 FieldMatch 与 Action proto。
可尝试的变体¶
- 加一台 10.0.0.3/24 的主机,再装一条 LPM 表项。无需修改 P4。
- 把两条 /32 替换为同一条覆盖整个子网、走相同出口的 /24,验证 pingall 仍然成功。
- 增加一台位于 10.1.0.0/24 的主机,使用 MyIngress.drop 动作丢弃其流量——观察 LPM 从最长前缀向最短前缀解析的过程。