/*
 * Copyright (C) 2016-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General Public License
 * Version 2, June 1991 as shown in the file COPYING in the top-level
 * directory of this source tree or the BSD 2-Clause License provided
 * below. You have the option to license this software under the
 * complete terms of either license.
 *
 * The BSD 2-Clause License:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      1. Redistributions of source code must retain the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer.
 *
 *      2. Redistributions in binary form must reproduce the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer in the documentation and/or other materials
 *         provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#define pr_fmt(fmt)	"NFP net bpf: " fmt

#include <linux/bug.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>

#include "../nfp_asm.h"
#include "../nfp_net_ctrl.h"
/* --- NFP prog --- */
/* Foreach "multiple" entries macros provide pos and next<n> pointers.
 * It's safe to modify the next pointers (but not pos).
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))

#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))
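
/* Illustrative sketch (not part of the driver): a peephole pass over
 * adjacent instruction pairs could use the walk2 macro like this,
 * assuming a hypothetical is_fusable() predicate and a skip flag on the
 * instruction meta:
 *
 *	struct nfp_insn_meta *m1, *m2;
 *
 *	nfp_for_each_insn_walk2(nfp_prog, m1, m2) {
 *		if (is_fusable(m1, m2))
 *			m2->skip = true;
 *	}
 */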
static bool
nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return meta->l.prev != &nfp_prog->insns;
}

static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
{
	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
		pr_warn("instruction limit reached (%u NFP instructions)\n",
			nfp_prog->prog_len);
		nfp_prog->error = -ENOSPC;
		return;
	}

	nfp_prog->prog[nfp_prog->prog_len] = insn;
	nfp_prog->prog_len++;
}

static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
{
	return nfp_prog->prog_len;
}

static bool
nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
{
	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to a translator bug, and the translation
	 * will fail anyway, so just return OK.
	 */
	if (nfp_prog->error)
		return true;
	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
}
/* --- Emitters --- */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
	   bool indir)
{
	u64 insn;

	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
		FIELD_PREP(OP_CMD_CTX, ctx) |
		FIELD_PREP(OP_CMD_B_SRC, breg) |
		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
		FIELD_PREP(OP_CMD_XFER, xfer) |
		FIELD_PREP(OP_CMD_CNT, size) |
		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
		FIELD_PREP(OP_CMD_INDIR, indir) |
		FIELD_PREP(OP_CMD_MODE, mode);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}
	if (reg.swap) {
		pr_err("cmd can't swap arguments\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	if (reg.dst_lmextn || reg.src_lmextn) {
		pr_err("cmd can't use LMextn\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
		   indir);
}

static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
}

static void
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
}

static void
__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
	  enum br_ctx_signal_state css, u16 addr, u8 defer)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BASE |
		FIELD_PREP(OP_BR_MASK, mask) |
		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
		FIELD_PREP(OP_BR_CSS, css) |
		FIELD_PREP(OP_BR_DEFBR, defer) |
		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);

	nfp_prog_push(nfp_prog, insn);
}
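
/* Illustrative note on the address split in __emit_br() above:
 * OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO) yields the right-justified
 * mask of the LO field, so addr_lo keeps the bits that fit and addr_hi
 * is a single flag set iff any higher bit of addr was set. E.g.,
 * assuming a 10-bit LO field, addr = 0x7ff gives addr_lo = 0x3ff and
 * addr_hi = 1 (actual field widths are defined in nfp_asm.h).
 */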
static void
emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
	     enum nfp_relo_type relo)
{
	if (mask == BR_UNC && defer > 2) {
		pr_err("BUG: branch defer out of bounds %d\n", defer);
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_br(nfp_prog, mask,
		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
		  BR_CSS_NONE, addr, defer);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
{
	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
}

static void
__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
	      bool set, bool src_lmextn)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BIT_BASE |
		FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
		FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
		FIELD_PREP(OP_BR_BIT_BV, set) |
		FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
		FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
		FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
		 u8 defer, bool set, enum nfp_relo_type relo)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* NOTE: The bit to test is specified as a rotation amount, such that
	 *	 the bit to test will be placed on the MSB of the result when
	 *	 doing a rotate right. For bit X, we need right rotate X + 1.
	 */
	bit += 1;

	err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
		      reg.src_lmextn);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
{
	emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
}
static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	     enum immed_width width, bool invert,
	     enum immed_shift shift, bool wr_both,
	     bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_IMMED_BASE |
		FIELD_PREP(OP_IMMED_A_SRC, areg) |
		FIELD_PREP(OP_IMMED_B_SRC, breg) |
		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
		FIELD_PREP(OP_IMMED_WIDTH, width) |
		FIELD_PREP(OP_IMMED_INV, invert) |
		FIELD_PREP(OP_IMMED_SHIFT, shift) |
		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
	   enum immed_width width, bool invert, enum immed_shift shift)
{
	struct nfp_insn_ur_regs reg;
	int err;

	if (swreg_type(dst) == NN_REG_IMM) {
		nfp_prog->error = -EFAULT;
		return;
	}

	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	/* Use reg.dst when destination is No-Dest. */
	__emit_immed(nfp_prog,
		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
		     reg.breg, imm >> 8, width, invert, shift,
		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}
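
/* Worked example (illustrative): emit_immed(nfp_prog, dst, 0x1234, ...)
 * routes the low byte 0x34 through the B operand (reg_imm(imm & 0xff))
 * and the high byte 0x12 through OP_IMMED_IMM (imm >> 8), letting the
 * hardware reassemble the full 16-bit value 0x1234.
 */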
static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   enum shf_sc sc, u8 shift,
	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
		nfp_prog->error = -EFAULT;
		return;
	}

	/* NFP shift instructions have a quirk: if the shift direction is
	 * left, a shift amount of 1 to 31 is specified as 32 minus the
	 * amount to shift.
	 *
	 * There is no need to do this for an indirect shift, whose shift
	 * amount is 0. Even after the subtraction, a shift amount of 0 would
	 * be turned into 32, which would eventually be encoded the same as 0
	 * because only the low 5 bits are encoded; however, a shift amount of
	 * 32 would fail the FIELD_PREP check done later on the shift mask
	 * (0x1f), because 32 is out of range.
	 */
	if (sc == SHF_SC_L_SHF && shift)
		shift = 32 - shift;

	insn = OP_SHF_BASE |
		FIELD_PREP(OP_SHF_A_SRC, areg) |
		FIELD_PREP(OP_SHF_SC, sc) |
		FIELD_PREP(OP_SHF_B_SRC, breg) |
		FIELD_PREP(OP_SHF_I8, i8) |
		FIELD_PREP(OP_SHF_SW, sw) |
		FIELD_PREP(OP_SHF_DST, dst) |
		FIELD_PREP(OP_SHF_SHIFT, shift) |
		FIELD_PREP(OP_SHF_OP, op) |
		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_shf(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
	       swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
{
	if (sc == SHF_SC_R_ROT) {
		pr_err("indirect shift is not allowed on rotation\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
}
static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_ALU_BASE |
		FIELD_PREP(OP_ALU_A_SRC, areg) |
		FIELD_PREP(OP_ALU_B_SRC, breg) |
		FIELD_PREP(OP_ALU_DST, dst) |
		FIELD_PREP(OP_ALU_SW, swap) |
		FIELD_PREP(OP_ALU_OP, op) |
		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_alu(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum alu_op op, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
	   enum mul_type type, enum mul_step step, u16 breg, bool swap,
	   bool wr_both, bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_MUL_BASE |
		FIELD_PREP(OP_MUL_A_SRC, areg) |
		FIELD_PREP(OP_MUL_B_SRC, breg) |
		FIELD_PREP(OP_MUL_STEP, step) |
		FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
		FIELD_PREP(OP_MUL_SW, swap) |
		FIELD_PREP(OP_MUL_TYPE, type) |
		FIELD_PREP(OP_MUL_WR_AB, wr_both) |
		FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
	 enum mul_step step, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	u16 areg;
	int err;

	if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
		nfp_prog->error = -EINVAL;
		return;
	}

	if (step == MUL_LAST || step == MUL_LAST_2) {
		/* When the type is STEP and the step number is LAST or
		 * LAST_2, the left source is used as the destination.
		 */
		err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
		areg = reg.dst;
	} else {
		err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
		areg = reg.areg;
	}

	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
		   reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}
static void
__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
		bool zero, bool swap, bool wr_both,
		bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LDF_BASE |
		FIELD_PREP(OP_LDF_A_SRC, areg) |
		FIELD_PREP(OP_LDF_SC, sc) |
		FIELD_PREP(OP_LDF_B_SRC, breg) |
		FIELD_PREP(OP_LDF_I8, imm8) |
		FIELD_PREP(OP_LDF_SW, swap) |
		FIELD_PREP(OP_LDF_ZF, zero) |
		FIELD_PREP(OP_LDF_BMASK, bmask) |
		FIELD_PREP(OP_LDF_SHF, shift) |
		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
		  enum shf_sc sc, u8 shift, bool zero)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* Note: ld_field is special as it uses one of the src regs as dst */
	err = swreg_to_restricted(dst, dst, src, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
			reg.i8, zero, reg.swap, reg.wr_both,
			reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
	      enum shf_sc sc, u8 shift)
{
	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
}

static void
__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
	    bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LCSR_BASE |
		FIELD_PREP(OP_LCSR_A_SRC, areg) |
		FIELD_PREP(OP_LCSR_B_SRC, breg) |
		FIELD_PREP(OP_LCSR_WRITE, wr) |
		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
{
	struct nfp_insn_ur_regs reg;
	int err;

	/* This instruction takes immeds instead of reg_none() for the ignored
	 * operand, but we can't encode two immeds in one instr with our normal
	 * swreg infra, so if the param is an immed, we encode it as reg_none()
	 * and copy the immed to both operands.
	 */
	if (swreg_type(src) == NN_REG_IMM) {
		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
		reg.breg = reg.areg;
	} else {
		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
	}

	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
		    false, reg.src_lmextn);
}

/* CSR value is read in following immed[gpr, 0] */
static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
{
	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
}

static void emit_nop(struct nfp_prog *nfp_prog)
{
	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
}
/* --- Wrappers --- */
static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
{
	if (!(imm & 0xffff0000)) {
		*val = imm;
		*shift = IMMED_SHIFT_0B;
	} else if (!(imm & 0xff0000ff)) {
		*val = imm >> 8;
		*shift = IMMED_SHIFT_1B;
	} else if (!(imm & 0x0000ffff)) {
		*val = imm >> 16;
		*shift = IMMED_SHIFT_2B;
	} else {
		return false;
	}

	return true;
}

static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
{
	enum immed_shift shift;
	u16 val;

	if (pack_immed(imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
	} else if (pack_immed(~imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
	} else {
		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
			   false, IMMED_SHIFT_0B);
		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
			   false, IMMED_SHIFT_2B);
	}
}
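
/* Worked examples (illustrative):
 * - imm = 0x00abcd00 matches the !(imm & 0xff0000ff) case, so a single
 *   immed with val = 0xabcd and IMMED_SHIFT_1B suffices.
 * - imm = 0x12345678: neither imm nor ~imm packs, so two instructions
 *   are emitted: an ALL-width immed of 0x5678 for the low half, then a
 *   WORD-width immed of 0x1234 shifted by two bytes for the upper half.
 */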
static void
wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
	       enum nfp_relo_type relo)
{
	if (imm > 0xffff) {
		pr_err("relocation of a large immediate!\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If @imm is small enough, encode it directly as an operand and return;
 * otherwise load @imm into a spare register and return its encoding.
 */
static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

/* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If @imm is small enough, encode it directly as an operand and return;
 * otherwise load @imm into a spare register and return its encoding.
 */
static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}
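
/* Typical usage pattern (illustrative) -- callers don't care which form
 * comes back:
 *
 *	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
 *	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
 *
 * If @imm fits UR_REG_IMM_MAX the ALU consumes it as an inline
 * immediate; otherwise it reads the scratch register that wrp_immed()
 * just loaded.
 */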
static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	while (count--)
		emit_nop(nfp_prog);
}

static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
{
	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
}

static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
{
	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}

/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
 * result to @dst from the low end.
 */
static void
wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
		u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
	u8 mask = (1 << field_len) - 1;

	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}

/* wrp_reg_or_subpart() - load @field_len bytes from the low end of @src, OR
 * the result into @dst at @offset; the other bits of @dst are unchanged.
 */
static void
wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
		   u8 field_len, u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
	u8 mask = ((1 << field_len) - 1) << offset;

	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
}
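
/* Worked example (illustrative): wrp_reg_subpart(nfp_prog, dst, src, 2, 1)
 * builds mask = 0x3 (byte-enables for the two low bytes) and right
 * shifts @src by 8, placing bytes 1-2 of @src at the low end of @dst;
 * wrp_reg_or_subpart() with the same arguments would use mask = 0x6 and
 * an effective left shift of 8 (encoded as 32 - 8 per the shf quirk),
 * writing the two low bytes of @src at byte offset 1 of @dst while
 * leaving its other bytes untouched.
 */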
static void
addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
	      swreg *rega, swreg *regb)
{
	if (offset == reg_imm(0)) {
		*rega = reg_a(src_gpr);
		*regb = reg_b(src_gpr + 1);
		return;
	}

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
		 reg_imm(0));

	*rega = imm_a(nfp_prog);
	*regb = imm_b(nfp_prog);
}
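
/* Illustrative example: a 40-bit address lives in a GPR pair, the low
 * 32 bits in src_gpr and the high 8 bits in src_gpr + 1, so adding a
 * 32-bit offset is a two-step add -- ALU_OP_ADD on the low word, then
 * ALU_OP_ADD_C on the high word to absorb the carry. E.g. for address
 * 0x12fffffff0 + 0x20, the low word wraps to 0x00000010 and the carry
 * bumps the high byte from 0x12 to 0x13.
 */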
/* NFP has a Command Push Pull bus which supports bulk memory operations. */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	bool descending_seq = meta->ldst_gather_len < 0;
	s16 len = abs(meta->ldst_gather_len);
	swreg src_base, off;
	bool src_40bit_addr;
	unsigned int i;
	u8 xfer_num;

	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
	src_base = reg_a(meta->insn.src_reg * 2);
	xfer_num = round_up(len, 4) / 4;

	if (src_40bit_addr)
		addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
			      &off);

	/* Setup PREV_ALU fields to override memory read length. */
	if (len > 32)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Memory read from source addr into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);

	/* Move from transfer-in to transfer-out. */
	for (i = 0; i < xfer_num; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));

	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));

	if (len <= 8) {
		/* Use a single direct_ref write8. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
		/* Use a single direct_ref write32. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32) {
		/* Use a single indirect_ref write8. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       len - 1, CMD_CTX_SWAP);
	} else if (IS_ALIGNED(len, 4)) {
		/* Use a single indirect_ref write32. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 1, CMD_CTX_SWAP);
	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32 bytes, then
		 * another direct_ref write8 to write the remaining bytes.
		 */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
			 CMD_CTX_SWAP);

		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
				      imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
			 CMD_CTX_SWAP);
	} else {
		/* Use one indirect_ref write32 to write a 4-byte aligned
		 * length, then another direct_ref write8 to write the
		 * remaining bytes.
		 */
		u8 new_off;

		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 2, CMD_CTX_SWAP);
		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
			 (len & 0x3) - 1, CMD_CTX_SWAP);
	}

	/* TODO: The following extra load is to make sure the data flow is
	 * identical before and after we do the memory copy optimization.
	 *
	 * The load destination register is not guaranteed to be dead, so we
	 * need to make sure it is loaded with the same value as before this
	 * transformation.
	 *
	 * These extra loads could be removed once we have accurate register
	 * usage information.
	 */
	if (descending_seq)
		xfer_num = 0;
	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
		xfer_num = xfer_num - 1;
	else
		xfer_num = xfer_num - 2;

	switch (BPF_SIZE(meta->insn.code)) {
	case BPF_B:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 1,
				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
		break;
	case BPF_H:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
		break;
	case BPF_W:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(0));
		break;
	case BPF_DW:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(xfer_num));
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
			reg_xfer(xfer_num + 1));
		break;
	}

	if (BPF_SIZE(meta->insn.code) != BPF_DW)
		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
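
/* Worked example (illustrative): a gathered copy of len = 23 with a
 * 32-bit source address gives xfer_num = round_up(23, 4) / 4 = 6; six
 * transfer registers are read and bounced to the write side, and since
 * len <= 32 but is not 4-byte aligned the single indirect_ref write8
 * path is taken, with CMD_OV_LEN overriding the burst length to
 * len - 1 = 22.
 */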
static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
	unsigned int i;
	u16 shift, sz;

	/* We load the value from the address indicated in @offset and then
	 * shift out the data we don't need. Note: this is big endian!
	 */
	sz = max(size, 4);
	shift = size < 4 ? 4 - size : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);

	i = 0;
	if (shift)
		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}
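
/* Worked example (illustrative): a 2-byte data_ld() pulls in sz = 4
 * bytes; the value arrives big endian, so shift = 4 - 2 = 2 and the
 * right shift by shift * 8 = 16 drops the two trailing bytes, leaving
 * the 16-bit value at the low end of dst_gpr.
 */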
static int
data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
{
	unsigned int i;
	u8 mask, sz;

	/* We load the value from the address indicated in rreg + lreg and then
	 * mask out the data we don't need. Note: this is little endian!
	 */
	sz = max(size, 4);
	mask = size < 4 ? GENMASK(size - 1, 0) : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);

	i = 0;
	if (mask)
		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
				  reg_xfer(0), SHF_SC_NONE, 0, true);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}

static int
data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
				  size, CMD_MODE_32b);
}

static int
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	swreg rega, regb;

	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);

	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
				  size, CMD_MODE_40b_BA);
}

static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
{
	swreg tmp_reg;

	/* Calculate the true offset (src_reg + imm) */
	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);

	/* Check packet length (size guaranteed to fit b/c it's u8) */
	emit_alu(nfp_prog, imm_a(nfp_prog),
		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
	emit_alu(nfp_prog, reg_none(),
		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
}
static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
{
	swreg tmp_reg;

	/* Check packet length */
	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	return data_ld(nfp_prog, tmp_reg, 0, size);
}

static int
data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		    u8 src_gpr, u8 size)
{
	unsigned int i;

	for (i = 0; i * 4 < size; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}

static int
data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		   u64 imm, u8 size)
{
	wrp_immed(nfp_prog, reg_xfer(0), imm);
	if (size == 8)
		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}
typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	     bool needs_inc);

static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	      bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes; if the offset is too large do
	 * RMW. Because we RMW twice we waste 2 cycles on unaligned 8 byte
	 * writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new
		 * GPR, that means we are loading a second part of the LMEM
		 * word into a new GPR. IOW we've already looked at that LMEM
		 * word and therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}
static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes; if the offset is too large do
	 * RMW. Because we RMW twice we waste 2 cycles on unaligned 8 byte
	 * writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only the first and last LMEM locations are going to need
		 * RMW, the middle location will be overwritten fully.
		 */
		if (first || last)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);

	if (new_gpr || last) {
		if (idx > RE_REG_LM_IDX_MAX)
			wrp_mov(nfp_prog, reg_lm(0, idx), reg);

		if (should_inc)
			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
	}

	return 0;
}
static int
mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
	     bool clr_gpr, lmem_step step)
{
	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
	bool first = true, last;
	bool needs_inc = false;
	swreg stack_off_reg;
	u8 prev_gpr = 255;
	u32 gpr_byte = 0;
	bool lm3 = true;
	int ret;

	if (meta->ptr_not_const) {
		/* Use of the last encountered ptr_off is OK, they all have
		 * the same alignment. Depend on the low bits of the value
		 * being discarded when written to the LMaddr register.
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	} else if (off + size <= 64) {
		/* We can reach bottom 64B with LMaddr0 */
		lm3 = false;
	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
		/* We have to set up a new pointer. If we know the offset
		 * and the entire access falls into a single 32 byte aligned
		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because offset is ORed
		 * in, not added, when doing *l$indexN[off].
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
						stack_imm(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		off %= 32;
	} else {
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	}
	if (lm3) {
		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by zeroing of upper. */
		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
	}

	if (clr_gpr && size < 8)
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);

	while (size) {
		u32 slice_end;
		u8 slice_size;

		slice_size = min(size, 4 - gpr_byte);
		slice_end = min(off + slice_size, round_up(off + 1, 4));
		slice_size = slice_end - off;

		last = slice_size == size;

		if (needs_inc)
			off %= 4;

		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
			   first, gpr != prev_gpr, last, lm3, needs_inc);
		if (ret)
			return ret;

		prev_gpr = gpr;
		first = false;

		gpr_byte += slice_size;
		if (gpr_byte >= 4) {
			gpr_byte -= 4;
			gpr++;
		}

		size -= slice_size;
		off += slice_size;
	}

	return 0;
}
static void
wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
{
	swreg tmp_reg;

	if (alu_op == ALU_OP_AND) {
		if (!imm)
			wrp_immed(nfp_prog, reg_both(dst), 0);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_OR) {
		if (!~imm)
			wrp_immed(nfp_prog, reg_both(dst), ~0U);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_XOR) {
		if (!~imm)
			emit_alu(nfp_prog, reg_both(dst), reg_none(),
				 ALU_OP_NOT, reg_b(dst));
		if (!imm || !~imm)
			return;
	}

	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
}

static int
wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	if (skip) {
		meta->skip = true;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);

	return 0;
}

static int
wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	emit_alu(nfp_prog, reg_both(dst + 1),
		 reg_a(dst + 1), alu_op, reg_b(src + 1));

	return 0;
}

static int
wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int
wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
static int
wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
		 enum br_mask br_mask, u16 off)
{
	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
	emit_br(nfp_prog, br_mask, off, 0);

	return 0;
}

static int
wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     enum alu_op alu_op, enum br_mask br_mask)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
			 insn->src_reg * 2, br_mask, insn->off);
	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
			 insn->src_reg * 2 + 1, br_mask, insn->off);

	return 0;
}

static const struct jmp_code_map {
	enum br_mask br_mask;
	bool swap;
} jmp_code_map[] = {
	[BPF_JGT >> 4]	= { BR_BLO, true },
	[BPF_JGE >> 4]	= { BR_BHS, false },
	[BPF_JLT >> 4]	= { BR_BLO, false },
	[BPF_JLE >> 4]	= { BR_BHS, true },
	[BPF_JSGT >> 4]	= { BR_BLT, true },
	[BPF_JSGE >> 4]	= { BR_BGE, false },
	[BPF_JSLT >> 4]	= { BR_BLT, false },
	[BPF_JSLE >> 4]	= { BR_BGE, true },
};
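
/* Illustrative reading of the table above: the second field records
 * operand order, not a different branch. NFP only subtracts one way,
 * so e.g. BPF_JGT (dst > src) is JITed as "src - dst, branch on
 * borrow" -- the same BR_BLO as BPF_JLT, just with the operands
 * swapped.
 */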
static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
{
	unsigned int op;

	op = BPF_OP(meta->insn.code) >> 4;
	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
		      !jmp_code_map[op].br_mask,
		      "no code found for jump instruction"))
		return NULL;

	return &jmp_code_map[op];
}
static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	const struct jmp_code_map *code;
	enum alu_op alu_op, carry_op;
	u8 reg = insn->dst_reg * 2;
	swreg tmp_reg;

	code = nfp_jmp_code_get(meta);
	if (!code)
		return -EINVAL;

	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	if (!code->swap)
		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));

	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
	if (!code->swap)
		emit_alu(nfp_prog, reg_none(),
			 reg_a(reg + 1), carry_op, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(),
			 tmp_reg, carry_op, reg_a(reg + 1));

	emit_br(nfp_prog, code->br_mask, insn->off, 0);

	return 0;
}

static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	const struct jmp_code_map *code;
	u8 areg, breg;

	code = nfp_jmp_code_get(meta);
	if (!code)
		return -EINVAL;

	areg = insn->dst_reg * 2;
	breg = insn->src_reg * 2;

	if (code->swap) {
		areg ^= breg;
		breg ^= areg;
		areg ^= breg;
	}

	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
	emit_br(nfp_prog, code->br_mask, insn->off, 0);

	return 0;
}
static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
{
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
		      SHF_SC_R_ROT, 8);
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
		      SHF_SC_R_ROT, 16);
}

static void
wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg, bool gen_high_half)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
	if (gen_high_half)
		emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
			 reg_none());
	else
		wrp_immed(nfp_prog, dst_hi, 0);
}

static void
wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
	wrp_immed(nfp_prog, dst_hi, 0);
}

static int
wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	bool gen_high_half, bool ropnd_from_reg)
{
	swreg multiplier, multiplicand, dst_hi, dst_lo;
	const struct bpf_insn *insn = &meta->insn;
	u32 lopnd_max, ropnd_max;
	u8 dst_reg;

	dst_reg = insn->dst_reg;
	multiplicand = reg_a(dst_reg * 2);
	dst_hi = reg_both(dst_reg * 2 + 1);
	dst_lo = reg_both(dst_reg * 2);
	lopnd_max = meta->umax_dst;
	if (ropnd_from_reg) {
		multiplier = reg_b(insn->src_reg * 2);
		ropnd_max = meta->umax_src;
	} else {
		u32 imm = insn->imm;

		multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
		ropnd_max = imm;
	}
	if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
		wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
			    gen_high_half);
	else
		wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);

	return 0;
}
static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
{
	swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
	struct reciprocal_value_adv rvalue;
	u8 pre_shift, exp;
	swreg magic;

	if (imm > U32_MAX) {
		wrp_immed(nfp_prog, dst_both, 0);
		return 0;
	}

	/* NOTE: because we are using "reciprocal_value_adv" which doesn't
	 * support "divisor > (1u << 31)", we need to JIT a separate NFP
	 * sequence to handle such a case; it is actually equal to the result
	 * of the unsigned comparison "dst >= imm", which can be calculated
	 * using the following NFP sequence:
	 *
	 *  alu[--, dst, -, imm]
	 *  immed[imm, 0]
	 *  alu[dst, imm, +carry, 0]
	 */
	if (imm > 1U << 31) {
		swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));

		emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
		wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
		emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
			 reg_imm(0));
		return 0;
	}

	rvalue = reciprocal_value_adv(imm, 32);
	exp = rvalue.exp;
	if (rvalue.is_wide_m && !(imm & 1)) {
		pre_shift = fls(imm & -imm) - 1;
		rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
	} else {
		pre_shift = 0;
	}
	magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
	if (imm == 1U << exp) {
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, exp);
	} else if (rvalue.is_wide_m) {
		wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
			    magic, true);
		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
			 imm_b(nfp_prog));
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, 1);
		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
			 imm_b(nfp_prog));
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, rvalue.sh - 1);
	} else {
		if (pre_shift)
			emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
				 dst_b, SHF_SC_R_SHF, pre_shift);
		wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
			 dst_b, SHF_SC_R_SHF, rvalue.sh);
	}

	return 0;
}
static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
	struct nfp_bpf_cap_adjust_head *adjust_head;
	u32 ret_einval, end;

	adjust_head = &nfp_prog->bpf->adjust_head;

	/* Optimized version - 5 vs 14 cycles */
	if (nfp_prog->adjust_head_location != UINT_MAX) {
		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
			return -EINVAL;

		emit_alu(nfp_prog, pptr_reg(nfp_prog),
			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
		emit_alu(nfp_prog, plen_reg(nfp_prog),
			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
		emit_alu(nfp_prog, pv_len(nfp_prog),
			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

		wrp_immed(nfp_prog, reg_both(0), 0);
		wrp_immed(nfp_prog, reg_both(1), 0);

		/* TODO: when adjust head is guaranteed to succeed we can
		 * also eliminate the following if (r0 == 0) branch.
		 */

		return 0;
	}

	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
	end = ret_einval + 2;

	/* We need to use a temp because offset is just a part of the pkt ptr */
	emit_alu(nfp_prog, tmp,
		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));

	/* Validate result will fit within FW datapath constraints */
	emit_alu(nfp_prog, reg_none(),
		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
	emit_alu(nfp_prog, reg_none(),
		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);

	/* Validate the length is at least ETH_HLEN */
	emit_alu(nfp_prog, tmp_len,
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, reg_none(),
		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	/* Load the ret code */
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	/* Modify the packet metadata */
	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);

	/* Skip over the -EINVAL ret code (defer 2) */
	emit_br(nfp_prog, BR_UNC, end, 2);

	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

	/* return -EINVAL target */
	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
		return -EINVAL;

	return 0;
}
static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u32 ret_einval, end;
	swreg plen, delta;

	BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));

	plen = imm_a(nfp_prog);
	delta = reg_a(2 * 2);

	ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
	end = nfp_prog_current_offset(nfp_prog) + 11;

	/* Calculate resulting length */
	emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
	/* delta == 0 is not allowed by the kernel, add must overflow to make
	 * length smaller.
	 */
	emit_br(nfp_prog, BR_BCC, ret_einval, 0);

	/* if (new_len < 14) then -EINVAL */
	emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_ADD, delta);
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_ADD, delta);

	emit_br(nfp_prog, BR_UNC, end, 2);
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
		return -EINVAL;

	return 0;
}
static int
map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	bool load_lm_ptr;
	u32 ret_tgt;
	s64 lm_off;

	/* We only have to reload LM0 if the key is not at start of stack */
	lm_off = nfp_prog->stack_depth;
	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
	load_lm_ptr = meta->arg2.var_off || lm_off;

	/* Set LM0 to start of key */
	if (load_lm_ptr)
		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
	if (meta->func_id == BPF_FUNC_map_update_elem)
		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
		     2, RELO_BR_HELPER);
	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;

	/* Load map ID into A0 */
	wrp_mov(nfp_prog, reg_a(0), reg_a(2));

	/* Load the return address into B0 */
	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
		return -EINVAL;

	/* Reset the LM0 pointer */
	if (!load_lm_ptr)
		return 0;

	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
	wrp_nops(nfp_prog, 3);

	return 0;
}

static int
nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
	/* CSR value is read in following immed[gpr, 0] */
	emit_immed(nfp_prog, reg_both(0), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
	emit_immed(nfp_prog, reg_both(1), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);

	return 0;
}
static int
nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	swreg ptr_type;
	u32 ret_tgt;

	ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));

	ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
		     2, RELO_BR_HELPER);

	/* Load ptr type into A1 */
	wrp_mov(nfp_prog, reg_a(1), ptr_type);

	/* Load the return address into B0 */
	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
		return -EINVAL;

	return 0;
}

static int
nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u32 jmp_tgt;

	jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;

	/* Make sure the queue id fits into the FW field */
	emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
		 ALU_OP_AND_NOT_B, reg_imm(0xff));
	emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);

	/* Set the 'queue selected' bit and the queue value */
	emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
		 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
		 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
	emit_ld_field(nfp_prog,
		      pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
		      SHF_SC_NONE, 0);
	/* Delay slots end here, we will jump over the next instruction if the
	 * queue value fits into the field.
	 */
	emit_ld_field(nfp_prog,
		      pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
		      SHF_SC_NONE, 0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
		return -EINVAL;

	return 0;
}
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;
	u8 src = insn->src_reg * 2;

	if (insn->src_reg == BPF_REG_10) {
		swreg stack_depth_reg;

		stack_depth_reg = ur_load_imm_any(nfp_prog,
						  nfp_prog->stack_depth,
						  stack_imm(nfp_prog));
		emit_alu(nfp_prog, reg_both(dst),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else {
		wrp_reg_mov(nfp_prog, dst, src);
		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
	}

	return 0;
}

static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u64 imm = meta->insn.imm; /* sign extend */

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);

	return 0;
}

static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
}

static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
}

static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
}

static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}

static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);

	return 0;
}

static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}

static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);

	return 0;
}

static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, true, true);
}

static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, true, false);
}

static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
}

static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	/* NOTE: the verifier hook has rejected cases for which the verifier
	 * doesn't know whether the source operand is constant or not.
	 */
	return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
}

static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));

	return 0;
}
/* Pseudo code:
 *   if shift_amt >= 32
 *     dst_high = dst_low << shift_amt[4:0]
 *     dst_low = 0
 *   else
 *     dst_high = (dst_high, dst_low) >> (32 - shift_amt)
 *     dst_low = dst_low << shift_amt
 *
 * The indirect shift will use the same logic at runtime.
 */
static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	if (shift_amt < 32) {
		emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
			 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
			 32 - shift_amt);
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
	} else if (shift_amt == 32) {
		wrp_reg_mov(nfp_prog, dst + 1, dst);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	} else if (shift_amt > 32) {
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	}

	return 0;
}

static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;

	return __shl_imm64(nfp_prog, dst, insn->imm);
}
1989 const struct bpf_insn *insn = &meta->insn;
1990 u8 dst = insn->dst_reg * 2;
1992 return __shl_imm64(nfp_prog, dst, insn->imm);
1995 static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
1997 emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
1999 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
2000 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
2001 reg_b(dst), SHF_SC_R_DSHF);
2004 /* NOTE: for indirect left shift, HIGH part should be calculated first. */
2005 static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2007 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2008 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2009 reg_b(dst), SHF_SC_L_SHF);
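
/* Note on the pattern above (illustrative): an indirect shift takes its
 * shift amount from the immediately preceding ALU result (PREV_ALU),
 * which is why every emit_shf_indir() is preceded by a dummy "OR with 0"
 * ALU instruction whose only job is to stage the amount -- 32 minus the
 * shift amount for the high half, the raw shift amount for the low half.
 */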
static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	shl_reg64_lt32_high(nfp_prog, dst, src);
	shl_reg64_lt32_low(nfp_prog, dst, src);
}

static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_L_SHF);
	wrp_immed(nfp_prog, reg_both(dst), 0);
}

static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	if (umin == umax)
		return __shl_imm64(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	if (umax < 32) {
		shl_reg64_lt32(nfp_prog, dst, src);
	} else if (umin >= 32) {
		shl_reg64_ge32(nfp_prog, dst, src);
	} else {
		/* Generate different instruction sequences depending on the
		 * runtime value of the shift amount.
		 */
		u16 label_ge32, label_end;

		label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);

		shl_reg64_lt32_high(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* shl_reg64_lt32_low packed in delay slot. */
		shl_reg64_lt32_low(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		shl_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
			return -EINVAL;
	}

	return 0;
}
/* Pseudo code:
 *   if shift_amt >= 32
 *     dst_low = dst_high >> shift_amt[4:0]
 *     dst_high = 0
 *   else
 *     dst_high = dst_high >> shift_amt
 *     dst_low = (dst_high, dst_low) >> shift_amt
 *
 * The indirect shift will use the same logic at runtime.
 */
static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	if (shift_amt < 32) {
		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
	} else if (shift_amt == 32) {
		wrp_reg_mov(nfp_prog, dst, dst + 1);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else if (shift_amt > 32) {
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	}

	return 0;
}

static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;

	return __shr_imm64(nfp_prog, dst, insn->imm);
}
/* NOTE: for indirect right shift, LOW part should be calculated first. */
static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
		       reg_b(dst + 1), SHF_SC_R_SHF);
}

static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_R_DSHF);
}

static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	shr_reg64_lt32_low(nfp_prog, dst, src);
	shr_reg64_lt32_high(nfp_prog, dst, src);
}

static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
		       reg_b(dst + 1), SHF_SC_R_SHF);
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}

static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	if (umin == umax)
		return __shr_imm64(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	if (umax < 32) {
		shr_reg64_lt32(nfp_prog, dst, src);
	} else if (umin >= 32) {
		shr_reg64_ge32(nfp_prog, dst, src);
	} else {
		/* Generate different instruction sequences depending on the
		 * runtime value of the shift amount.
		 */
		u16 label_ge32, label_end;

		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
		shr_reg64_lt32_low(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* shr_reg64_lt32_high packed in delay slot. */
		shr_reg64_lt32_high(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		shr_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
			return -EINVAL;
	}

	return 0;
}
2177 /* Code logic is the same as __shr_imm64 except that ashr requires the
2178  * signedness bit to be conveyed to the shifter through the PREV_ALU result.
2179  */
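/* Worked example (illustrative): for dst = 0xffffffff00000000 (i.e. -2^32)
 * and shift_amt = 36, the shift_amt > 32 case below emits
 *   dst_low  = dst_high ashr 4  = 0xffffffff
 *   dst_high = dst_high ashr 31 = 0xffffffff
 * giving the 64-bit result -1, as a signed shift on the host would.
 */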
2180 static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2182 if (shift_amt < 32) {
2183 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2184 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
2185 /* Set signedness bit. */
2186 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2188 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2189 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
2190 } else if (shift_amt == 32) {
2191 /* NOTE: this also helps set the signedness bit. */
2192 wrp_reg_mov(nfp_prog, dst, dst + 1);
2193 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2194 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2195 } else if (shift_amt > 32) {
2196 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2198 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2199 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
2200 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2201 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2207 static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2209 const struct bpf_insn *insn = &meta->insn;
2210 u8 dst = insn->dst_reg * 2;
2212 return __ashr_imm64(nfp_prog, dst, insn->imm);
2215 static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2217 /* NOTE: the first insn will set both indirect shift amount (source A)
2218 * and signedness bit (MSB of result).
2220 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2221 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2222 reg_b(dst + 1), SHF_SC_R_SHF);
2225 static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2227 /* NOTE: this is the same as the logical shift because we don't need to
2228  * shift in the signedness bit when the shift amount is less than 32.
2229  */
2230 return shr_reg64_lt32_low(nfp_prog, dst, src);
2233 static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2235 ashr_reg64_lt32_low(nfp_prog, dst, src);
2236 ashr_reg64_lt32_high(nfp_prog, dst, src);
2239 static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2241 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2242 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2243 reg_b(dst + 1), SHF_SC_R_SHF);
2244 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2245 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2248 /* Like ashr_imm64, but needs to use an indirect shift. */
2249 static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2251 const struct bpf_insn *insn = &meta->insn;
2255 dst = insn->dst_reg * 2;
2256 umin = meta->umin_src;
2257 umax = meta->umax_src;
2259 return __ashr_imm64(nfp_prog, dst, umin);
2261 src = insn->src_reg * 2;
2263 ashr_reg64_lt32(nfp_prog, dst, src);
2264 } else if (umin >= 32) {
2265 ashr_reg64_ge32(nfp_prog, dst, src);
2267 u16 label_ge32, label_end;
2269 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
2270 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2271 ashr_reg64_lt32_low(nfp_prog, dst, src);
2272 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2273 emit_br(nfp_prog, BR_UNC, label_end, 2);
2274 /* ashr_reg64_lt32_high packed in delay slot. */
2275 ashr_reg64_lt32_high(nfp_prog, dst, src);
2277 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2279 ashr_reg64_ge32(nfp_prog, dst, src);
2281 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2288 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2290 const struct bpf_insn *insn = &meta->insn;
2292 wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
2293 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2298 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2300 const struct bpf_insn *insn = &meta->insn;
2302 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
2303 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2308 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2310 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
2313 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2315 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
2318 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2320 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
2323 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2325 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
2328 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2330 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
2333 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2335 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
2338 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2340 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
2343 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2345 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
2348 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2350 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
2353 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2355 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
2358 static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2360 return wrp_mul(nfp_prog, meta, false, true);
2363 static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2365 return wrp_mul(nfp_prog, meta, false, false);
2368 static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2370 return div_reg64(nfp_prog, meta);
2373 static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2375 return div_imm64(nfp_prog, meta);
2378 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2380 u8 dst = meta->insn.dst_reg * 2;
2382 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
2383 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2388 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2390 const struct bpf_insn *insn = &meta->insn;
2393 return 1; /* TODO: zero shift means indirect */
2395 emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
2396 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
2397 SHF_SC_L_SHF, insn->imm);
2398 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2403 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2405 const struct bpf_insn *insn = &meta->insn;
2406 u8 gpr = insn->dst_reg * 2;
2408 switch (insn->imm) {
2410 emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
2412 emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
2415 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
2418 wrp_end32(nfp_prog, reg_a(gpr), gpr);
2419 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
2422 wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
2424 wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
2425 wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
2432 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2434 struct nfp_insn_meta *prev = nfp_meta_prev(meta);
2438 dst = prev->insn.dst_reg * 2;
2439 imm_lo = prev->insn.imm;
2440 imm_hi = meta->insn.imm;
2442 wrp_immed(nfp_prog, reg_both(dst), imm_lo);
2444 /* mov is always 1 insn, load imm may be two, so try to use mov */
2445 if (imm_hi == imm_lo)
2446 wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
2448 wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
2453 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2455 meta->double_cb = imm_ld8_part2;
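/* BPF_LD | BPF_IMM | BPF_DW spans two BPF instructions; the first half only
 * records imm_ld8_part2() as the callback, so code is emitted once both
 * 32-bit halves of the immediate are available.
 */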
2459 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2461 return construct_data_ld(nfp_prog, meta->insn.imm, 1);
2464 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2466 return construct_data_ld(nfp_prog, meta->insn.imm, 2);
2469 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2471 return construct_data_ld(nfp_prog, meta->insn.imm, 4);
2474 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2476 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2477 meta->insn.src_reg * 2, 1);
2480 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2482 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2483 meta->insn.src_reg * 2, 2);
2486 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2488 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2489 meta->insn.src_reg * 2, 4);
2493 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2494 unsigned int size, unsigned int ptr_off)
2496 return mem_op_stack(nfp_prog, meta, size, ptr_off,
2497 meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
2498 true, wrp_lmem_load);
2501 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2504 swreg dst = reg_both(meta->insn.dst_reg * 2);
2506 switch (meta->insn.off) {
2507 case offsetof(struct __sk_buff, len):
2508 if (size != FIELD_SIZEOF(struct __sk_buff, len))
2510 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
2512 case offsetof(struct __sk_buff, data):
2513 if (size != FIELD_SIZEOF(struct __sk_buff, data))
2515 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2517 case offsetof(struct __sk_buff, data_end):
2518 if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
2520 emit_alu(nfp_prog, dst,
2521 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
2527 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2532 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2535 swreg dst = reg_both(meta->insn.dst_reg * 2);
2537 switch (meta->insn.off) {
2538 case offsetof(struct xdp_md, data):
2539 if (size != FIELD_SIZEOF(struct xdp_md, data))
2541 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2543 case offsetof(struct xdp_md, data_end):
2544 if (size != FIELD_SIZEOF(struct xdp_md, data_end))
2546 emit_alu(nfp_prog, dst,
2547 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
2553 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
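/* On the NFP the packet state is kept as (pointer, length) rather than an
 * end pointer, so both the skb and xdp handlers above reconstruct data_end
 * at runtime as pptr_reg + plen_reg.
 */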
2559 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2564 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2566 return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
2567 tmp_reg, meta->insn.dst_reg * 2, size);
2571 mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2576 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2578 return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
2579 tmp_reg, meta->insn.dst_reg * 2, size);
2583 mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
2584 struct nfp_insn_meta *meta)
2586 s16 range_start = meta->pkt_cache.range_start;
2587 s16 range_end = meta->pkt_cache.range_end;
2588 swreg src_base, off;
2592 off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
2593 src_base = reg_a(meta->insn.src_reg * 2);
2594 len = range_end - range_start;
2595 xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
2597 indir = len > 8 * REG_WIDTH;
2598 /* Set up PREV_ALU for indirect mode. */
2600 wrp_immed(nfp_prog, reg_none(),
2601 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
2603 /* Cache memory into transfer-in registers. */
2604 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
2605 off, xfer_num - 1, CMD_CTX_SWAP, indir);
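/* Illustrative sizing: with 32-bit transfer registers (REG_WIDTH == 4),
 * caching e.g. a 20 byte range takes xfer_num = 5 registers; only ranges
 * needing more than 8 registers fall back to the indirect command form
 * with the length supplied through PREV_ALU.
 */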
2609 mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
2610 struct nfp_insn_meta *meta,
2613 s16 range_start = meta->pkt_cache.range_start;
2614 s16 insn_off = meta->insn.off - range_start;
2615 swreg dst_lo, dst_hi, src_lo, src_mid;
2616 u8 dst_gpr = meta->insn.dst_reg * 2;
2617 u8 len_lo = size, len_mid = 0;
2618 u8 idx = insn_off / REG_WIDTH;
2619 u8 off = insn_off % REG_WIDTH;
2621 dst_hi = reg_both(dst_gpr + 1);
2622 dst_lo = reg_both(dst_gpr);
2623 src_lo = reg_xfer(idx);
2625 /* The read length could involve as many as three registers. */
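/* Worked example (illustrative): a 4-byte read starting at byte 2 of
 * reg_xfer(idx) gives len_lo = 2 and len_mid = 2: bytes 2-3 of the first
 * register fill bytes 0-1 of dst_lo, bytes 0-1 of reg_xfer(idx + 1) are
 * OR-ed into bytes 2-3, and dst_hi is zeroed.
 */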
2626 if (size > REG_WIDTH - off) {
2627 /* Calculate the part in the second register. */
2628 len_lo = REG_WIDTH - off;
2629 len_mid = size - len_lo;
2631 /* Calculate the part in the third register. */
2632 if (size > 2 * REG_WIDTH - off)
2633 len_mid = REG_WIDTH;
2636 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
2639 wrp_immed(nfp_prog, dst_hi, 0);
2643 src_mid = reg_xfer(idx + 1);
2645 if (size <= REG_WIDTH) {
2646 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
2647 wrp_immed(nfp_prog, dst_hi, 0);
2649 swreg src_hi = reg_xfer(idx + 2);
2651 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
2652 REG_WIDTH - len_lo, len_lo);
2653 wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
2654 REG_WIDTH - len_lo);
2655 wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
2663 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
2664 struct nfp_insn_meta *meta,
2667 swreg dst_lo, dst_hi, src_lo;
2670 idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
2671 dst_gpr = meta->insn.dst_reg * 2;
2672 dst_hi = reg_both(dst_gpr + 1);
2673 dst_lo = reg_both(dst_gpr);
2674 src_lo = reg_xfer(idx);
2676 if (size < REG_WIDTH) {
2677 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
2678 wrp_immed(nfp_prog, dst_hi, 0);
2679 } else if (size == REG_WIDTH) {
2680 wrp_mov(nfp_prog, dst_lo, src_lo);
2681 wrp_immed(nfp_prog, dst_hi, 0);
2683 swreg src_hi = reg_xfer(idx + 1);
2685 wrp_mov(nfp_prog, dst_lo, src_lo);
2686 wrp_mov(nfp_prog, dst_hi, src_hi);
2693 mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
2694 struct nfp_insn_meta *meta, unsigned int size)
2696 u8 off = meta->insn.off - meta->pkt_cache.range_start;
2698 if (IS_ALIGNED(off, REG_WIDTH))
2699 return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
2701 return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
2705 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2708 if (meta->ldst_gather_len)
2709 return nfp_cpp_memcpy(nfp_prog, meta);
2711 if (meta->ptr.type == PTR_TO_CTX) {
2712 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2713 return mem_ldx_xdp(nfp_prog, meta, size);
2715 return mem_ldx_skb(nfp_prog, meta, size);
2718 if (meta->ptr.type == PTR_TO_PACKET) {
2719 if (meta->pkt_cache.range_end) {
2720 if (meta->pkt_cache.do_init)
2721 mem_ldx_data_init_pktcache(nfp_prog, meta);
2723 return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
2725 return mem_ldx_data(nfp_prog, meta, size);
2729 if (meta->ptr.type == PTR_TO_STACK)
2730 return mem_ldx_stack(nfp_prog, meta, size,
2731 meta->ptr.off + meta->ptr.var_off.value);
2733 if (meta->ptr.type == PTR_TO_MAP_VALUE)
2734 return mem_ldx_emem(nfp_prog, meta, size);
2739 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2741 return mem_ldx(nfp_prog, meta, 1);
2744 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2746 return mem_ldx(nfp_prog, meta, 2);
2749 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2751 return mem_ldx(nfp_prog, meta, 4);
2754 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2756 return mem_ldx(nfp_prog, meta, 8);
2760 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2763 u64 imm = meta->insn.imm; /* sign extend */
2766 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2768 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2772 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2775 if (meta->ptr.type == PTR_TO_PACKET)
2776 return mem_st_data(nfp_prog, meta, size);
2781 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2783 return mem_st(nfp_prog, meta, 1);
2786 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2788 return mem_st(nfp_prog, meta, 2);
2791 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2793 return mem_st(nfp_prog, meta, 4);
2796 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2798 return mem_st(nfp_prog, meta, 8);
2802 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2807 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2809 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2810 meta->insn.src_reg * 2, size);
2814 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2815 unsigned int size, unsigned int ptr_off)
2817 return mem_op_stack(nfp_prog, meta, size, ptr_off,
2818 meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
2819 false, wrp_lmem_store);
2822 static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2824 switch (meta->insn.off) {
2825 case offsetof(struct xdp_md, rx_queue_index):
2826 return nfp_queue_select(nfp_prog, meta);
2829 WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */
2834 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2837 if (meta->ptr.type == PTR_TO_PACKET)
2838 return mem_stx_data(nfp_prog, meta, size);
2840 if (meta->ptr.type == PTR_TO_STACK)
2841 return mem_stx_stack(nfp_prog, meta, size,
2842 meta->ptr.off + meta->ptr.var_off.value);
2847 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2849 return mem_stx(nfp_prog, meta, 1);
2852 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2854 return mem_stx(nfp_prog, meta, 2);
2857 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2859 if (meta->ptr.type == PTR_TO_CTX)
2860 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2861 return mem_stx_xdp(nfp_prog, meta);
2862 return mem_stx(nfp_prog, meta, 4);
2865 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2867 return mem_stx(nfp_prog, meta, 8);
2871 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2873 u8 dst_gpr = meta->insn.dst_reg * 2;
2874 u8 src_gpr = meta->insn.src_reg * 2;
2875 unsigned int full_add, out;
2876 swreg addra, addrb, off;
2878 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2880 /* We can fit 16 bits into the command immediate. If we know the immediate
2881  * is guaranteed to either always or never fit into 16 bits, we only
2882  * generate code to handle that particular case, otherwise we generate
2883  * code for both.
2884  */
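/* Rough shape of the code emitted below when both paths are needed:
 *
 *     br_blo full_add     ; taken when the value exceeds 16 bits, with the
 *                         ; address calculation packed in the defer slots
 *     <add_imm command>   ; 16-bit immediate path
 *     br_unc out
 *   full_add:
 *     <add command>       ; full-width path
 *   out:
 */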
2885 out = nfp_prog_current_offset(nfp_prog);
2886 full_add = nfp_prog_current_offset(nfp_prog);
2888 if (meta->insn.off) {
2892 if (meta->xadd_maybe_16bit) {
2896 if (meta->xadd_over_16bit)
2898 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2903 /* Generate the branch for choosing add_imm vs add */
2904 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2905 swreg max_imm = imm_a(nfp_prog);
2907 wrp_immed(nfp_prog, max_imm, 0xffff);
2908 emit_alu(nfp_prog, reg_none(),
2909 max_imm, ALU_OP_SUB, reg_b(src_gpr));
2910 emit_alu(nfp_prog, reg_none(),
2911 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
2912 emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
2916 /* If the insn has an offset, add it to the address */
2917 if (!meta->insn.off) {
2918 addra = reg_a(dst_gpr);
2919 addrb = reg_b(dst_gpr + 1);
2921 emit_alu(nfp_prog, imma_a(nfp_prog),
2922 reg_a(dst_gpr), ALU_OP_ADD, off);
2923 emit_alu(nfp_prog, imma_b(nfp_prog),
2924 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
2925 addra = imma_a(nfp_prog);
2926 addrb = imma_b(nfp_prog);
2929 /* Generate the add_imm if 16 bits are possible */
2930 if (meta->xadd_maybe_16bit) {
2931 swreg prev_alu = imm_a(nfp_prog);
2933 wrp_immed(nfp_prog, prev_alu,
2934 FIELD_PREP(CMD_OVE_DATA, 2) |
2936 FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
2937 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
2938 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
2939 addra, addrb, 0, CMD_CTX_NO_SWAP);
2941 if (meta->xadd_over_16bit)
2942 emit_br(nfp_prog, BR_UNC, out, 0);
2945 if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
2948 /* Generate the add if 16 bits are not guaranteed */
2949 if (meta->xadd_over_16bit) {
2950 emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
2951 addra, addrb, is64 << 2,
2952 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
2954 wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
2956 wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
2959 if (!nfp_prog_confirm_current_offset(nfp_prog, out))
2965 static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2967 return mem_xadd(nfp_prog, meta, false);
2970 static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2972 return mem_xadd(nfp_prog, meta, true);
2975 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2977 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
2982 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2984 const struct bpf_insn *insn = &meta->insn;
2985 u64 imm = insn->imm; /* sign extend */
2986 swreg or1, or2, tmp_reg;
2988 or1 = reg_a(insn->dst_reg * 2);
2989 or2 = reg_b(insn->dst_reg * 2 + 1);
2992 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2993 emit_alu(nfp_prog, imm_a(nfp_prog),
2994 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2995 or1 = imm_a(nfp_prog);
2999 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3000 emit_alu(nfp_prog, imm_b(nfp_prog),
3001 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3002 or2 = imm_b(nfp_prog);
3005 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
3006 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
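/* The 64-bit equality test above is built from 32-bit operations: each half
 * of dst is XOR-ed with the matching half of the immediate (halves where
 * the immediate is zero use the register directly), the two results are
 * OR-ed, and BR_BEQ branches when the OR is zero, i.e. on equality.
 */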
3011 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3013 const struct bpf_insn *insn = &meta->insn;
3014 u64 imm = insn->imm; /* sign extend */
3023 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3024 emit_alu(nfp_prog, reg_none(),
3025 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
3026 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3030 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3031 emit_alu(nfp_prog, reg_none(),
3032 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
3033 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3039 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3041 const struct bpf_insn *insn = &meta->insn;
3042 u64 imm = insn->imm; /* sign extend */
3046 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
3047 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
3048 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3052 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3053 emit_alu(nfp_prog, reg_none(),
3054 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3055 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3057 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3058 emit_alu(nfp_prog, reg_none(),
3059 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3060 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3065 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3067 const struct bpf_insn *insn = &meta->insn;
3069 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
3070 ALU_OP_XOR, reg_b(insn->src_reg * 2));
3071 emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
3072 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
3073 emit_alu(nfp_prog, reg_none(),
3074 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
3075 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
3080 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3082 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
3085 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3087 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
3090 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3092 switch (meta->insn.imm) {
3093 case BPF_FUNC_xdp_adjust_head:
3094 return adjust_head(nfp_prog, meta);
3095 case BPF_FUNC_xdp_adjust_tail:
3096 return adjust_tail(nfp_prog, meta);
3097 case BPF_FUNC_map_lookup_elem:
3098 case BPF_FUNC_map_update_elem:
3099 case BPF_FUNC_map_delete_elem:
3100 return map_call_stack_common(nfp_prog, meta);
3101 case BPF_FUNC_get_prandom_u32:
3102 return nfp_get_prandom_u32(nfp_prog, meta);
3103 case BPF_FUNC_perf_event_output:
3104 return nfp_perf_event_output(nfp_prog, meta);
3106 WARN_ONCE(1, "verifier allowed unsupported function\n");
3111 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3113 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
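/* The branch target is not known at JIT time; nfp_bpf_relo_for_vnic()
 * resolves RELO_BR_GO_OUT to the per-vNIC tgt_out offset at load time.
 */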
3118 static const instr_cb_t instr_cb[256] = {
3119 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64,
3120 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64,
3121 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64,
3122 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64,
3123 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64,
3124 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64,
3125 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64,
3126 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64,
3127 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64,
3128 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
3129 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
3130 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
3131 [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64,
3132 [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64,
3133 [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64,
3134 [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64,
3135 [BPF_ALU64 | BPF_NEG] = neg_reg64,
3136 [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
3137 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
3138 [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
3139 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
3140 [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
3141 [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
3142 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
3143 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
3144 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
3145 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm,
3146 [BPF_ALU | BPF_AND | BPF_X] = and_reg,
3147 [BPF_ALU | BPF_AND | BPF_K] = and_imm,
3148 [BPF_ALU | BPF_OR | BPF_X] = or_reg,
3149 [BPF_ALU | BPF_OR | BPF_K] = or_imm,
3150 [BPF_ALU | BPF_ADD | BPF_X] = add_reg,
3151 [BPF_ALU | BPF_ADD | BPF_K] = add_imm,
3152 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
3153 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
3154 [BPF_ALU | BPF_MUL | BPF_X] = mul_reg,
3155 [BPF_ALU | BPF_MUL | BPF_K] = mul_imm,
3156 [BPF_ALU | BPF_DIV | BPF_X] = div_reg,
3157 [BPF_ALU | BPF_DIV | BPF_K] = div_imm,
3158 [BPF_ALU | BPF_NEG] = neg_reg,
3159 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
3160 [BPF_ALU | BPF_END | BPF_X] = end_reg32,
3161 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
3162 [BPF_LD | BPF_ABS | BPF_B] = data_ld1,
3163 [BPF_LD | BPF_ABS | BPF_H] = data_ld2,
3164 [BPF_LD | BPF_ABS | BPF_W] = data_ld4,
3165 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
3166 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
3167 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
3168 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1,
3169 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2,
3170 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
3171 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8,
3172 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1,
3173 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2,
3174 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
3175 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8,
3176 [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4,
3177 [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8,
3178 [BPF_ST | BPF_MEM | BPF_B] = mem_st1,
3179 [BPF_ST | BPF_MEM | BPF_H] = mem_st2,
3180 [BPF_ST | BPF_MEM | BPF_W] = mem_st4,
3181 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
3182 [BPF_JMP | BPF_JA | BPF_K] = jump,
3183 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
3184 [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
3185 [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
3186 [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
3187 [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
3188 [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
3189 [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
3190 [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
3191 [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
3192 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
3193 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
3194 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
3195 [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
3196 [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
3197 [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
3198 [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
3199 [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
3200 [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
3201 [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
3202 [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
3203 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
3204 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
3205 [BPF_JMP | BPF_CALL] = call,
3206 [BPF_JMP | BPF_EXIT] = goto_out,
3209 /* --- Assembler logic --- */
3210 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
3212 struct nfp_insn_meta *meta, *jmp_dst;
3215 list_for_each_entry(meta, &nfp_prog->insns, l) {
3218 if (meta->insn.code == (BPF_JMP | BPF_CALL))
3220 if (BPF_CLASS(meta->insn.code) != BPF_JMP)
3223 if (list_is_last(&meta->l, &nfp_prog->insns))
3224 br_idx = nfp_prog->last_bpf_off;
3226 br_idx = list_next_entry(meta, l)->off - 1;
3228 if (!nfp_is_br(nfp_prog->prog[br_idx])) {
3229 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
3230 br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
3233 /* Leave special branches for later */
3234 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
3238 if (!meta->jmp_dst) {
3239 pr_err("Non-exit jump doesn't have destination info recorded!!\n");
3243 jmp_dst = meta->jmp_dst;
3245 if (jmp_dst->skip) {
3246 pr_err("Branch landing on removed instruction!!\n");
3250 for (idx = meta->off; idx <= br_idx; idx++) {
3251 if (!nfp_is_br(nfp_prog->prog[idx]))
3253 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
3260 static void nfp_intro(struct nfp_prog *nfp_prog)
3262 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
3263 emit_alu(nfp_prog, plen_reg(nfp_prog),
3264 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
3267 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
3269 /* TC direct-action mode:
3270 * 0,1 ok NOT SUPPORTED[1]
3271 * 2 drop 0x22 -> drop, count as stat1
3272 * 4,5 nuke 0x02 -> drop
3273 * 7 redir 0x44 -> redir, count as stat2
3274 * * unspec 0x11 -> pass, count as stat0
3276 * [1] We can't support OK and RECLASSIFY because we can't tell TC
3277 * the exact decision made. We are forced to support UNSPEC
3278 * to handle aborts so that's the only one we handle for passing
3279 * packets up the stack.
3281 /* Target for aborts */
3282 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
3284 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3286 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3287 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
3289 /* Target for normal exits */
3290 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3292 /* if R0 > 7 jump to abort */
3293 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
3294 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
3295 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3297 wrp_immed(nfp_prog, reg_b(2), 0x41221211);
3298 wrp_immed(nfp_prog, reg_b(3), 0x41001211);
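/* reg_b(2) and reg_b(3) act as nibble-wide lookup tables indexed by the BPF
 * return code (the indirect shift amount a(1) below is R0 * 4 bits): the
 * selected nibble of 0x41221211 becomes the low nibble of the action byte
 * and the nibble of 0x41001211 the high one, e.g. R0 == 2 yields 0x22
 * (drop, stat1) and R0 == 7 yields 0x44 (redirect, stat2), matching the
 * table above.
 */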
3300 emit_shf(nfp_prog, reg_a(1),
3301 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
3303 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3304 emit_shf(nfp_prog, reg_a(2),
3305 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
3307 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3308 emit_shf(nfp_prog, reg_b(2),
3309 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
3311 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3313 emit_shf(nfp_prog, reg_b(2),
3314 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
3315 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
3318 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
3320 /* XDP return codes:
3321 * 0 aborted 0x82 -> drop, count as stat3
3322 * 1 drop 0x22 -> drop, count as stat1
3323 * 2 pass 0x11 -> pass, count as stat0
3324 * 3 tx 0x44 -> redir, count as stat2
3325 * * unknown 0x82 -> drop, count as stat3
3327 /* Target for aborts */
3328 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
3330 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3332 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3333 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
3335 /* Target for normal exits */
3336 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3338 /* if R0 > 3 jump to abort */
3339 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
3340 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
3342 wrp_immed(nfp_prog, reg_b(2), 0x44112282);
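/* 0x44112282 is a byte-wide lookup table indexed by the XDP return code
 * (a(1) = R0 * 8 is the indirect shift amount): byte 0 is 0x82 (aborted),
 * byte 1 0x22 (drop), byte 2 0x11 (pass), byte 3 0x44 (tx); codes above 3
 * were already diverted to tgt_abort.
 */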
3344 emit_shf(nfp_prog, reg_a(1),
3345 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
3347 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3348 emit_shf(nfp_prog, reg_b(2),
3349 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
3351 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3353 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3354 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
3357 static void nfp_outro(struct nfp_prog *nfp_prog)
3359 switch (nfp_prog->type) {
3360 case BPF_PROG_TYPE_SCHED_CLS:
3361 nfp_outro_tc_da(nfp_prog);
3363 case BPF_PROG_TYPE_XDP:
3364 nfp_outro_xdp(nfp_prog);
3371 static int nfp_translate(struct nfp_prog *nfp_prog)
3373 struct nfp_insn_meta *meta;
3376 nfp_intro(nfp_prog);
3377 if (nfp_prog->error)
3378 return nfp_prog->error;
3380 list_for_each_entry(meta, &nfp_prog->insns, l) {
3381 instr_cb_t cb = instr_cb[meta->insn.code];
3383 meta->off = nfp_prog_current_offset(nfp_prog);
3386 nfp_prog->n_translated++;
3390 if (nfp_meta_has_prev(nfp_prog, meta) &&
3391 nfp_meta_prev(meta)->double_cb)
3392 cb = nfp_meta_prev(meta)->double_cb;
3395 err = cb(nfp_prog, meta);
3398 if (nfp_prog->error)
3399 return nfp_prog->error;
3401 nfp_prog->n_translated++;
3404 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
3406 nfp_outro(nfp_prog);
3407 if (nfp_prog->error)
3408 return nfp_prog->error;
3410 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
3411 if (nfp_prog->error)
3412 return nfp_prog->error;
3414 return nfp_fixup_branches(nfp_prog);
3417 /* --- Optimizations --- */
3418 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
3420 struct nfp_insn_meta *meta;
3422 list_for_each_entry(meta, &nfp_prog->insns, l) {
3423 struct bpf_insn insn = meta->insn;
3425 /* Programs converted from cBPF start with register xoring */
3426 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
3427 insn.src_reg == insn.dst_reg)
3430 /* Programs start with R6 = R1 but we ignore the skb pointer */
3431 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
3432 insn.src_reg == 1 && insn.dst_reg == 6)
3435 /* Return as soon as something doesn't match */
3441 /* abs(insn.imm) will fit better into the unrestricted reg immediate -
3442  * convert an add/sub of a negative number into a sub/add of a positive one.
3443  */
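/* Example: "r0 += -16" (BPF_ALU64 | BPF_ADD | BPF_K with imm == -16) is
 * rewritten below as "r0 -= 16", so the immediate loads as a small
 * positive value.
 */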
3444 static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
3446 struct nfp_insn_meta *meta;
3448 list_for_each_entry(meta, &nfp_prog->insns, l) {
3449 struct bpf_insn insn = meta->insn;
3454 if (BPF_CLASS(insn.code) != BPF_ALU &&
3455 BPF_CLASS(insn.code) != BPF_ALU64 &&
3456 BPF_CLASS(insn.code) != BPF_JMP)
3458 if (BPF_SRC(insn.code) != BPF_K)
3463 if (BPF_CLASS(insn.code) == BPF_JMP) {
3464 switch (BPF_OP(insn.code)) {
3469 meta->jump_neg_op = true;
3475 if (BPF_OP(insn.code) == BPF_ADD)
3476 insn.code = BPF_CLASS(insn.code) | BPF_SUB;
3477 else if (BPF_OP(insn.code) == BPF_SUB)
3478 insn.code = BPF_CLASS(insn.code) | BPF_ADD;
3482 meta->insn.code = insn.code | BPF_K;
3485 meta->insn.imm = -insn.imm;
3489 /* Remove masking after load since our load guarantees this is not needed */
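/* Example: for the cBPF-origin pair
 *   r0 = *(u8 *)(pkt + off);   // BPF_LD | BPF_ABS | BPF_B
 *   r0 &= 0xff;
 * only the load is kept; the AND is redundant because the NFP load
 * already zero-extends the value.
 */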
3490 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
3492 struct nfp_insn_meta *meta1, *meta2;
3493 const s32 exp_mask[] = {
3494 [BPF_B] = 0x000000ffU,
3495 [BPF_H] = 0x0000ffffU,
3496 [BPF_W] = 0xffffffffU,
3499 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3500 struct bpf_insn insn, next;
3505 if (BPF_CLASS(insn.code) != BPF_LD)
3507 if (BPF_MODE(insn.code) != BPF_ABS &&
3508 BPF_MODE(insn.code) != BPF_IND)
3511 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
3514 if (!exp_mask[BPF_SIZE(insn.code)])
3516 if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
3519 if (next.src_reg || next.dst_reg)
3522 if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
3529 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
3531 struct nfp_insn_meta *meta1, *meta2, *meta3;
3533 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
3534 struct bpf_insn insn, next1, next2;
3537 next1 = meta2->insn;
3538 next2 = meta3->insn;
3540 if (BPF_CLASS(insn.code) != BPF_LD)
3542 if (BPF_MODE(insn.code) != BPF_ABS &&
3543 BPF_MODE(insn.code) != BPF_IND)
3545 if (BPF_SIZE(insn.code) != BPF_W)
3548 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
3549 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
3550 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
3551 next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
3554 if (next1.src_reg || next1.dst_reg ||
3555 next2.src_reg || next2.dst_reg)
3558 if (next1.imm != 0x20 || next2.imm != 0x20)
3561 if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
3562 meta3->flags & FLAG_INSN_IS_JUMP_DST)
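/* The pass above drops the "r0 <<= 32; r0 >>= 32" zero-extension idiom
 * emitted after a 32-bit BPF_ABS/BPF_IND load; the load already leaves
 * the upper 32 bits clear, so the shift pair is a no-op.
 */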
3570 /* A load/store pair that forms a memory copy should look like the following:
3571  *
3572  *   ld_width R, [addr_src + offset_src]
3573  *   st_width [addr_dest + offset_dest], R
3574  *
3575  * The destination register of the load and the source register of the store
3576  * should be the same, and the load and store should also have the same
3577  * width. If either addr_src or addr_dest is the stack pointer, we don't do
3578  * the CPP optimization as the stack is modelled by registers on the NFP.
3579  */
3581 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
3582 struct nfp_insn_meta *st_meta)
3584 struct bpf_insn *ld = &ld_meta->insn;
3585 struct bpf_insn *st = &st_meta->insn;
3587 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
3590 if (ld_meta->ptr.type != PTR_TO_PACKET &&
3591 ld_meta->ptr.type != PTR_TO_MAP_VALUE)
3594 if (st_meta->ptr.type != PTR_TO_PACKET)
3597 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
3600 if (ld->dst_reg != st->src_reg)
3603 /* There is a jump to the store insn in this pair. */
3604 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
3610 /* Currently, we only support chaining load/store pairs if:
3612 * - Their address base registers are the same.
3613 * - Their address offsets are in the same order.
3614 * - They operate at the same memory width.
3615 * - There is no jump into the middle of them.
3618 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
3619 struct nfp_insn_meta *st_meta,
3620 struct bpf_insn *prev_ld,
3621 struct bpf_insn *prev_st)
3623 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
3624 struct bpf_insn *ld = &ld_meta->insn;
3625 struct bpf_insn *st = &st_meta->insn;
3626 s16 prev_ld_off, prev_st_off;
3628 /* This pair is the start pair. */
3632 prev_size = BPF_LDST_BYTES(prev_ld);
3633 curr_size = BPF_LDST_BYTES(ld);
3634 prev_ld_base = prev_ld->src_reg;
3635 prev_st_base = prev_st->dst_reg;
3636 prev_ld_dst = prev_ld->dst_reg;
3637 prev_ld_off = prev_ld->off;
3638 prev_st_off = prev_st->off;
3640 if (ld->dst_reg != prev_ld_dst)
3643 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
3646 if (curr_size != prev_size)
3649 /* There is a jump to the head of this pair. */
3650 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
3653 /* Both in ascending order. */
3654 if (prev_ld_off + prev_size == ld->off &&
3655 prev_st_off + prev_size == st->off)
3658 /* Both in descending order. */
3659 if (ld->off + curr_size == prev_ld_off &&
3660 st->off + curr_size == prev_st_off)
3666 /* Return TRUE if a cross memory access happens. A cross memory access means
3667  * the store area overlaps with the load area, so that a later load might
3668  * read the value written by a previous store; in that case we can't treat
3669  * the sequence as a memory copy.
3670  */
3672 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
3673 struct nfp_insn_meta *head_st_meta)
3675 s16 head_ld_off, head_st_off, ld_off;
3677 /* Different pointer types do not overlap. */
3678 if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
3681 /* load and store are both PTR_TO_PACKET, check ID info. */
3682 if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
3685 /* Canonicalize the offsets. Turn all of them against the original
3686  * packet pointer.
3687  */
3688 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
3689 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
3690 ld_off = ld->off + head_ld_meta->ptr.off;
3692 /* Ascending order cross. */
3693 if (ld_off > head_ld_off &&
3694 head_ld_off < head_st_off && ld_off >= head_st_off)
3697 /* Descending order cross. */
3698 if (ld_off < head_ld_off &&
3699 head_ld_off > head_st_off && ld_off <= head_st_off)
3705 /* This pass tries to identify the following instruction sequences.
3707 * load R, [regA + offA]
3708 * store [regB + offB], R
3709 * load R, [regA + offA + const_imm_A]
3710 * store [regB + offB + const_imm_A], R
3711 * load R, [regA + offA + 2 * const_imm_A]
3712 * store [regB + offB + 2 * const_imm_A], R
3715  * The above sequence is typically generated by the compiler when lowering
3716  * memcpy; the NFP prefers using CPP instructions to accelerate it.
3717  */
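/* Example: four consecutive 8-byte load/store pairs copying bytes 0-31 of
 * the packet to bytes 64-95 are folded into one chain with
 * ldst_gather_len == 32; only the head load survives and is later expanded
 * by nfp_cpp_memcpy() into a single CPP transfer.
 */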
3718 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
3720 struct nfp_insn_meta *head_ld_meta = NULL;
3721 struct nfp_insn_meta *head_st_meta = NULL;
3722 struct nfp_insn_meta *meta1, *meta2;
3723 struct bpf_insn *prev_ld = NULL;
3724 struct bpf_insn *prev_st = NULL;
3727 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3728 struct bpf_insn *ld = &meta1->insn;
3729 struct bpf_insn *st = &meta2->insn;
3731 /* Reset record status if any of the following is true:
3732  * - The current insn pair is not load/store.
3733  * - The load/store pair doesn't chain with the previous one.
3734  * - The chained load/store pair crosses memory with a previous pair.
3735  * - The chained load/store pair brings the total memory copy size
3736  *   beyond 128 bytes, which is the maximum length a single
3737  *   NFP CPP command can transfer.
3738  */
3739 if (!curr_pair_is_memcpy(meta1, meta2) ||
3740 !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
3742 (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
3744 head_ld_meta->ldst_gather_len >= 128))) {
3749 s16 prev_ld_off = prev_ld->off;
3750 s16 prev_st_off = prev_st->off;
3751 s16 head_ld_off = head_ld_meta->insn.off;
3753 if (prev_ld_off < head_ld_off) {
3754 head_ld_meta->insn.off = prev_ld_off;
3755 head_st_meta->insn.off = prev_st_off;
3756 head_ld_meta->ldst_gather_len =
3757 -head_ld_meta->ldst_gather_len;
3760 head_ld_meta->paired_st = &head_st_meta->insn;
3761 head_st_meta->skip = true;
3763 head_ld_meta->ldst_gather_len = 0;
3766 /* If the chain is ended by a load/store pair then this
3767  * could serve as the new head of the next chain.
3768  */
3769 if (curr_pair_is_memcpy(meta1, meta2)) {
3770 head_ld_meta = meta1;
3771 head_st_meta = meta2;
3772 head_ld_meta->ldst_gather_len =
3774 meta1 = nfp_meta_next(meta1);
3775 meta2 = nfp_meta_next(meta2);
3780 head_ld_meta = NULL;
3781 head_st_meta = NULL;
3790 if (!head_ld_meta) {
3791 head_ld_meta = meta1;
3792 head_st_meta = meta2;
3798 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
3799 meta1 = nfp_meta_next(meta1);
3800 meta2 = nfp_meta_next(meta2);
3807 static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
3809 struct nfp_insn_meta *meta, *range_node = NULL;
3810 s16 range_start = 0, range_end = 0;
3811 bool cache_avail = false;
3812 struct bpf_insn *insn;
3813 s32 range_ptr_off = 0;
3814 u32 range_ptr_id = 0;
3816 list_for_each_entry(meta, &nfp_prog->insns, l) {
3817 if (meta->flags & FLAG_INSN_IS_JUMP_DST)
3818 cache_avail = false;
3825 if (is_mbpf_store_pkt(meta) ||
3826 insn->code == (BPF_JMP | BPF_CALL) ||
3827 is_mbpf_classic_store_pkt(meta) ||
3828 is_mbpf_classic_load(meta)) {
3829 cache_avail = false;
3833 if (!is_mbpf_load(meta))
3836 if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
3837 cache_avail = false;
3844 goto end_current_then_start_new;
3848 /* Check ID to make sure two reads share the same
3849  * variable offset against PTR_TO_PACKET, and check OFF
3850  * to make sure they also share the same constant
3851  * offset.
3852  *
3853  * OFFs don't really need to be the same, because they
3854  * are the constant offsets against PTR_TO_PACKET, so
3855  * for different OFFs, we could canonicalize them to
3856  * offsets against the original packet pointer. We don't
3857  * support this.
3858  */
3859 if (meta->ptr.id == range_ptr_id &&
3860 meta->ptr.off == range_ptr_off) {
3861 s16 new_start = range_start;
3862 s16 end, off = insn->off;
3863 s16 new_end = range_end;
3864 bool changed = false;
3866 if (off < range_start) {
3871 end = off + BPF_LDST_BYTES(insn);
3872 if (end > range_end) {
3880 if (new_end - new_start <= 64) {
3881 /* Install new range. */
3882 range_start = new_start;
3883 range_end = new_end;
3888 end_current_then_start_new:
3889 range_node->pkt_cache.range_start = range_start;
3890 range_node->pkt_cache.range_end = range_end;
3893 range_node->pkt_cache.do_init = true;
3894 range_ptr_id = range_node->ptr.id;
3895 range_ptr_off = range_node->ptr.off;
3896 range_start = insn->off;
3897 range_end = insn->off + BPF_LDST_BYTES(insn);
3901 range_node->pkt_cache.range_start = range_start;
3902 range_node->pkt_cache.range_end = range_end;
3905 list_for_each_entry(meta, &nfp_prog->insns, l) {
3909 if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
3910 if (meta->pkt_cache.do_init) {
3911 range_start = meta->pkt_cache.range_start;
3912 range_end = meta->pkt_cache.range_end;
3914 meta->pkt_cache.range_start = range_start;
3915 meta->pkt_cache.range_end = range_end;
3921 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
3923 nfp_bpf_opt_reg_init(nfp_prog);
3925 nfp_bpf_opt_neg_add_sub(nfp_prog);
3926 nfp_bpf_opt_ld_mask(nfp_prog);
3927 nfp_bpf_opt_ld_shift(nfp_prog);
3928 nfp_bpf_opt_ldst_gather(nfp_prog);
3929 nfp_bpf_opt_pkt_cache(nfp_prog);
3934 static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
3936 struct nfp_insn_meta *meta1, *meta2;
3937 struct nfp_bpf_map *nfp_map;
3938 struct bpf_map *map;
3941 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3942 if (meta1->skip || meta2->skip)
3945 if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
3946 meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
3949 map = (void *)(unsigned long)((u32)meta1->insn.imm |
3950 (u64)meta2->insn.imm << 32);
3951 if (bpf_map_offload_neutral(map)) {
3954 nfp_map = map_to_offmap(map)->dev_priv;
3958 meta1->insn.imm = id;
3959 meta2->insn.imm = 0;
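/* From this point on the ld_imm64 pair no longer carries a host pointer:
 * the low word holds the map's device-side id and the high word is zero.
 */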
3965 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
3967 __le64 *ustore = (__force __le64 *)prog;
3970 for (i = 0; i < len; i++) {
3973 err = nfp_ustore_check_valid_no_ecc(prog[i]);
3977 ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
3983 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
3987 prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
3991 nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
3992 memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
3993 kvfree(nfp_prog->prog);
3994 nfp_prog->prog = prog;
3997 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
4001 ret = nfp_bpf_replace_map_ptrs(nfp_prog);
4005 ret = nfp_bpf_optimize(nfp_prog);
4009 ret = nfp_translate(nfp_prog);
4011 pr_err("Translation failed with error %d (translated: %u)\n",
4012 ret, nfp_prog->n_translated);
4016 nfp_bpf_prog_trim(nfp_prog);
4021 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
4023 struct nfp_insn_meta *meta;
4025 /* Another pass to record jump information. */
4026 list_for_each_entry(meta, &nfp_prog->insns, l) {
4027 u64 code = meta->insn.code;
4029 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
4030 BPF_OP(code) != BPF_CALL) {
4031 struct nfp_insn_meta *dst_meta;
4032 unsigned short dst_indx;
4034 dst_indx = meta->n + 1 + meta->insn.off;
4035 dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
4038 meta->jmp_dst = dst_meta;
4039 dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
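/* BPF jump offsets are relative to the instruction following the jump,
 * hence the "+ 1" above; FLAG_INSN_IS_JUMP_DST lets the optimization
 * passes avoid transforming code across a jump target.
 */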
4044 bool nfp_bpf_supported_opcode(u8 code)
4046 return !!instr_cb[code];
4049 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
4055 prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
4058 return ERR_PTR(-ENOMEM);
4060 for (i = 0; i < nfp_prog->prog_len; i++) {
4061 enum nfp_relo_type special;
4064 special = FIELD_GET(OP_RELO_TYPE, prog[i]);
4069 br_add_offset(&prog[i], bv->start_off);
4071 case RELO_BR_GO_OUT:
4072 br_set_offset(&prog[i],
4073 nfp_prog->tgt_out + bv->start_off);
4075 case RELO_BR_GO_ABORT:
4076 br_set_offset(&prog[i],
4077 nfp_prog->tgt_abort + bv->start_off);
4079 case RELO_BR_NEXT_PKT:
4080 br_set_offset(&prog[i], bv->tgt_done);
4082 case RELO_BR_HELPER:
4083 val = br_get_offset(prog[i]);
4086 case BPF_FUNC_map_lookup_elem:
4087 val = nfp_prog->bpf->helpers.map_lookup;
4089 case BPF_FUNC_map_update_elem:
4090 val = nfp_prog->bpf->helpers.map_update;
4092 case BPF_FUNC_map_delete_elem:
4093 val = nfp_prog->bpf->helpers.map_delete;
4095 case BPF_FUNC_perf_event_output:
4096 val = nfp_prog->bpf->helpers.perf_event_output;
4099 pr_err("relocation of unknown helper %d\n",
4104 br_set_offset(&prog[i], val);
4106 case RELO_IMMED_REL:
4107 immed_add_value(&prog[i], bv->start_off);
4111 prog[i] &= ~OP_RELO_TYPE;
4114 err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
4122 return ERR_PTR(err);